Files
medicalalert-web-reloaded/wp/plugins/relevanssi-premium/premium/class-relevanssi-spellcorrector.php
2024-01-09 22:14:20 -05:00

222 lines
6.7 KiB
PHP

<?php
/**
* /premium/class-spellcorrector.php
*
* @package Relevanssi_Premium
* @author Felipe Ribeiro <felipernb@gmail.com> with modifications by Mikko Saari
* @see https://www.relevanssi.com/
*/
/*
***************************************************************************
* Copyright (C) 2008 by Felipe Ribeiro *
* felipernb@gmail.com *
* http://www.feliperibeiro.com *
* *
* Permission is hereby granted, free of charge, to any person obtaining *
* a copy of this software and associated documentation files (the *
* "Software"), to deal in the Software without restriction, including *
* without limitation the rights to use, copy, modify, merge, publish, *
* distribute, sublicense, and/or sell copies of the Software, and to *
* permit persons to whom the Software is furnished to do so, subject to *
* the following conditions: *
* *
* The above copyright notice and this permission notice shall be *
* included in all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, *
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF *
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*
* IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR *
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, *
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR *
* OTHER DEALINGS IN THE SOFTWARE. *
***************************************************************************
*/
/**
* Spell correcting feature.
*
* This class implements the Spell correcting feature, useful for the
* "Did you mean" functionality on the search engine. Using a dictionary of
* words extracted from the product catalog.
*
* Based on the concepts of Peter Norvig: http://norvig.com/spell-correct.html
*
* @author Felipe Ribeiro <felipernb@gmail.com>
* @date September 18th, 2008
*/
class Relevanssi_SpellCorrector {
/**
* Dictionary of words.
*
* @var array $dictionary Array of words, containing the dictionary.
*/
private static $dictionary;
/**
* Reads a text and extracts the list of words.
*
* @param string $text Source text for the words.
* @return array The list of words
*/
private static function words( $text ) {
$matches = array();
$text = relevanssi_strtolower( $text );
preg_match_all( '/[a-z]+/', $text, $matches );
return $matches[0];
}
/**
* Generates a list of possible "disturbances" on the passed string.
*
* @param string $word Word to disturb.
*
* @return array A list of variations.
*/
private static function edits1( $word ) {
/**
* Filters the alphabet used for Did you mean suggestions.
*
* In order to use the Did you mean suggestions with non-Latin alphabets
* (or even European languages with a wider range of characters than
* English), Relevanssi needs to be provided with the alphabet used.
*
* @param string A string containing the alphabet as a string of
* characters without spaces.
*/
$alphabet = apply_filters(
'relevanssi_didyoumean_alphabet',
'abcdefghijklmnopqrstuvwxyzäöåü'
);
$alphabet = preg_split( '/(?<!^)(?!$)/u', $alphabet );
$n = relevanssi_strlen( $word );
$edits = array();
$substr_function = 'substr';
if ( function_exists( 'mb_substr' ) ) {
$substr_function = 'mb_substr';
}
for ( $i = 0; $i < $n; $i++ ) {
// Removing one letter.
$edits[] = call_user_func( $substr_function, $word, 0, $i )
. call_user_func( $substr_function, $word, $i + 1 );
// Substituting one letter.
foreach ( $alphabet as $c ) {
$edits[] = call_user_func( $substr_function, $word, 0, $i )
. $c . call_user_func( $substr_function, $word, $i + 1 );
}
}
for ( $i = 0; $i < $n - 1; $i++ ) {
// Swapping character order.
$edits[] = call_user_func( $substr_function, $word, 0, $i )
. $word[ $i + 1 ] . $word[ $i ]
. call_user_func( $substr_function, $word, $i + 2 );
}
// Inserting one character.
for ( $i = 0; $i < $n + 1; $i++ ) {
foreach ( $alphabet as $c ) {
$edits[] = call_user_func( $substr_function, $word, 0, $i )
. $c . call_user_func( $substr_function, $word, $i );
}
}
return $edits;
}
/**
* Generate possible "disturbances" in a second level that exist on the
* dictionary.
*
* @param string $word Word to disturb.
*
* @return array Known disturbances.
*/
private static function known_edits2( $word ) {
$known = array();
foreach ( self::edits1( $word ) as $e1 ) {
foreach ( self::edits1( $e1 ) as $e2 ) {
if ( array_key_exists( $e2, self::$dictionary ) ) {
$known[] = $e2;
}
}
}
return $known;
}
/**
* Given a list of words, returns the subset that is present on the
* dictionary.
*
* @param array $words Array of words to check.
* @return array Words that are in the dictionary.
*/
private static function known( array $words ) {
$known = array();
foreach ( $words as $w ) {
if ( array_key_exists( $w, self::$dictionary ) ) {
$known[] = $w;
}
}
return $known;
}
/**
* Corrects the word.
*
* Returns the word that is present on the dictionary that is the most
* similar (and the most relevant) to the word passed as parameter.
*
* @param string $word Word to correct.
*
* @return string Correction suggestion, null if nothing found.
*/
public static function correct( $word ) {
$word = trim( $word );
if ( empty( $word ) ) {
return null;
}
$word = relevanssi_strtolower( $word );
if ( empty( self::$dictionary ) ) {
self::$dictionary = relevanssi_get_words();
}
$candidates = array();
if ( self::known( array( $word ) ) ) {
// Word is in the dictionary. It's fine.
return true;
} else {
$tmp_candidates = self::known( self::edits1( $word ) );
if ( ! empty( $tmp_candidates ) ) {
foreach ( $tmp_candidates as $candidate ) {
$candidates[] = $candidate;
}
} else {
$tmp_candidates = self::known_edits2( $word );
if ( ! empty( $tmp_candidates ) ) {
foreach ( $tmp_candidates as $candidate ) {
$candidates[] = $candidate;
}
} else {
return null;
}
}
}
$max = 0;
foreach ( $candidates as $c ) {
$value = self::$dictionary[ $c ];
if ( $value > $max ) {
$max = $value;
$word = $c;
}
}
return $word;
}
}