以关键词为 key 构建字典数组(哈希表),使每个关键词的查找达到常数级别;匹配时采用正向最长匹配算法,具体代码如下:
/**
 * Dictionary-based keyword matcher using forward longest-match scanning.
 *
 * Keywords are stored as array keys so that each candidate lookup is a hash
 * lookup. All lengths and offsets are measured in characters (mbstring), not
 * bytes, so multi-byte text such as Chinese is handled correctly.
 */
class WordMatcher
{
    /** @var array Keyword dictionary: keyword => 1. */
    public $dict = [];

    /** @var int Length, in characters, of the longest keyword in the dictionary. */
    public $wordMaxLen = 0;

    /**
     * @throws RuntimeException If the mbstring extension is not available.
     */
    public function __construct()
    {
        if (!extension_loaded('mbstring')) {
            // Throw instead of exit(): a library class should not terminate
            // the host process; an uncaught exception still stops the script.
            throw new RuntimeException('extension mbstring is not loaded');
        }
    }

    /**
     * Register a keyword.
     *
     * @param string $word Keyword to add; an empty string is ignored because
     *                     it can never be matched.
     * @return void
     */
    public function addWord($word)
    {
        $len = mb_strlen($word);
        if ($len < 1) {
            return;
        }

        if ($len > $this->wordMaxLen) {
            $this->wordMaxLen = $len;
        }
        $this->dict[$word] = 1;
    }

    /**
     * Remove a keyword. Unknown keywords are ignored.
     *
     * @param string $word Keyword to remove.
     * @return void
     */
    public function removeWord($word)
    {
        if (!isset($this->dict[$word])) {
            return;
        }
        unset($this->dict[$word]);

        // If the removed word was (one of) the longest, the cached maximum may
        // now be too large; recompute it so match() does not probe lengths
        // that can no longer succeed.
        if (mb_strlen($word) >= $this->wordMaxLen) {
            $max = 0;
            foreach ($this->dict as $key => $unused) {
                // Numeric-looking keys are stored as integers by PHP.
                $keyLen = mb_strlen((string) $key);
                if ($keyLen > $max) {
                    $max = $keyLen;
                }
            }
            $this->wordMaxLen = $max;
        }
    }

    /**
     * Scan $str from left to right and collect every keyword found, always
     * preferring the longest keyword that starts at the current position.
     *
     * @param string     $str     Text to scan.
     * @param array|null $matched Output: matched keywords are appended in
     *                            order of appearance. A null/undefined
     *                            variable is initialised to an empty array.
     * @return void
     */
    public function match($str, &$matched)
    {
        // Guarantee the caller gets an array even when nothing matches.
        if ($matched === null) {
            $matched = [];
        }

        $total = mb_strlen($str);
        $offset = 0;

        // Iterative scan: the recursive form nested one call per consumed
        // character/word, which is unsafe for long input.
        while ($offset < $total) {
            $step = 1; // No keyword here: skip a single character.

            // Never probe past the end of the string.
            $len = min($this->wordMaxLen, $total - $offset);
            for (; $len > 0; $len--) {
                $candidate = mb_substr($str, $offset, $len);
                if (isset($this->dict[$candidate])) {
                    $matched[] = $candidate;
                    $step = $len; // Consume the whole keyword.
                    break;
                }
            }

            $offset += $step;
        }
    }
}

// Usage example.
$matcher = new WordMatcher;
$matcher->addWord('PHP');
$matcher->addWord('语言');

// $matched becomes ['PHP', '语言'].
$matcher->match('PHP是最好的语言', $matched);
PHP中文关键词匹配
原文地址:https://www.cnblogs.com/cnsr/p/8297123.html