| 1: | <?php | 
| 2: | /* | 
| 3: | You may not change or alter any portion of this comment or credits | 
| 4: | of supporting developers from this source code or any supporting source code | 
| 5: | which is considered copyrighted (c) material of the original comment or credit authors. | 
| 6: | |
| 7: | This program is distributed in the hope that it will be useful, | 
| 8: | but WITHOUT ANY WARRANTY; without even the implied warranty of | 
| 9: | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | 
| 10: | */ | 
| 11: | |
| 12: | namespace Xmf; | 
| 13: | |
| 14: | /** | 
| 15: | * StopWords - facilitate filtering of common or purely connective words for natural language processing | 
| 16: | * | 
| 17: | * @category Xmf\StopWords | 
| 18: | * @package Xmf | 
| 19: | * @author Richard Griffith <richard@geekwright.com> | 
| 20: | * @author trabis <lusopoemas@gmail.com> | 
| 21: | * @copyright 2011-2018 XOOPS Project (https://xoops.org) | 
| 22: | * @license GNU GPL 2.0 or later (https://www.gnu.org/licenses/gpl-2.0.html) | 
| 23: | * @link https://xoops.org | 
| 24: | * @see https://en.wikipedia.org/wiki/Stop_words | 
| 25: | */ | 
class StopWords
{

    /**
     * mbstring encoding
     */
    const ENCODING = 'UTF-8';

    /**
     * Lookup table of stop words. Each key is a lowercased stop word and each
     * value is true, so membership can be tested with isset().
     *
     * @var array
     */
    protected $stopwordList = array();

    /**
     * StopWords constructor - build the stop word lookup table from _XMF_STOPWORDS
     *
     * If the _XMF_STOPWORDS constant is not defined yet, Language::load('stopwords')
     * is called first; that call is expected to define the constant. If the constant
     * is still undefined afterwards, the list stays empty and every word is reported
     * as significant.
     *
     * The constant is treated as a whitespace separated list of words. Words are
     * lowercased with the same routine check() applies to its argument, so list
     * entries always compare equal to the keys being looked up.
     *
     * @todo specify locale to constructor, will require shift away from defined constant
     */
    public function __construct()
    {
        if (!defined('_XMF_STOPWORDS')) {
            Language::load('stopwords');
        }
        if (!defined('_XMF_STOPWORDS')) {
            return;
        }

        // Split on runs of ASCII whitespace (not only a single space) and drop
        // empty tokens. The explicit byte class is safe for UTF-8 text because
        // none of these bytes can occur inside a multibyte sequence.
        $words = preg_split(
            '/[ \t\r\n]+/',
            $this->normalize(_XMF_STOPWORDS),
            -1,
            PREG_SPLIT_NO_EMPTY
        );

        // preg_split() returns false on failure; keep the empty default list then.
        if (!empty($words)) {
            $this->stopwordList = array_fill_keys($words, true);
        }
    }

    /**
     * check - look up a word in a list of stop words and
     * classify it as a significant word or a stop word.
     *
     * The comparison is case insensitive: the word is lowercased before lookup.
     *
     * @param string $key the word to check
     *
     * @return bool True if word is significant, false if it is a stop word
     */
    public function check($key)
    {
        return !isset($this->stopwordList[$this->normalize($key)]);
    }

    /**
     * Lowercase text, using mbstring with the class encoding when it is available
     * and falling back to strtolower() otherwise.
     *
     * Shared by the constructor and check() so that both sides of the lookup
     * are normalized identically.
     *
     * @param string $text text to lowercase
     *
     * @return string lowercased text
     */
    private function normalize($text)
    {
        return function_exists('mb_strtolower')
            ? mb_strtolower($text, static::ENCODING)
            : strtolower($text);
    }
}
| 69: |