| 1: | <?php |
| 2: | /* |
| 3: | You may not change or alter any portion of this comment or credits |
| 4: | of supporting developers from this source code or any supporting source code |
| 5: | which is considered copyrighted (c) material of the original comment or credit authors. |
| 6: | |
| 7: | This program is distributed in the hope that it will be useful, |
| 8: | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 9: | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
| 10: | */ |
| 11: | |
| 12: | namespace Xmf; |
| 13: | |
| 14: | /** |
| 15: | * StopWords - facilitate filtering of common or purely connective words for natural language processing |
| 16: | * |
| 17: | * @category Xmf\StopWords |
| 18: | * @package Xmf |
| 19: | * @author Richard Griffith <richard@geekwright.com> |
| 20: | * @author trabis <lusopoemas@gmail.com> |
| 21: | * @copyright 2011-2018 XOOPS Project (https://xoops.org) |
| 22: | * @license GNU GPL 2.0 or later (https://www.gnu.org/licenses/gpl-2.0.html) |
| 23: | * @link https://xoops.org |
| 24: | * @see https://en.wikipedia.org/wiki/Stop_words |
| 25: | */ |
class StopWords
{

    /**
     * mbstring encoding, passed to mb_strtolower() when normalizing words
     */
    const ENCODING = 'UTF-8';

    /**
     * Stop word lookup table. The (lowercased) words are the array keys and
     * every value is true, so membership is a single isset() test.
     *
     * @var bool[]
     */
    protected $stopwordList = array();

    /**
     * StopWords constructor - load stop words for current locale
     *
     * The list is read from the _XMF_STOPWORDS constant. If the constant is
     * not defined yet, Language::load('stopwords') is called first; that call
     * is expected to define it (the language file is not part of this class).
     * If the constant is still undefined afterwards the list stays empty and
     * every word is reported as significant.
     *
     * @todo specify locale to constructor, will require shift away from defined constant
     */
    public function __construct()
    {
        if (!defined('_XMF_STOPWORDS')) {
            Language::load('stopwords');
        }
        if (!defined('_XMF_STOPWORDS')) {
            return;
        }

        // Lowercase the list with the same routine check() applies to the
        // candidate word; otherwise an entry such as "The" could never match.
        $list = $this->toLower((string) _XMF_STOPWORDS);

        // Split on runs of ASCII whitespace and drop empty tokens, so doubled
        // spaces or a list wrapped over several lines neither fuse words
        // together nor register '' as a stop word. The explicit byte class
        // (instead of \s) cannot match inside a multi-byte UTF-8 sequence.
        $words = preg_split('/[ \t\r\n]+/', $list, -1, PREG_SPLIT_NO_EMPTY);
        if (is_array($words) && !empty($words)) {
            $this->stopwordList = array_fill_keys($words, true);
        }
    }

    /**
     * check - look up a word in a list of stop words and
     * classify it as a significant word or a stop word.
     *
     * The comparison is case-insensitive: the word is lowercased before the lookup.
     *
     * @param string $key the word to check
     *
     * @return bool True if word is significant, false if it is a stop word
     */
    public function check($key)
    {
        return !isset($this->stopwordList[$this->toLower((string) $key)]);
    }

    /**
     * Lowercase text, using mbstring with static::ENCODING when the extension
     * is available and falling back to the byte-wise strtolower() otherwise.
     *
     * @param string $text text to convert
     *
     * @return string lowercased text
     */
    private function toLower($text)
    {
        return function_exists('mb_strtolower')
            ? mb_strtolower($text, static::ENCODING)
            : strtolower($text);
    }
}
| 69: |