1: | <?php |
2: | /* |
3: | You may not change or alter any portion of this comment or credits |
4: | of supporting developers from this source code or any supporting source code |
5: | which is considered copyrighted (c) material of the original comment or credit authors. |
6: | |
7: | This program is distributed in the hope that it will be useful, |
8: | but WITHOUT ANY WARRANTY; without even the implied warranty of |
9: | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
10: | */ |
11: | |
12: | namespace Xmf; |
13: | |
14: | /** |
15: | * StopWords - facilitate filtering of common or purely connective words for natural language processing |
16: | * |
17: | * @category Xmf\StopWords |
18: | * @package Xmf |
19: | * @author Richard Griffith <richard@geekwright.com> |
20: | * @author trabis <lusopoemas@gmail.com> |
21: | * @copyright 2011-2018 XOOPS Project (https://xoops.org) |
22: | * @license GNU GPL 2.0 or later (https://www.gnu.org/licenses/gpl-2.0.html) |
23: | * @link https://xoops.org |
24: | * @see https://en.wikipedia.org/wiki/Stop_words |
25: | */ |
class StopWords
{
    /**
     * mbstring encoding
     */
    const ENCODING = 'UTF-8';

    /**
     * Lookup table of stop words: each key is a case-folded stop word, each value is true
     *
     * @var array<string,bool>
     */
    protected $stopwordList = array();

    /**
     * StopWords constructor - load stop words for current locale
     *
     * The words are read from the _XMF_STOPWORDS constant. When the constant is not
     * yet defined, Language::load('stopwords') is called first. If the constant is
     * still undefined afterwards, the list stays empty and check() reports every
     * word as significant.
     *
     * @todo specify locale to constructor, will require shift away from defined constant
     */
    public function __construct()
    {
        if (!defined('_XMF_STOPWORDS')) {
            Language::load('stopwords');
        }
        if (!defined('_XMF_STOPWORDS')) {
            return;
        }

        // Split on runs of ASCII whitespace (not just a single space) so that line
        // breaks, tabs or doubled spaces in the language definition neither glue two
        // words together nor add an empty entry. The explicit byte class is safe for
        // UTF-8 text because none of these bytes occur inside a multibyte sequence.
        $words = preg_split('/[ \t\r\n]+/', (string) _XMF_STOPWORDS, -1, PREG_SPLIT_NO_EMPTY);
        if (!is_array($words)) {
            return;
        }

        // Store each word folded the same way check() folds its argument; otherwise
        // a stop word defined with an upper case letter could never be matched.
        foreach ($words as $word) {
            $this->stopwordList[$this->normalize($word)] = true;
        }
    }

    /**
     * check - look up a word in a list of stop words and
     * classify it as a significant word or a stop word.
     *
     * The comparison is case-insensitive.
     *
     * @param string $key the word to check
     *
     * @return bool True if word is significant, false if it is a stop word
     */
    public function check($key)
    {
        return !isset($this->stopwordList[$this->normalize($key)]);
    }

    /**
     * normalize - fold a word to lower case for storage and lookup
     *
     * Uses mb_strtolower() with the class ENCODING when the mbstring extension is
     * available, and falls back to strtolower() otherwise.
     *
     * @param string $word the word to fold
     *
     * @return string the lower case form of the word
     */
    protected function normalize($word)
    {
        return function_exists('mb_strtolower')
            ? mb_strtolower($word, static::ENCODING)
            : strtolower($word);
    }
}
69: |