1: <?php
2: /*
3: You may not change or alter any portion of this comment or credits
4: of supporting developers from this source code or any supporting source code
5: which is considered copyrighted (c) material of the original comment or credit authors.
6:
7: This program is distributed in the hope that it will be useful,
8: but WITHOUT ANY WARRANTY; without even the implied warranty of
9: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
10: */
11:
12: namespace Xmf;
13:
14: /**
15: * StopWords - facilitate filtering of common or purely connective words for natural language processing
16: *
17: * @category Xmf\StopWords
18: * @package Xmf
19: * @author Richard Griffith <richard@geekwright.com>
20: * @author trabis <lusopoemas@gmail.com>
21: * @copyright 2011-2016 XOOPS Project (http://xoops.org)
22: * @license GNU GPL 2 or later (http://www.gnu.org/licenses/gpl-2.0.html)
23: * @link http://xoops.org
24: * @see https://en.wikipedia.org/wiki/Stop_words
25: */
class StopWords
{
    /**
     * mbstring encoding used when lower-casing words
     */
    const ENCODING = 'UTF-8';

    /**
     * Lookup table of stop words: each key is a lower-cased stop word, each value is true.
     * Keys are used (instead of a plain list) so that check() is a single isset() lookup.
     *
     * @var bool[]
     */
    protected $stopwordList = array();

    /**
     * StopWords constructor - load stop words for current locale
     *
     * The words are read from the _XMF_STOPWORDS constant. If that constant is not
     * defined yet, Language::load('stopwords') is called first. If it is still not
     * defined afterwards, the list stays empty and check() reports every word as
     * significant.
     *
     * The constant is split on runs of spaces, tabs and line breaks, so a definition
     * that is wrapped over several lines or contains doubled/leading/trailing spaces
     * still yields clean words. Each word is lower-cased the same way check()
     * lower-cases its argument, so an entry is matched regardless of its letter case.
     *
     * @todo specify locale to constructor, will require shift away from defined constant
     */
    public function __construct()
    {
        if (!defined('_XMF_STOPWORDS')) {
            Language::load('stopwords');
        }
        if (!defined('_XMF_STOPWORDS')) {
            return;
        }

        // Explicit ASCII whitespace class (not \s): byte-safe for UTF-8 text and
        // independent of the current locale's character tables.
        $words = preg_split('/[ \t\r\n]+/', (string) _XMF_STOPWORDS, -1, PREG_SPLIT_NO_EMPTY);
        if (empty($words)) {
            return;
        }

        foreach ($words as $word) {
            $this->stopwordList[$this->normalize($word)] = true;
        }
    }

    /**
     * check - look up a word in a list of stop words and
     * classify it as a significant word or a stop word.
     *
     * The comparison is case-insensitive: the word is lower-cased before the lookup.
     *
     * @param string $key the word to check
     *
     * @return bool True if word is significant, false if it is a stop word
     */
    public function check($key)
    {
        return !isset($this->stopwordList[$this->normalize($key)]);
    }

    /**
     * Lower-case a word, using mbstring with static::ENCODING when the extension
     * is available and falling back to the byte-wise strtolower() otherwise.
     *
     * Shared by the constructor and check() so that stored entries and queried
     * words are always folded identically.
     *
     * @param string $word word to normalize
     *
     * @return string lower-cased word
     */
    private function normalize($word)
    {
        return function_exists('mb_strtolower')
            ? mb_strtolower($word, static::ENCODING)
            : strtolower($word);
    }
}
69: