1: <?php
2: /*
3: You may not change or alter any portion of this comment or credits
4: of supporting developers from this source code or any supporting source code
5: which is considered copyrighted (c) material of the original comment or credit authors.
6:
7: This program is distributed in the hope that it will be useful,
8: but WITHOUT ANY WARRANTY; without even the implied warranty of
9: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
10: */
11:
12: namespace Xoops\Core\Text;
13:
14: use Xoops\Core\Text\Sanitizer\Configuration;
15: use Xoops\Core\Text\Sanitizer\SanitizerConfigurable;
16:
17: /**
18: * Class to "clean up" text for various uses
19: *
20: * @category Sanitizer
21: * @package Xoops\Core\Text
22: * @author Kazumi Ono <onokazu@xoops.org>
23: * @author Goghs Cheng (http://www.eqiao.com, http://www.devbeez.com/)
24: * @author Taiwen Jiang <phppp@users.sourceforge.net>
25: * @author Richard Griffith <richard@geekwright.com>
26: * @copyright 2000-2015 XOOPS Project (http://xoops.org)
27: * @license GNU GPL 2 (http://www.gnu.org/licenses/gpl-2.0.html)
28: * @link http://xoops.org
29: */
30: class Sanitizer extends SanitizerConfigurable
31: {
/**
 * @var array configuration defaults for the sanitizer component itself
 */
protected static $defaultConfiguration = [
    'enabled' => true,
    'prefilters' => [],
    'postfilters' => ['embed', 'clickable'],
];

/**
 * @var bool set to true by registerExtensions() once it has started its work
 */
protected $extensionsLoaded = false;

/**
 * @var ShortCodes processor created in the constructor
 */
protected $shortcodes;

/**
 * @var array entries of the form ['pattern' => ..., 'callback' => ...] added by addPatternCallback()
 */
protected $patterns = [];

/**
 * @var Configuration component configuration, created in the constructor
 */
protected $config;

/**
 * @var Sanitizer the shared instance handed out by getInstance()
 */
private static $instance;
65:
/**
 * Fetch the shared instance of this class, creating it on the first call.
 *
 * @return Sanitizer the shared instance
 */
public static function getInstance()
{
    if (static::$instance !== null) {
        return static::$instance;
    }

    static::$instance = new static();

    return static::$instance;
}
79:
/**
 * Create the ShortCodes processor and the Configuration object used by this instance.
 *
 * The constructor is protected so that instances are obtained through getInstance(),
 * which means this setup work is done once for the shared instance.
 */
protected function __construct()
{
    $this->shortcodes = new ShortCodes();
    $this->config = new Configuration();
}
89:
/**
 * Return the ShortCodes object exactly as it currently is.
 *
 * Unlike getShortCodes(), this does not call registerExtensions() first, so it is
 * meant for internal use where only the bare instance is needed.
 *
 * @see getShortCodes
 *
 * @return ShortCodes
 */
public function getShortCodesInstance()
{
    $shortcodes = $this->shortcodes;

    return $shortcodes;
}
103:
/**
 * Return the ShortCodes object after making sure registerExtensions() has run, so a
 * caller can extend or override what the extensions registered.
 *
 * @return ShortCodes
 *
 * @throws \ErrorException
 */
public function getShortCodes()
{
    $this->registerExtensions();

    $shortcodes = $this->shortcodes;

    return $shortcodes;
}
116:
/**
 * Queue a pattern/callback pair to be applied with preg_replace_callback()
 * by xoopsCodeDecode().
 *
 * @param string   $pattern  regular expression, as accepted by preg_replace_callback()
 * @param callable $callback callback, as accepted by preg_replace_callback()
 *
 * @return void
 */
public function addPatternCallback($pattern, $callback)
{
    $entry = [
        'pattern' => $pattern,
        'callback' => $callback,
    ];

    $this->patterns[] = $entry;
}
129:
/**
 * Hand the text to the 'emoji' service for rendering.
 *
 * @param string $text text to filter
 *
 * @return string the service's rendered value, or the unchanged text when the
 *                service response is not successful
 */
public function smiley($text)
{
    $emojiService = \Xoops::getInstance()->service('emoji');
    $response = $emojiService->renderEmoji($text);

    if ($response->isSuccess()) {
        return $response->getValue();
    }

    return $text;
}
142:
143:
/**
 * Run the text through the 'clickable' filter (intended to turn bare URLs and
 * email addresses into links).
 *
 * @param string $text text to filter
 *
 * @return string
 */
public function makeClickable($text)
{
    $linked = $this->executeFilter('clickable', $text);

    return $linked;
}
155:
/**
 * Convert line endings to <br /> tags
 *
 * Every CRLF, LFCR, LF or CR sequence is replaced by a <br /> tag placed on a line
 * of its own. This differs from PHP's built-in nl2br(), which inserts the tag but
 * leaves the original line-ending sequence untouched.
 *
 * @param string $text text
 *
 * @return string
 */
public function nl2Br($text)
{
    $lineEnding = "/(\r\n)|(\n\r)|(\n)|(\r)/";
    $lineBreak = "\n<br />\n";

    return preg_replace($lineEnding, $lineBreak, $text);
}
171:
/**
 * Convert special characters to HTML entities
 *
 * The character set is always 'UTF-8' and double encoding is left at PHP's default.
 *
 * @param string $text        string being converted
 * @param int    $quote_style extra flags; ENT_QUOTES and ENT_SUBSTITUTE are always added
 *
 * @return string
 */
public function htmlSpecialChars($text, $quote_style = ENT_QUOTES)
{
    $flags = $quote_style | ENT_QUOTES | ENT_SUBSTITUTE;

    return htmlspecialchars($text, $flags, 'UTF-8');
}
187:
/**
 * Escape a string that may end up inside a javascript context.
 *
 * Single quotes become the four characters \x27 and double quotes become \x22;
 * the result is then passed through htmlSpecialChars().
 *
 * @param string $text string being converted
 *
 * @return string
 */
public function escapeForJavascript($text)
{
    $quotes = ["'", '"'];
    $hexEscapes = ['\x27', '\x22'];

    $withoutQuotes = str_replace($quotes, $hexEscapes, $text);

    return $this->htmlSpecialChars($withoutQuotes);
}
203:
/**
 * Escape square brackets so the text no longer contains literal '[' or ']'
 * characters for shortcode processing to pick up.
 *
 * Each bracket is replaced by its numeric character reference (&#91; and &#93;),
 * which an HTML renderer still displays as a bracket.
 *
 * @param string $text string to escape
 *
 * @return string
 */
public function escapeShortCodes($text)
{
    // The replacement must differ from the search characters; replacing a bracket
    // with itself would leave the text unescaped.
    return str_replace(['[', ']'], ['&#91;', '&#93;'], $text);
}
216:
/**
 * Decode the entities produced by htmlSpecialChars(), including both quote styles.
 *
 * @param string $text htmlSpecialChars encoded text
 *
 * @return string
 */
public function undoHtmlSpecialChars($text)
{
    $decoded = htmlspecialchars_decode($text, ENT_QUOTES);

    return $decoded;
}
228:
/**
 * Apply the registered pattern callbacks, the shortcodes and the 'quote' filter
 * to the supplied text.
 *
 * While the patterns and shortcodes are processed, the 'allowimage' setting of the
 * 'image' component is temporarily set to $allowImage; the previous value is put
 * back before the 'quote' filter runs.
 *
 * @param string $text       text to filter
 * @param bool   $allowImage value used for the image 'allowimage' setting during processing
 *
 * @return string
 */
protected function xoopsCodeDecode($text, $allowImage = false)
{
    // remember the configured value so it can be restored below
    $savedAllowImage = $this->config['image']['allowimage'];
    $this->config['image']['allowimage'] = $allowImage;

    $this->registerExtensions();

    /**
     * Pattern callbacks ought to be retired in favour of shortcodes and filters.
     * Only Wiki currently needs them, and its '[[xxx]]' syntax interferes with
     * escaped shortcodes.
     */
    foreach ($this->patterns as $entry) {
        $text = preg_replace_callback($entry['pattern'], $entry['callback'], $text);
    }

    $text = $this->shortcodes->process($text);

    $this->config['image']['allowimage'] = $savedAllowImage;

    return $this->executeFilter('quote', $text);
}
259:
/**
 * Filters data for display
 *
 * Processing order: configured prefilters, HTML escaping (unless $html), code block
 * protection and xoopscode/shortcode decoding (if $xcode), smileys (if $smiley),
 * line break conversion (if $br), code block rendering (if $xcode), and finally the
 * configured postfilters.
 *
 * @param string $text   text to filter for display
 * @param bool   $html   allow html?
 * @param bool   $smiley allow smileys?
 * @param bool   $xcode  allow xoopscode (and shortcodes)?
 * @param bool   $image  allow inline images?
 * @param bool   $br     convert linebreaks?
 *
 * @return string
 */
public function filterForDisplay($text, $html = false, $smiley = true, $xcode = true, $image = true, $br = true)
{
    $config = $this->getConfig();

    // getConfig() falls back to an empty array, so the filter lists may be absent
    $prefilters = isset($config['prefilters']) ? (array) $config['prefilters'] : [];
    $postfilters = isset($config['postfilters']) ? (array) $config['postfilters'] : [];

    foreach ($prefilters as $filter) {
        $text = $this->executeFilter($filter, $text);
    }

    if (!(bool) $html) {
        // html not allowed, so escape any special chars
        // don't mess with quotes or shortcodes will fail
        // charset is pinned to UTF-8 and invalid sequences are substituted, matching
        // htmlSpecialChars(); without ENT_SUBSTITUTE invalid input yields an empty string
        $text = htmlspecialchars($text, ENT_NOQUOTES | ENT_SUBSTITUTE, 'UTF-8');
    }

    if ((bool) $xcode) {
        // base64 encode [code] contents so the following steps leave them alone
        $text = $this->prefilterCodeBlocks($text);
        $text = $this->xoopsCodeDecode($text, (bool) $image);
    }
    if ((bool) $smiley) {
        // process smiley
        $text = $this->smiley($text);
    }
    if ((bool) $br) {
        $text = $this->nl2Br($text);
    }
    if ((bool) $xcode) {
        // decode and render the [code] blocks protected above
        $text = $this->postfilterCodeBlocks($text);
    }

    foreach ($postfilters as $filter) {
        $text = $this->executeFilter($filter, $text);
    }

    return $text;
}
307:
/**
 * Filters textarea data for display; a straight pass-through to filterForDisplay()
 *
 * @param string $text   text to filter for display
 * @param bool   $html   allow html?
 * @param bool   $smiley allow smileys?
 * @param bool   $xcode  allow xoopscode?
 * @param bool   $image  allow inline images?
 * @param bool   $br     convert linebreaks?
 *
 * @return string
 *
 * @todo remove as it adds no value
 */
public function displayTarea($text, $html = false, $smiley = true, $xcode = true, $image = true, $br = true)
{
    $filtered = $this->filterForDisplay($text, $html, $smiley, $xcode, $image, $br);

    return $filtered;
}
326:
/**
 * Filters textarea form data submitted for preview; a straight pass-through
 * to filterForDisplay()
 *
 * @param string $text   text to filter for preview
 * @param int    $html   allow html?
 * @param int    $smiley allow smileys?
 * @param int    $xcode  allow xoopscode?
 * @param int    $image  allow inline images?
 * @param int    $br     convert linebreaks?
 *
 * @return string
 *
 * @todo remove as it adds no value
 */
public function previewTarea($text, $html = 0, $smiley = 1, $xcode = 1, $image = 1, $br = 1)
{
    $filtered = $this->filterForDisplay($text, $html, $smiley, $xcode, $image, $br);

    return $filtered;
}
345:
/**
 * Run the text through the 'censor' filter (intended to replace banned words
 * with their replacements).
 *
 * @param string $text text to censor
 *
 * @return string
 */
public function censorString($text)
{
    $censored = $this->executeFilter('censor', $text);

    return $censored;
}
357:
/**
 * Protect the contents of [code] elements from later processing steps.
 *
 * The text between each [code...] tag and its [/code] is replaced by its base64
 * encoding; the tags themselves and any attributes on the opening tag are kept.
 *
 * @param string $text text to filter
 *
 * @return string
 */
protected function prefilterCodeBlocks($text)
{
    $encodeBody = function ($matches) {
        $attributes = $matches[1];
        $encodedBody = base64_encode($matches[2]);

        return '[code' . $attributes . ']' . $encodedBody . '[/code]';
    };

    return preg_replace_callback("/\[code([^\]]*?)\](.*)\[\/code\]/sU", $encodeBody, $text);
}
378:
/**
 * Convert code blocks, previously processed by prefilterCodeBlocks(), for display
 *
 * The base64 encoded body of each [code...]...[/code] element is decoded, passed to
 * the 'syntaxhighlight' filter together with the opening tag's attribute text, and
 * the result is wrapped in a div with the class "xoopsCode".
 *
 * @param string $text text to filter
 *
 * @return string
 */
protected function postfilterCodeBlocks($text)
{
    $patterns = "/\[code([^\]]*?)\](.*)\[\/code\]/sU";
    $text = preg_replace_callback(
        $patterns,
        function ($matches) {
            // The body was base64 encoded verbatim by prefilterCodeBlocks(), so the
            // decoded text is used as is; no quote unescaping is needed or wanted.
            $code = base64_decode($matches[2]);

            // Plain double quotes: inside a single-quoted PHP string a backslash
            // before a double quote would be emitted literally.
            return '<div class="xoopsCode">'
                . $this->executeFilter('syntaxhighlight', $code, $matches[1])
                . '</div>';
        },
        $text
    );

    return $text;
}
404:
/**
 * listExtensions() - collect the names of all configured components that are
 * enabled and whose 'type' is 'extension'
 *
 * @return string[]
 */
public function listExtensions()
{
    $active = [];

    foreach ($this->config as $name => $configs) {
        if (!((bool) $configs['enabled']) || $configs['type'] !== 'extension') {
            continue;
        }
        $active[] = $name;
    }

    return $active;
}
420:
/**
 * Ask the named extension for the button and javascript code used by the DhtmlTextArea
 *
 * @param string $extension  extension name
 * @param string $textAreaId dom element id
 *
 * @return string[] editor button as HTML, supporting javascript
 */
public function getDhtmlEditorSupport($extension, $textAreaId)
{
    $loaded = $this->loadExtension($extension);

    return $loaded->getDhtmlEditorSupport($textAreaId);
}
433:
/**
 * getConfig() - get the configuration for a component (extension, filter, sanitizer)
 *
 * The component name is lower-cased before the lookup; an empty array is the
 * fallback value passed to the configuration object.
 *
 * @param string $componentName get the configuration for component of this name
 *
 * @return array
 */
public function getConfig($componentName = 'sanitizer')
{
    $key = strtolower($componentName);

    return $this->config->get($key, []);
}
445:
/**
 * registerExtensions()
 *
 * Sets up the shortcode processing that will be applied to text to be displayed.
 * The work is done only on the first call; later calls return immediately until
 * the extensionsLoaded flag is cleared again.
 *
 * @return void
 */
protected function registerExtensions()
{
    if ($this->extensionsLoaded) {
        return;
    }

    // the flag is raised before any registration work is started
    $this->extensionsLoaded = true;

    $remaining = $this->listExtensions();

    // xoopscode has to be registered first, so take it out of the general list
    $position = array_search('xoopscode', $remaining);
    if (false !== $position) {
        unset($remaining[$position]);
    }
    $this->registerExtension('xoopscode');

    foreach ($remaining as $name) {
        $this->registerExtension($name);
    }

    /**
     * Give listeners the chance to register custom shortcodes
     *
     * The ShortCodes object is passed as the single event argument; listeners
     * should call addShortcode() on it to add their shortcodes.
     *
     * NB: The last definition for a shortcode tag wins, so a shortcode defined
     * here under the name of a standard system shortcode replaces the system
     * definition. This is very powerful, so play nice.
     */
    \Xoops::getInstance()->events()->triggerEvent('core.sanitizer.shortcodes.add', $this->shortcodes);
}
483:
/**
 * Instantiate the class named by a component's 'configured_class' setting
 *
 * The new object receives this sanitizer as its only constructor argument.
 *
 * @param string $name name of component to load
 *
 * @return object|null null when no class is configured or the class does not exist
 */
protected function loadComponent($name)
{
    $config = $this->getConfig($name);

    if (!isset($config['configured_class'])) {
        return null;
    }

    $className = $config['configured_class'];
    if (!class_exists($className)) {
        return null;
    }

    return new $className($this);
}
500:
/**
 * Load an extension by name
 *
 * When the named component cannot be loaded, or is not an ExtensionAbstract,
 * a NullExtension is returned in its place.
 *
 * @param string $name extension name
 *
 * @return Sanitizer\ExtensionAbstract
 */
protected function loadExtension($name)
{
    $candidate = $this->loadComponent($name);

    if ($candidate instanceof Sanitizer\ExtensionAbstract) {
        return $candidate;
    }

    return new Sanitizer\NullExtension($this);
}
516:
/**
 * Load a filter by name
 *
 * When the named component cannot be loaded, or is not a FilterAbstract,
 * a NullFilter is returned in its place.
 *
 * @param string $name name of filter to load
 *
 * @return Sanitizer\FilterAbstract
 */
protected function loadFilter($name)
{
    $candidate = $this->loadComponent($name);

    if ($candidate instanceof Sanitizer\FilterAbstract) {
        return $candidate;
    }

    return new Sanitizer\NullFilter($this);
}
532:
/**
 * Load the named extension and call its registerExtensionProcessing() method
 *
 * Any arguments given after $name are forwarded to registerExtensionProcessing().
 *
 * @param string $name extension name
 *
 * @return mixed whatever registerExtensionProcessing() returns
 */
protected function registerExtension($name)
{
    $extension = $this->loadExtension($name);

    // everything after $name is passed along untouched
    $forwarded = array_slice(func_get_args(), 1);
    $target = [$extension, 'registerExtensionProcessing'];

    return call_user_func_array($target, $forwarded);
}
546:
/**
 * Load the named filter and call its applyFilter() method
 *
 * Any arguments given after $name are forwarded to applyFilter().
 *
 * @param string $name filter name
 *
 * @return mixed whatever applyFilter() returns
 */
public function executeFilter($name)
{
    $filter = $this->loadFilter($name);

    // everything after $name is passed along untouched
    $forwarded = array_slice(func_get_args(), 1);
    $target = [$filter, 'applyFilter'];

    return call_user_func_array($target, $forwarded);
}
560:
/**
 * Run the text through the 'textfilter' filter (intended to remove possibly
 * malicious text)
 *
 * @param string $text  text to filter
 * @param bool   $force force filtering; passed on to the filter
 *
 * @return string filtered text
 */
public function textFilter($text, $force = false)
{
    $filtered = $this->executeFilter('textfilter', $text, $force);

    return $filtered;
}
573:
/**
 * Run the text through the 'xss' filter (intended to remove possibly malicious text)
 *
 * @param string $text text to filter
 *
 * @return string filtered text
 */
public function filterXss($text)
{
    $filtered = $this->executeFilter('xss', $text);

    return $filtered;
}
585:
/**
 * Test a string against an enumeration list.
 *
 * The comparison ignores case. With $firstLetter set, only the first byte of the
 * text and of each enumeration value is compared. The first matching enumeration
 * value is returned as it is spelled in $enumSet.
 *
 * @param string   $text        string to check
 * @param string[] $enumSet     strings to match (case insensitive)
 * @param string   $default     value returned if there is no match
 * @param bool     $firstLetter match first letter only
 *
 * @return mixed matched string, or default if no match
 */
public function cleanEnum($text, $enumSet, $default = '', $firstLetter = false)
{
    if (!$firstLetter) {
        foreach ($enumSet as $candidate) {
            if (strcasecmp($text, $candidate) === 0) {
                return $candidate;
            }
        }

        return $default;
    }

    $initial = strtolower(substr($text, 0, 1));
    foreach ($enumSet as $candidate) {
        if (strtolower(substr($candidate, 0, 1)) === $initial) {
            return $candidate;
        }
    }

    return $default;
}
615:
/**
 * Force a component to be enabled.
 *
 * Does nothing when the configuration has no entry for the component. Otherwise the
 * component's 'enabled' setting is switched on and registerExtensions() is run again.
 *
 * Note: This is intended to support testing, and is not recommended for any regular use
 *
 * @param string $name component to enable
 *
 * @return void
 */
public function enableComponentForTesting($name)
{
    if (!$this->config->has($name)) {
        return;
    }

    $this->config[$name]['enabled'] = true;

    // clear the flag so that registerExtensions() does its work again
    $this->extensionsLoaded = false;
    $this->registerExtensions();
}
633: }
634: