1: <?php
2: /*
3: You may not change or alter any portion of this comment or credits
4: of supporting developers from this source code or any supporting source code
5: which is considered copyrighted (c) material of the original comment or credit authors.
6:
7: This program is distributed in the hope that it will be useful,
8: but WITHOUT ANY WARRANTY; without even the implied warranty of
9: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
10: */
11:
12: namespace Xmf;
13:
14: /**
15: * FilterInput is a class for filtering input from any data source
16: *
17: * Forked from the php input filter library by Daniel Morris
18: *
19: * Original Contributors: Gianpaolo Racca, Ghislain Picard,
20: * Marco Wandschneider, Chris Tobin and Andrew Eddie.
21: *
22: * @category Xmf\FilterInput
23: * @package Xmf
24: * @author Daniel Morris <dan@rootcube.com>
25: * @author Louis Landry <louis.landry@joomla.org>
26: * @author Grégory Mage (Aka Mage)
27: * @author trabis <lusopoemas@gmail.com>
28: * @author Richard Griffith <richard@geekwright.com>
29: * @copyright 2005 Daniel Morris
30: * @copyright 2005 - 2013 Open Source Matters, Inc. All rights reserved.
31: * @copyright 2011-2023 XOOPS Project (https://xoops.org)
32: * @license GNU GPL 2.0 or later (https://www.gnu.org/licenses/gpl-2.0.html)
33: * @link https://xoops.org
34: */
class FilterInput
{
    /** @var array user-defined tag names, stored lowercase */
    protected $tagsArray;
    /** @var array user-defined attribute names, stored lowercase */
    protected $attrArray;

    /** @var int 0 = allow only user-defined tags, 1 = allow all but user-defined tags */
    protected $tagsMethod;
    /** @var int 0 = allow only user-defined attributes, 1 = allow all but user-defined attributes */
    protected $attrMethod;

    /** @var int when truthy, the built-in tag/attribute blacklists below are enforced */
    protected $xssAuto;

    /** @var array tag names that are always removed while $xssAuto is truthy */
    protected $tagBlacklist = array(
        'applet',
        'body',
        'bgsound',
        'base',
        'basefont',
        'embed',
        'frame',
        'frameset',
        'head',
        'html',
        'id',
        'iframe',
        'ilayer',
        'layer',
        'link',
        'meta',
        'name',
        'object',
        'script',
        'style',
        'title',
        'xml'
    );

    /**
     * @var array attribute names that are always removed while $xssAuto is truthy;
     *            in addition, every attribute whose name starts with "on" is removed
     */
    protected $attrBlacklist = array('action', 'background', 'codebase', 'dynsrc', 'lowsrc');

    /**
     * Constructor
     *
     * Protected: instances are obtained through getInstance().
     *
     * @param array $tagsArray  list of user-defined tags
     * @param array $attrArray  list of user-defined attributes
     * @param int   $tagsMethod 0 = allow just user-defined, 1 = allow all but user-defined
     * @param int   $attrMethod 0 = allow just user-defined, 1 = allow all but user-defined
     * @param int   $xssAuto    0 = do not apply the built-in blacklists,
     *                          1 = also remove blacklisted tags/attributes
     */
    protected function __construct(
        $tagsArray = array(),
        $attrArray = array(),
        $tagsMethod = 0,
        $attrMethod = 0,
        $xssAuto = 1
    ) {
        // Cast before normalising so that a scalar, an associative array or an
        // array with gaps in its keys is lowercased element by element instead
        // of being indexed 0..n-1.
        $this->tagsArray = array_map('strtolower', (array) $tagsArray);
        $this->attrArray = array_map('strtolower', (array) $attrArray);
        $this->tagsMethod = $tagsMethod;
        $this->attrMethod = $attrMethod;
        $this->xssAuto = $xssAuto;
    }

    /**
     * Returns an input filter object, only creating it if it does not already exist.
     *
     * Instances are cached per combination of called class and arguments.
     *
     * This method must be invoked as:
     *   $filter = FilterInput::getInstance();
     *
     * @param array $tagsArray  list of user-defined tags
     * @param array $attrArray  list of user-defined attributes
     * @param int   $tagsMethod WhiteList method = 0, BlackList method = 1
     * @param int   $attrMethod WhiteList method = 0, BlackList method = 1
     * @param int   $xssAuto    Only auto clean essentials = 0,
     *                          Allow clean blacklisted tags/attr = 1
     *
     * @return FilterInput object.
     */
    public static function getInstance(
        $tagsArray = array(),
        $attrArray = array(),
        $tagsMethod = 0,
        $attrMethod = 0,
        $xssAuto = 1
    ) {
        static $instances = array();

        // the called class is part of the key, so an extender gets an instance of itself
        $className = get_called_class();
        $sig = md5(serialize(array($className, $tagsArray, $attrArray, $tagsMethod, $attrMethod, $xssAuto)));

        if (empty($instances[$sig])) {
            $instances[$sig] = new static($tagsArray, $attrArray, $tagsMethod, $attrMethod, $xssAuto);
        }

        return $instances[$sig];
    }

    /**
     * Method to be called by another php script. Processes for XSS and
     * any specified bad code.
     *
     * Strings are decoded and filtered. For an array, only its string elements
     * are filtered (nested arrays are left as they are). Any other value is
     * returned unchanged.
     *
     * @param mixed $source input string/array-of-string to be 'cleaned'
     *
     * @return mixed 'cleaned' version of input parameter
     */
    public function process($source)
    {
        if (is_string($source)) {
            return $this->remove($this->decode($source));
        }

        if (is_array($source)) {
            foreach ($source as $key => $value) {
                if (is_string($value)) {
                    $source[$key] = $this->remove($this->decode($value));
                }
            }
        }

        return $source;
    }

    /**
     * Static method to be called by another php script.
     * Clean the supplied input using the default filter
     *
     * @param mixed  $source Input string/array-of-string to be 'cleaned'
     * @param string $type   Return/cleaning type for the variable, one of
     *                       (INTEGER, FLOAT, BOOLEAN, WORD, ALPHANUM, CMD, BASE64,
     *                        STRING, ARRAY, PATH, USERNAME, WEBURL, EMAIL, IP)
     *
     * @return mixed 'Cleaned' version of input parameter
     * @static
     */
    public static function clean($source, $type = 'string')
    {
        // getInstance() already caches the default filter per called class.
        // A second function-static cache here could hand a subclass the
        // filter created for its parent, so none is kept.
        return static::getInstance()->cleanVar($source, $type);
    }

    /**
     * Method to be called by another php script. Processes for XSS and
     * specified bad code according to rules supplied when this instance
     * was instantiated.
     *
     * @param mixed  $source Input string/array-of-string to be 'cleaned'
     * @param string $type   Return/cleaning type for the variable, one of
     *                       (INTEGER, FLOAT, BOOLEAN, WORD, ALPHANUM, CMD, BASE64,
     *                        STRING, ARRAY, PATH, USERNAME, WEBURL, EMAIL, IP)
     *
     * @return mixed 'Cleaned' version of input parameter
     */
    public function cleanVar($source, $type = 'string')
    {
        // Handle the type constraint
        switch (strtoupper($type)) {
            case 'INT':
            case 'INTEGER':
                // Only use the first integer value
                preg_match('/-?\d+/', (string) $source, $matches);
                $result = isset($matches[0]) ? (int) $matches[0] : 0;
                break;

            case 'FLOAT':
            case 'DOUBLE':
                // Only use the first floating point value; always hand back a float
                preg_match('/-?\d+(\.\d+)?/', (string) $source, $matches);
                $result = isset($matches[0]) ? (float) $matches[0] : 0.0;
                break;

            case 'BOOL':
            case 'BOOLEAN':
                $result = (bool) $source;
                break;

            case 'WORD':
                $result = (string) preg_replace('/[^A-Z_]/i', '', $source);
                break;

            case 'ALPHANUM':
            case 'ALNUM':
                $result = (string) preg_replace('/[^A-Z0-9]/i', '', $source);
                break;

            case 'CMD':
                $result = (string) preg_replace('/[^A-Z0-9_\.-]/i', '', $source);
                $result = strtolower($result);
                break;

            case 'BASE64':
                $result = (string) preg_replace('/[^A-Z0-9\/+=]/i', '', $source);
                break;

            case 'STRING':
                $result = (string) $this->process($source);
                break;

            case 'ARRAY':
                $result = (array) $this->process($source);
                break;

            case 'PATH':
                // keep only the leading run of path-safe characters
                $source = trim((string) $source);
                $pattern = '/^([-_\.\/A-Z0-9=&%?~]+)(.*)$/i';
                preg_match($pattern, $source, $matches);
                $result = isset($matches[1]) ? (string) $matches[1] : '';
                break;

            case 'USERNAME':
                $result = (string) preg_replace('/[\x00-\x1F\x7F<>"\'%&]/', '', $source);
                break;

            case 'WEBURL':
                $result = (string) $this->process($source);
                // allow only relative, http or https; URL schemes are
                // case-insensitive, so compare the lowercased scheme
                $urlparts = parse_url($result);
                $scheme = (is_array($urlparts) && isset($urlparts['scheme']))
                    ? strtolower($urlparts['scheme'])
                    : '';
                if ($scheme !== '' && $scheme !== 'http' && $scheme !== 'https') {
                    $result = '';
                }
                // do not allow quotes, tag brackets or controls
                if (!preg_match('#^[^"<>\x00-\x1F]+$#', $result)) {
                    $result = '';
                }
                break;

            case 'EMAIL':
                $result = (string) $source;
                if (!filter_var($result, FILTER_VALIDATE_EMAIL)) {
                    $result = '';
                }
                break;

            case 'IP':
                $result = (string) $source;
                // No range flags are passed, so private and reserved
                // addresses are accepted as valid.
                if (!filter_var($result, FILTER_VALIDATE_IP)) {
                    $result = '';
                }
                break;

            default:
                $result = $this->process($source);
                break;
        }

        return $result;
    }

    /**
     * Internal method to iteratively remove all unwanted tags and attributes
     *
     * filterTags() is applied repeatedly until a pass no longer changes the
     * string; this provides nested-tag protection.
     *
     * @param String $source input string to be 'cleaned'
     *
     * @return String 'cleaned' version of input parameter
     */
    protected function remove($source)
    {
        do {
            $previous = $source;
            $source = $this->filterTags($previous);
        } while ($source !== $previous);

        return $source;
    }

    /**
     * Internal method to strip a string of certain tags
     *
     * Walks the string tag by tag. Tags with an invalid name, blacklisted tags
     * (while xssAuto is on) and tags rejected by the user-defined list are
     * dropped; surviving opening tags are rebuilt from their filtered
     * attributes. A '<' without a following '>' is left in place.
     *
     * @param String $source input string to be 'cleaned'
     *
     * @return String 'cleaned' version of input parameter
     */
    protected function filterTags($source)
    {
        // filter pass setup
        $preTag = '';
        $postTag = $source;
        // find initial tag's position
        $tagOpen_start = strpos($source, '<');
        // iterate through string until no tags left
        while ($tagOpen_start !== false) {
            // move everything before the '<' to the output
            $preTag .= substr($postTag, 0, $tagOpen_start);
            $postTag = substr($postTag, $tagOpen_start);
            $fromTagOpen = substr($postTag, 1);
            // end of tag
            $tagOpen_end = strpos($fromTagOpen, '>');
            if ($tagOpen_end === false) {
                // unterminated tag: the remainder is appended after the loop
                break;
            }
            // next start of tag (for nested tag assessment)
            $tagOpen_nested = strpos($fromTagOpen, '<');
            if (($tagOpen_nested !== false) && ($tagOpen_nested < $tagOpen_end)) {
                // another '<' occurs before the '>': emit the text up to it
                // and restart the scan from the inner '<'
                $preTag .= substr($postTag, 0, ($tagOpen_nested + 1));
                $postTag = substr($postTag, ($tagOpen_nested + 1));
                $tagOpen_start = strpos($postTag, '<');
                continue;
            }
            $currentTag = substr($fromTagOpen, 0, $tagOpen_end);
            $tagLength = strlen($currentTag);
            // An empty tag "<>" has no name and is discarded by the name check
            // below. It must not be copied to the output here: doing so
            // duplicated the text following it and made remove() grow the
            // string forever.

            // iterate through tag finding attribute pairs - setup
            $tagLeft = $currentTag;
            $attrSet = array();
            $currentSpace = strpos($tagLeft, ' ');
            list($tagName) = explode(' ', $currentTag);
            $isCloseTag = (substr($currentTag, 0, 1) === '/');
            if ($isCloseTag) {
                $tagName = substr($tagName, 1);
            }
            $lowerTagName = strtolower((string) $tagName);
            // Exclude "non-regular" or missing tag names, and blacklisted tags
            // when xssAuto is on. The D modifier stops '$' from matching in
            // front of a trailing newline, so "script\n" is not a valid name.
            if ((!preg_match('/^[a-z][a-z0-9]*$/iD', (string) $tagName))
                || (($this->xssAuto) && in_array($lowerTagName, $this->tagBlacklist, true))
            ) {
                $postTag = substr($postTag, ($tagLength + 2));
                $tagOpen_start = strpos($postTag, '<');
                // don't append this tag
                continue;
            }
            // this while is needed to support attribute values with spaces in!
            while ($currentSpace !== false) {
                $fromSpace = substr($tagLeft, ($currentSpace + 1));
                $nextSpace = strpos($fromSpace, ' ');
                $openQuotes = strpos($fromSpace, '"');
                $closeQuotes = ($openQuotes === false)
                    ? false
                    : strpos($fromSpace, '"', $openQuotes + 1);
                if ((strpos($fromSpace, '=') !== false) && ($closeQuotes !== false)) {
                    // an equals sign and a pair of double quotes exist:
                    // take everything up to and including the closing quote
                    $attr = substr($fromSpace, 0, ($closeQuotes + 1));
                } elseif ($nextSpace !== false) {
                    // otherwise the pair ends at the next space
                    $attr = substr($fromSpace, 0, $nextSpace);
                } else {
                    $attr = '';
                }
                // last attr pair
                if (!$attr) {
                    $attr = $fromSpace;
                }
                // add to attribute pairs array
                $attrSet[] = $attr;
                // next inc
                $tagLeft = substr($fromSpace, strlen($attr));
                $currentSpace = strpos($tagLeft, ' ');
            }
            // appears in array specified by user
            $tagFound = in_array($lowerTagName, $this->tagsArray, true);
            // keep this tag on condition
            if ($tagFound !== (bool) $this->tagsMethod) {
                if ($isCloseTag) {
                    // just the tagname
                    $preTag .= '</' . $tagName . '>';
                } else {
                    // reconstruct tag with allowed attributes
                    $preTag .= '<' . $tagName;
                    foreach ($this->filterAttr($attrSet) as $keptAttr) {
                        $preTag .= ' ' . $keptAttr;
                    }
                    // reformat single tags to XHTML
                    if (strpos($fromTagOpen, '</' . $tagName)) {
                        $preTag .= '>';
                    } else {
                        $preTag .= ' />';
                    }
                }
            }
            // find next tag's start
            $postTag = substr($postTag, ($tagLength + 2));
            $tagOpen_start = strpos($postTag, '<');
        }
        // append any code after end of tags
        $preTag .= $postTag;

        return $preTag;
    }

    /**
     * Internal method to strip a tag of certain attributes
     *
     * @param array $attrSet raw attribute strings, e.g. 'href="x"' or 'disabled'
     *
     * @return array $newSet accepted attributes, each formatted as name="value"
     */
    protected function filterAttr($attrSet)
    {
        $newSet = array();
        foreach ($attrSet as $rawAttr) {
            // skip blank spaces in tag
            if (!$rawAttr) {
                continue;
            }
            // split into attr name and value; the limit keeps any '=' that
            // is part of the value (e.g. a query string)
            $attrSubSet = explode('=', trim($rawAttr), 2);
            list($attrName) = explode(' ', $attrSubSet[0]);
            // attributes written without '=' have no value part
            $attrValue = isset($attrSubSet[1]) ? $attrSubSet[1] : '';
            $lowerName = strtolower($attrName);

            // removes all "non-regular" attr names AND also attr blacklisted;
            // event handlers are matched on the lowercased name so that
            // ONCLICK, OnLoad, ... are caught as well
            if ((!preg_match('/^[a-z][a-z0-9_:.-]*$/iD', $attrName))
                || (($this->xssAuto)
                    && (in_array($lowerName, $this->attrBlacklist, true)
                        || (substr($lowerName, 0, 2) === 'on')))
            ) {
                continue;
            }

            // xss attr value filtering
            if ($attrValue) {
                // strips unicode, hex, etc
                $attrValue = str_replace('&#', '', $attrValue);
                // strip whitespace (including newlines) within attr value
                $attrValue = preg_replace('/\s+/', '', $attrValue);
                // strip double quotes
                $attrValue = str_replace('"', '', $attrValue);
                // remove a pair of single quotes wrapped around the value
                // (Single quotes shouldn't be used to pad attr value)
                if ((substr($attrValue, 0, 1) === "'")
                    && (substr($attrValue, (strlen($attrValue) - 1), 1) === "'")
                ) {
                    $attrValue = substr($attrValue, 1, (strlen($attrValue) - 2));
                }
                // strip slashes
                $attrValue = stripslashes($attrValue);
            }

            // auto strip attr's with "javascript:" and similar
            $lowerValue = strtolower((string) $attrValue);
            if (((strpos($lowerValue, 'expression') !== false) && ($lowerName === 'style'))
                || (strpos($lowerValue, 'javascript:') !== false)
                || (strpos($lowerValue, 'behaviour:') !== false)
                || (strpos($lowerValue, 'vbscript:') !== false)
                || (strpos($lowerValue, 'mocha:') !== false)
                || (strpos($lowerValue, 'livescript:') !== false)
            ) {
                continue;
            }

            // if matches user defined array
            $attrFound = in_array($lowerName, $this->attrArray, true);
            // keep this attr on condition
            if ($attrFound !== (bool) $this->attrMethod) {
                if ($attrValue) {
                    // attr has value
                    $newSet[] = $attrName . '="' . $attrValue . '"';
                } elseif ($attrValue === '0') {
                    // attr has decimal zero as value
                    $newSet[] = $attrName . '="0"';
                } else {
                    // reformat single attributes to XHTML
                    $newSet[] = $attrName . '="' . $attrName . '"';
                }
            }
        }

        return $newSet;
    }

    /**
     * Try to convert to plaintext
     *
     * Decodes HTML entities, then converts any numeric character references
     * that are still present (for example after double encoding) into single
     * bytes, so that the tag filter sees the characters they stand for.
     *
     * @param String $source string to decode
     *
     * @return String $source decoded
     */
    protected function decode($source)
    {
        // entity decode
        $charset = defined('_CHARSET') ? constant('_CHARSET') : 'utf-8';
        $source = html_entity_decode($source, ENT_QUOTES, $charset);
        // convert decimal; chr() works on one byte, so mask explicitly
        $source = preg_replace_callback(
            '/&#(\d+);/m',
            function ($matches) {
                return chr(((int) $matches[1]) & 0xFF);
            },
            $source
        );
        // convert hex notation; '0x..' strings are not numeric in PHP 7+,
        // so the digits are parsed as base 16 explicitly
        $source = preg_replace_callback(
            '/&#x([a-f0-9]+);/mi',
            function ($matches) {
                return chr(intval($matches[1], 16) & 0xFF);
            },
            $source
        );

        return $source;
    }
}
565: