1: <?php
2: /*
3: You may not change or alter any portion of this comment or credits
4: of supporting developers from this source code or any supporting source code
5: which is considered copyrighted (c) material of the original comment or credit authors.
6:
7: This program is distributed in the hope that it will be useful,
8: but WITHOUT ANY WARRANTY; without even the implied warranty of
9: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
10: */
11:
12: namespace Xmf;
13:
14: /**
15: * FilterInput is a class for filtering input from any data source
16: *
17: * Forked from the php input filter library by Daniel Morris
18: *
19: * Original Contributors: Gianpaolo Racca, Ghislain Picard,
20: * Marco Wandschneider, Chris Tobin and Andrew Eddie.
21: *
22: * @category Xmf\FilterInput
23: * @package Xmf
24: * @author Daniel Morris <dan@rootcube.com>
25: * @author Louis Landry <louis.landry@joomla.org>
26: * @author Grégory Mage (Aka Mage)
27: * @author trabis <lusopoemas@gmail.com>
28: * @author Richard Griffith <richard@geekwright.com>
29: * @copyright 2005 Daniel Morris
30: * @copyright 2005 - 2013 Open Source Matters, Inc. All rights reserved.
31: * @copyright 2011-2016 XOOPS Project (http://xoops.org)
32: * @license GNU GPL 2 or later (http://www.gnu.org/licenses/gpl-2.0.html)
33: * @link http://xoops.org
34: */
class FilterInput
{
    /** @var string[] user-defined tag names, stored lowercased */
    protected $tagsArray = array();
    /** @var string[] user-defined attribute names, stored lowercased */
    protected $attrArray = array();

    /** @var int 0 = keep only tags listed in $tagsArray, 1 = keep all tags except those listed */
    protected $tagsMethod = 0;
    /** @var int 0 = keep only attributes listed in $attrArray, 1 = keep all except those listed */
    protected $attrMethod = 0;

    /** @var int when truthy, the built-in tag/attribute blacklists below are enforced */
    protected $xssAuto = 1;
    /** @var string[] tag names always removed while $xssAuto is on */
    protected $tagBlacklist = array(
        'applet',
        'body',
        'bgsound',
        'base',
        'basefont',
        'embed',
        'frame',
        'frameset',
        'head',
        'html',
        'id',
        'iframe',
        'ilayer',
        'layer',
        'link',
        'meta',
        'name',
        'object',
        'script',
        'style',
        'title',
        'xml'
    );
    /** @var string[] attribute names always removed while $xssAuto is on (all on* handlers are removed too) */
    protected $attrBlacklist = array('action', 'background', 'codebase', 'dynsrc', 'lowsrc');

    /**
     * Constructor
     *
     * Protected: instances are obtained through getInstance().
     *
     * @param array $tagsArray  - list of user-defined tags
     * @param array $attrArray  - list of user-defined attributes
     * @param int   $tagsMethod - 0 = allow just user-defined, 1 = allow all but user-defined
     * @param int   $attrMethod - 0 = allow just user-defined, 1 = allow all but user-defined
     * @param int   $xssAuto    - 0 = only auto clean essentials, 1 = allow clean blacklisted tags/attr
     */
    protected function __construct(
        $tagsArray = array(),
        $attrArray = array(),
        $tagsMethod = 0,
        $attrMethod = 0,
        $xssAuto = 1
    ) {
        // Lowercase the user lists. array_map works for any key layout (associative,
        // sparse) and the cast happens before the values are touched.
        $this->tagsArray = array_map('strtolower', (array) $tagsArray);
        $this->attrArray = array_map('strtolower', (array) $attrArray);
        $this->tagsMethod = $tagsMethod;
        $this->attrMethod = $attrMethod;
        $this->xssAuto = $xssAuto;
    }

    /**
     * Returns an input filter object, only creating it if it does not already exist.
     *
     * Instances are cached per combination of called class and arguments.
     *
     * This method must be invoked as:
     *   $filter = FilterInput::getInstance();
     *
     * @param array $tagsArray  list of user-defined tags
     * @param array $attrArray  list of user-defined attributes
     * @param int   $tagsMethod WhiteList method = 0, BlackList method = 1
     * @param int   $attrMethod WhiteList method = 0, BlackList method = 1
     * @param int   $xssAuto    Only auto clean essentials = 0,
     *                          Allow clean blacklisted tags/attr = 1
     *
     * @return FilterInput object.
     */
    public static function getInstance(
        $tagsArray = array(),
        $attrArray = array(),
        $tagsMethod = 0,
        $attrMethod = 0,
        $xssAuto = 1
    ) {
        static $instances = array();

        $className = get_called_class(); // so an extender gets an instance of itself

        // the class name is part of the signature, so subclasses never share an instance
        $sig = md5(serialize(array($className, $tagsArray, $attrArray, $tagsMethod, $attrMethod, $xssAuto)));

        if (empty($instances[$sig])) {
            $instances[$sig] = new static($tagsArray, $attrArray, $tagsMethod, $attrMethod, $xssAuto);
        }

        return $instances[$sig];
    }

    /**
     * Method to be called by another php script. Processes for XSS and
     * any specified bad code.
     *
     * Strings are entity-decoded and then stripped of unwanted tags/attributes.
     * For an array, each string element is cleaned (one level deep); non-string
     * elements are left untouched. Any other type is returned as given.
     *
     * @param mixed $source - input string/array-of-string to be 'cleaned'
     *
     * @return mixed 'cleaned' version of input parameter
     */
    public function process($source)
    {
        if (is_string($source)) {
            return $this->remove($this->decode($source));
        }

        if (is_array($source)) {
            foreach ($source as $key => $value) {
                if (is_string($value)) {
                    $source[$key] = $this->remove($this->decode($value));
                }
            }
        }

        return $source;
    }

    /**
     * Static method to be called by another php script.
     * Clean the supplied input using the default filter
     *
     * @param mixed  $source Input string/array-of-string to be 'cleaned'
     * @param string $type   Return/cleaning type for the variable, one of
     *                       (INTEGER, FLOAT, BOOLEAN, WORD, ALPHANUM, CMD, BASE64,
     *                        STRING, ARRAY, PATH, USERNAME, WEBURL, EMAIL, IP)
     *
     * @return mixed 'Cleaned' version of input parameter
     * @static
     */
    public static function clean($source, $type = 'string')
    {
        // Cache the default filter per called class. A single static variable would be
        // shared between this class and its subclasses on PHP 8.1+, handing a subclass
        // the parent's filter (or the reverse).
        static $filters = array();

        $className = get_called_class();
        if (!isset($filters[$className])) {
            $filters[$className] = static::getInstance();
        }

        return $filters[$className]->cleanVar($source, $type);
    }

    /**
     * Method to be called by another php script. Processes for XSS and
     * specified bad code according to rules supplied when this instance
     * was instantiated.
     *
     * @param mixed  $source Input string/array-of-string to be 'cleaned'
     * @param string $type   Return/cleaning type for the variable, one of
     *                       (INTEGER, FLOAT, BOOLEAN, WORD, ALPHANUM, CMD, BASE64,
     *                        STRING, ARRAY, PATH, USERNAME, WEBURL, EMAIL, IP)
     *                       Matching is case-insensitive; unknown types fall back
     *                       to process().
     *
     * @return mixed 'Cleaned' version of input parameter
     */
    public function cleanVar($source, $type = 'string')
    {
        $matches = array();

        // Handle the type constraint
        switch (strtoupper($type)) {
            case 'INT':
            case 'INTEGER':
                // Only use the first integer value; 0 when there is none
                $result = preg_match('/-?\d+/', (string) $source, $matches) ? (int) $matches[0] : 0;
                break;

            case 'FLOAT':
            case 'DOUBLE':
                // Only use the first floating point value; 0.0 when there is none
                $result = preg_match('/-?\d+(\.\d+)?/', (string) $source, $matches) ? (float) $matches[0] : 0.0;
                break;

            case 'BOOL':
            case 'BOOLEAN':
                $result = (bool) $source;
                break;

            case 'WORD':
                $result = (string) preg_replace('/[^A-Z_]/i', '', $source);
                break;

            case 'ALPHANUM':
            case 'ALNUM':
                $result = (string) preg_replace('/[^A-Z0-9]/i', '', $source);
                break;

            case 'CMD':
                $result = (string) preg_replace('/[^A-Z0-9_\.-]/i', '', $source);
                $result = strtolower($result);
                break;

            case 'BASE64':
                $result = (string) preg_replace('/[^A-Z0-9\/+=]/i', '', $source);
                break;

            case 'STRING':
                $result = (string) $this->process($source);
                break;

            case 'ARRAY':
                $result = (array) $this->process($source);
                break;

            case 'PATH':
                // keep the leading run of path-safe characters, drop the rest
                $source = trim((string) $source);
                $pattern = '/^([-_\.\/A-Z0-9=&%?~]+)(.*)$/i';
                $result = preg_match($pattern, $source, $matches) ? (string) $matches[1] : '';
                break;

            case 'USERNAME':
                $result = (string) preg_replace('/[\x00-\x1F\x7F<>"\'%&]/', '', $source);
                break;

            case 'WEBURL':
                $result = (string) $this->process($source);
                // allow only relative, http or https
                $urlparts = parse_url($result);
                if (!empty($urlparts['scheme'])
                    && !($urlparts['scheme'] === 'http' || $urlparts['scheme'] === 'https')
                ) {
                    $result = '';
                }
                // do not allow quotes, tag brackets or controls
                // (\z rather than $, which would let a trailing newline through)
                if (!preg_match('#^[^"<>\x00-\x1F]+\z#', $result)) {
                    $result = '';
                }
                break;

            case 'EMAIL':
                $result = (string) $source;
                if (!filter_var((string) $source, FILTER_VALIDATE_EMAIL)) {
                    $result = '';
                }
                break;

            case 'IP':
                $result = (string) $source;
                // this may be too restrictive.
                // Should the FILTER_FLAG_NO_PRIV_RANGE flag be excluded?
                if (!filter_var((string) $source, FILTER_VALIDATE_IP)) {
                    $result = '';
                }
                break;

            default:
                $result = $this->process($source);
                break;
        }

        return $result;
    }

    /**
     * Internal method to iteratively remove all unwanted tags and attributes
     *
     * filterTags() is applied repeatedly until a pass leaves the string unchanged,
     * so that tags revealed by removing an outer/nested tag are filtered as well.
     *
     * @param String $source - input string to be 'cleaned'
     *
     * @return String $source - 'cleaned' version of input parameter
     */
    protected function remove($source)
    {
        // provides nested-tag protection
        do {
            $previous = $source;
            $source = $this->filterTags($previous);
        } while ($source !== $previous);

        return $source;
    }

    /**
     * Internal method to strip a string of certain tags
     *
     * Performs a single left-to-right pass. Each "<...>" is either dropped
     * (irregular name, blacklisted, or not permitted by the user lists) or
     * rebuilt from its tag name and the attributes filterAttr() lets through.
     * Text outside tags, and an unterminated trailing "<...", is copied as is.
     *
     * @param String $source - input string to be 'cleaned'
     *
     * @return String $source - 'cleaned' version of input parameter
     */
    protected function filterTags($source)
    {
        // filter pass setup
        $preTag = '';
        $postTag = $source;
        // find initial tag's position
        $tagOpen_start = strpos($source, '<');
        // iterate through string until no tags left
        while ($tagOpen_start !== false) {
            // move the text before the tag to the output
            $preTag .= substr($postTag, 0, $tagOpen_start);
            $postTag = substr($postTag, $tagOpen_start);
            $fromTagOpen = substr($postTag, 1);
            // end of tag
            $tagOpen_end = strpos($fromTagOpen, '>');
            if ($tagOpen_end === false) {
                // unterminated tag: the remainder is appended after the loop
                break;
            }
            // next start of tag (for nested tag assessment)
            $tagOpen_nested = strpos($fromTagOpen, '<');
            if (($tagOpen_nested !== false) && ($tagOpen_nested < $tagOpen_end)) {
                // another '<' precedes the '>': emit text up to it and restart there
                $preTag .= substr($postTag, 0, ($tagOpen_nested + 1));
                $postTag = substr($postTag, ($tagOpen_nested + 1));
                $tagOpen_start = strpos($postTag, '<');
                continue;
            }
            $currentTag = (string) substr($fromTagOpen, 0, $tagOpen_end);
            $tagLength = strlen($currentTag);
            // NOTE: an empty tag "<>" needs no special handling here; it has no tag name
            // and is dropped just below. Copying the remainder to the output at this
            // point would duplicate it and make remove() grow the string forever.

            // tag name is everything up to the first space, minus a leading '/'
            $isCloseTag = (substr($currentTag, 0, 1) === '/');
            list($tagName) = explode(' ', $currentTag);
            if ($isCloseTag) {
                $tagName = (string) substr($tagName, 1);
            }
            $tagNameLower = strtolower($tagName);

            // excludes all "non-regular" tagnames OR no tagname OR remove if xssauto is on and tag is blacklisted
            // (\z, not $: "script\n" must not pass as a regular name and slip past the blacklist)
            if (($tagName === '')
                || (!preg_match('/^[a-z][a-z0-9]*\z/i', $tagName))
                || ($this->xssAuto && in_array($tagNameLower, $this->tagBlacklist, true))
            ) {
                // skip "<" + tag + ">" without appending it
                $postTag = substr($postTag, ($tagLength + 2));
                $tagOpen_start = strpos($postTag, '<');
                continue;
            }

            // appears in array specified by user
            $tagFound = in_array($tagNameLower, $this->tagsArray, true);
            // keep this tag on condition
            if ((!$tagFound && $this->tagsMethod) || ($tagFound && !$this->tagsMethod)) {
                if (!$isCloseTag) {
                    // reconstruct tag with allowed attributes
                    $attrSet = $this->filterAttr($this->splitAttributes($currentTag));
                    $preTag .= '<' . $tagName;
                    foreach ($attrSet as $attr) {
                        $preTag .= ' ' . $attr;
                    }
                    // reformat single tags to XHTML: self-close when no closing tag follows
                    if (stripos($fromTagOpen, '</' . $tagName) !== false) {
                        $preTag .= '>';
                    } else {
                        $preTag .= ' />';
                    }
                } else {
                    // just the tagname
                    $preTag .= '</' . $tagName . '>';
                }
            }
            // find next tag's start
            $postTag = substr($postTag, ($tagLength + 2));
            $tagOpen_start = strpos($postTag, '<');
        }
        // append any code after end of tags
        $preTag .= $postTag;

        return $preTag;
    }

    /**
     * Split the inside of a tag (text between '<' and '>') into raw attribute strings
     *
     * Pieces are separated on spaces; when the remaining text contains an '=' and a
     * pair of double quotes, everything up to the closing quote is taken as one
     * piece so that quoted values may contain spaces.
     *
     * @param string $currentTag tag content, starting with the tag name
     *
     * @return string[] raw attribute pieces (may include empty strings)
     */
    private function splitAttributes($currentTag)
    {
        $attrSet = array();
        $tagLeft = $currentTag;
        $currentSpace = strpos($tagLeft, ' ');
        // this while is needed to support attribute values with spaces in!
        while ($currentSpace !== false) {
            $fromSpace = (string) substr($tagLeft, ($currentSpace + 1));
            $nextSpace = strpos($fromSpace, ' ');
            // default: text up to the next space (empty when there is none)
            $attr = ($nextSpace === false) ? '' : (string) substr($fromSpace, 0, $nextSpace);
            // another equals exists
            if (strpos($fromSpace, '=') !== false) {
                $openQuotes = strpos($fromSpace, '"');
                if ($openQuotes !== false) {
                    $closeQuotes = strpos($fromSpace, '"', $openQuotes + 1);
                    if ($closeQuotes !== false) {
                        // opening and closing quotes exist: take through the closing quote
                        $attr = substr($fromSpace, 0, ($closeQuotes + 1));
                    }
                }
            }
            // last attr pair
            if (!$attr) {
                $attr = $fromSpace;
            }
            // add to attribute pairs array
            $attrSet[] = $attr;
            // next inc
            $tagLeft = (string) substr($fromSpace, strlen($attr));
            $currentSpace = strpos($tagLeft, ' ');
        }

        return $attrSet;
    }

    /**
     * Internal method to strip a tag of certain attributes
     *
     * Each surviving attribute is returned in the form name="value"; a valueless
     * attribute is returned as name="name".
     *
     * @param array $attrSet attributes
     *
     * @return array $newSet stripped attributes
     */
    protected function filterAttr($attrSet)
    {
        $newSet = array();
        // process attributes
        foreach ($attrSet as $rawAttr) {
            // skip blank spaces in tag
            if (!$rawAttr) {
                continue;
            }
            // split into attr name and value; the limit keeps any '=' inside the value
            // (e.g. href="page.php?id=1") instead of silently truncating it
            $attrSubSet = explode('=', trim($rawAttr), 2);
            list($attrName) = explode(' ', $attrSubSet[0]);
            // attributes written without a value (e.g. "disabled") have no second part
            $attrValue = isset($attrSubSet[1]) ? $attrSubSet[1] : '';
            $attrNameLower = strtolower($attrName);

            // removes all "non-regular" attr names (anything that is not a letter followed
            // by letters, digits, '_', ':', '.' or '-', such as the "/" of a self-closed tag)
            if (!preg_match('/^[a-z][a-z0-9_:.-]*\z/i', $attrName)) {
                continue;
            }
            // AND also attr blacklisted, plus every event handler regardless of letter case
            if ($this->xssAuto
                && (in_array($attrNameLower, $this->attrBlacklist, true)
                    || (substr($attrNameLower, 0, 2) === 'on'))
            ) {
                continue;
            }
            // xss attr value filtering
            if ($attrValue) {
                // strips unicode, hex, etc
                $attrValue = str_replace('&#', '', $attrValue);
                // strip normal newline within attr value
                $attrValue = preg_replace('/\s+/', '', $attrValue);
                // strip double quotes
                $attrValue = str_replace('"', '', $attrValue);
                // [requested feature] convert single quotes from either side to doubles
                // (Single quotes shouldn't be used to pad attr value)
                if ((substr($attrValue, 0, 1) === "'") && (substr($attrValue, -1) === "'")) {
                    $attrValue = (string) substr($attrValue, 1, (strlen($attrValue) - 2));
                }
                // strip slashes
                $attrValue = stripslashes($attrValue);
            }
            // auto strip attr's with "javascript:" and similar script-bearing values
            $valueLower = strtolower($attrValue);
            if (($attrNameLower === 'style') && (strpos($valueLower, 'expression') !== false)) {
                continue;
            }
            $unsafe = false;
            // both spellings of behaviour/behavior are rejected
            $needles = array('javascript:', 'behaviour:', 'behavior:', 'vbscript:', 'mocha:', 'livescript:');
            foreach ($needles as $needle) {
                if (strpos($valueLower, $needle) !== false) {
                    $unsafe = true;
                    break;
                }
            }
            if ($unsafe) {
                continue;
            }

            // if matches user defined array
            $attrFound = in_array($attrNameLower, $this->attrArray, true);
            // keep this attr on condition
            if ((!$attrFound && $this->attrMethod) || ($attrFound && !$this->attrMethod)) {
                if ($attrValue) {
                    // attr has value
                    $newSet[] = $attrName . '="' . $attrValue . '"';
                } elseif ($attrValue === '0') {
                    // attr has decimal zero as value
                    $newSet[] = $attrName . '="0"';
                } else {
                    // reformat single attributes to XHTML
                    $newSet[] = $attrName . '="' . $attrName . '"';
                }
            }
        }

        return $newSet;
    }

    /**
     * Try to convert to plaintext
     *
     * Decodes HTML entities, then converts any numeric character references that
     * html_entity_decode() left in place into the single byte chr() yields for
     * that number (i.e. the value modulo 256).
     *
     * @param String $source string to decode
     *
     * @return String $source decoded
     */
    protected function decode($source)
    {
        // html entity decode, using the _CHARSET constant when it is defined
        $charset = defined('_CHARSET') ? constant('_CHARSET') : 'utf-8';
        $source = html_entity_decode($source, ENT_QUOTES, $charset);
        // convert decimal
        $source = preg_replace_callback(
            '/&#(\d+);/m',
            function ($matches) {
                return chr((int) $matches[1]);
            },
            $source
        );
        // convert hex notation
        $source = preg_replace_callback(
            '/&#x([a-f0-9]+);/mi',
            function ($matches) {
                // chr() needs an integer; a '0x..' string is not numeric and is rejected
                // on PHP 8. The last two hex digits are the value modulo 256, which is
                // all chr() uses, and keep hexdec() within integer range.
                return chr(hexdec(substr($matches[1], -2)));
            },
            $source
        );

        return $source;
    }
}
564: