1: <?php
2: /*
3: You may not change or alter any portion of this comment or credits
4: of supporting developers from this source code or any supporting source code
5: which is considered copyrighted (c) material of the original comment or credit authors.
6:
7: This program is distributed in the hope that it will be useful,
8: but WITHOUT ANY WARRANTY; without even the implied warranty of
9: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
10: */
11:
12: namespace Xoops\Core;
13:
14: /**
15: * XoopsFilterInput is a class for filtering input from any data source
16: *
17: * Forked from the php input filter library by Daniel Morris
18: *
19: * Original Contributors: Gianpaolo Racca, Ghislain Picard,
20: * Marco Wandschneider, Chris Tobin and Andrew Eddie.
21: *
22: * @category Xoops\Core\FilterInput
23: * @package Xoops\Core
24: * @author Daniel Morris <dan@rootcube.com>
25: * @author Louis Landry <louis.landry@joomla.org>
26: * @author Grégory Mage (Aka Mage)
27: * @author trabis <lusopoemas@gmail.com>
28: * @author Richard Griffith <richard@geekwright.com>
29: * @copyright 2005 Daniel Morris
30: * @copyright 2005 - 2013 Open Source Matters, Inc. All rights reserved.
31: * @copyright 2011-2015 XOOPS Project (http://xoops.org)
32: * @license GNU GPL 2 or later (http://www.gnu.org/licenses/gpl-2.0.html)
33: * @version Release: 1.0
34: * @link http://xoops.org
35: * @since 2.5.7
36: */
class FilterInput
{
    /**
     * @var array lower-cased tag names supplied by the caller
     */
    protected $tagsArray;

    /**
     * @var array lower-cased attribute names supplied by the caller
     */
    protected $attrArray;

    /**
     * @var int 0 = keep only the tags in $tagsArray, 1 = keep every tag except those
     */
    protected $tagsMethod;

    /**
     * @var int 0 = keep only the attributes in $attrArray, 1 = keep every attribute except those
     */
    protected $attrMethod;

    /**
     * @var int when truthy, the built-in tag/attribute blacklists and the
     *          event-handler ("on*") stripping are applied as well
     */
    protected $xssAuto;

    /**
     * @var array tag names that are always removed while $xssAuto is on
     */
    protected $tagBlacklist = array(
        'applet', 'body', 'bgsound', 'base', 'basefont', 'embed', 'frame',
        'frameset', 'head', 'html', 'id', 'iframe', 'ilayer', 'layer',
        'link', 'meta', 'name', 'object', 'script', 'style', 'title', 'xml'
    );

    /**
     * @var array attribute names that are always removed while $xssAuto is on
     *            (every attribute whose name starts with "on" is removed too)
     */
    protected $attrBlacklist = array('action', 'background', 'codebase', 'dynsrc', 'lowsrc');

    /**
     * Constructor
     *
     * @param array $tagsArray  list of user-defined tags
     * @param array $attrArray  list of user-defined attributes
     * @param int   $tagsMethod 0 = allow just user-defined, 1 = allow all but user-defined
     * @param int   $attrMethod 0 = allow just user-defined, 1 = allow all but user-defined
     * @param int   $xssAuto    0 = skip the built-in blacklists, 1 = also strip blacklisted
     *                          tags/attributes and event handlers
     */
    public function __construct(
        $tagsArray = array(),
        $attrArray = array(),
        $tagsMethod = 0,
        $attrMethod = 0,
        $xssAuto = 1
    ) {
        // Cast first, then lower-case every value. array_map() does not care whether
        // the keys are sequential, so associative/sparse lists are normalised too.
        $this->tagsArray = array_map('strtolower', (array) $tagsArray);
        $this->attrArray = array_map('strtolower', (array) $attrArray);
        $this->tagsMethod = $tagsMethod;
        $this->attrMethod = $attrMethod;
        $this->xssAuto = $xssAuto;
    }

    /**
     * Returns a shared filter object for the given configuration, creating it
     * only if no instance with the same arguments exists yet.
     *
     * @param array $tagsArray  list of user-defined tags
     * @param array $attrArray  list of user-defined attributes
     * @param int   $tagsMethod WhiteList method = 0, BlackList method = 1
     * @param int   $attrMethod WhiteList method = 0, BlackList method = 1
     * @param int   $xssAuto    0 = skip the built-in blacklists, 1 = apply them
     *
     * @return FilterInput object.
     * @since 1.5
     * @static
     */
    public static function getInstance(
        $tagsArray = array(),
        $attrArray = array(),
        $tagsMethod = 0,
        $attrMethod = 0,
        $xssAuto = 1
    ) {
        static $instances = array();

        // the serialized argument list is the cache key
        $sig = md5(serialize(array($tagsArray, $attrArray, $tagsMethod, $attrMethod, $xssAuto)));

        if (empty($instances[$sig])) {
            $instances[$sig] = new self($tagsArray, $attrArray, $tagsMethod, $attrMethod, $xssAuto);
        }

        return $instances[$sig];
    }

    /**
     * Method to be called by another php script. Processes for XSS and
     * any specified bad code.
     *
     * Strings are entity-decoded and then stripped of unwanted tags/attributes.
     * For an array, only its direct string elements are cleaned; nested arrays
     * and non-string elements are returned untouched. Any other type is
     * returned as given.
     *
     * @param mixed $source input string/array-of-string to be 'cleaned'
     *
     * @return mixed 'cleaned' version of input parameter
     */
    public function process($source)
    {
        if (is_string($source)) {
            return $this->remove($this->decode($source));
        }

        if (is_array($source)) {
            foreach ($source as $key => $value) {
                if (is_string($value)) {
                    $source[$key] = $this->remove($this->decode($value));
                }
            }
        }

        return $source;
    }

    /**
     * Method to be called by another php script. Processes for XSS and
     * specified bad code.
     *
     * @param mixed  $source Input string/array-of-string to be 'cleaned'
     * @param string $type   Return/cleaning type for the variable, one of
     *                       (INTEGER, FLOAT, BOOLEAN, WORD, ALNUM, CMD, BASE64,
     *                       STRING, ARRAY, PATH, USERNAME, WEBURL, EMAIL, IP)
     *
     * @return mixed 'Cleaned' version of input parameter
     * @static
     */
    public static function clean($source, $type = 'string')
    {
        static $filter = null;

        // the tag filtering methods are instance methods, so a default
        // configured instance is created once and reused
        if (!is_object($filter)) {
            $classname = get_called_class();
            $filter = $classname::getInstance();
        }

        switch (strtoupper($type)) {
            case 'INT':
            case 'INTEGER':
                // Only use the first integer value, 0 when there is none
                preg_match('/-?[0-9]+/', (string) $source, $matches);
                $result = isset($matches[0]) ? (int) $matches[0] : 0;
                break;

            case 'FLOAT':
            case 'DOUBLE':
                // Only use the first floating point value, 0.0 when there is none
                preg_match('/-?[0-9]+(\.[0-9]+)?/', (string) $source, $matches);
                $result = isset($matches[0]) ? (float) $matches[0] : 0.0;
                break;

            case 'BOOL':
            case 'BOOLEAN':
                $result = (bool) $source;
                break;

            case 'WORD':
                $result = (string) preg_replace('/[^A-Z_]/i', '', $source);
                break;

            case 'ALNUM':
                $result = (string) preg_replace('/[^A-Z0-9]/i', '', $source);
                break;

            case 'CMD':
                $result = (string) preg_replace('/[^A-Z0-9_\.-]/i', '', $source);
                $result = strtolower($result);
                break;

            case 'BASE64':
                $result = (string) preg_replace('/[^A-Z0-9\/+=]/i', '', $source);
                break;

            case 'STRING':
                $result = (string) $filter->process($source);
                break;

            case 'ARRAY':
                $result = (array) $filter->process($source);
                break;

            case 'PATH':
                // keep the leading run of path-safe characters, drop the rest
                $source = trim((string) $source);
                $pattern = '/^([-_\.\/A-Z0-9=&%?~]+)(.*)$/i';
                preg_match($pattern, $source, $matches);
                $result = isset($matches[1]) ? (string) $matches[1] : '';
                break;

            case 'USERNAME':
                $result = (string) preg_replace('/[\x00-\x1F\x7F<>"\'%&]/', '', $source);
                break;

            case 'WEBURL':
                // Surrounding whitespace is ignored by browsers but keeps parse_url()
                // from recognising the scheme, so it is removed before checking.
                $result = trim((string) $filter->process($source));
                // allow only relative, http or https (schemes are case-insensitive)
                $urlparts = parse_url($result);
                $scheme = (is_array($urlparts) && !empty($urlparts['scheme']))
                    ? strtolower($urlparts['scheme'])
                    : '';
                if ($scheme !== '' && $scheme !== 'http' && $scheme !== 'https') {
                    $result = '';
                }
                // do not allow quotes, tag brackets or controls
                // (D modifier: "$" must not match before a trailing newline)
                if (!preg_match('#^[^"<>\x00-\x1F]+$#D', $result)) {
                    $result = '';
                }
                break;

            case 'EMAIL':
                $result = (string) $source;
                if (!filter_var((string) $source, FILTER_VALIDATE_EMAIL)) {
                    $result = '';
                }
                break;

            case 'IP':
                $result = (string) $source;
                // any syntactically valid IPv4/IPv6 address is accepted;
                // no range-restricting flags are applied
                if (!filter_var((string) $source, FILTER_VALIDATE_IP)) {
                    $result = '';
                }
                break;

            default:
                $result = $filter->process($source);
                break;
        }

        return $result;
    }

    /**
     * Internal method to iteratively remove all unwanted tags and attributes
     *
     * filterTags() is re-applied until its output stops changing, so markup that
     * only becomes a tag after an inner tag was removed is caught on a later pass.
     *
     * @param string $source input string to be 'cleaned'
     *
     * @return string 'cleaned' version of input parameter
     */
    protected function remove($source)
    {
        do {
            $previous = $source;
            $source = $this->filterTags($previous);
            // strict comparison: numeric-looking strings must not compare as numbers
        } while ($source !== $previous);

        return $source;
    }

    /**
     * Internal method to strip a string of certain tags
     *
     * Performs a single left-to-right pass. Text outside of tags is copied,
     * each complete "<...>" is either dropped or rebuilt from its tag name and
     * the attributes accepted by filterAttr().
     *
     * @param string $source input string to be 'cleaned'
     *
     * @return string 'cleaned' version of input parameter
     */
    protected function filterTags($source)
    {
        // $preTag collects the output, $postTag is the input still to be scanned
        $preTag = '';
        $postTag = $source;
        $tagOpen_start = strpos($source, '<');

        while ($tagOpen_start !== false) {
            // copy the text in front of the tag
            $preTag .= substr($postTag, 0, $tagOpen_start);
            $postTag = substr($postTag, $tagOpen_start);
            $fromTagOpen = substr($postTag, 1);

            // end of tag; an unterminated "<" and everything after it is kept as text
            $tagOpen_end = strpos($fromTagOpen, '>');
            if ($tagOpen_end === false) {
                break;
            }

            // another "<" before the ">" : emit the text up to it and restart there
            $tagOpen_nested = strpos($fromTagOpen, '<');
            if (($tagOpen_nested !== false) && ($tagOpen_nested < $tagOpen_end)) {
                $preTag .= substr($postTag, 0, ($tagOpen_nested + 1));
                $postTag = substr($postTag, ($tagOpen_nested + 1));
                $tagOpen_start = strpos($postTag, '<');
                continue;
            }

            // everything between "<" and ">"
            $currentTag = substr($fromTagOpen, 0, $tagOpen_end);
            $tagLength = strlen($currentTag);

            // iterate through tag finding attribute pairs - setup
            $tagLeft = $currentTag;
            $attrSet = array();
            $currentSpace = strpos($tagLeft, ' ');

            list($tagName) = explode(' ', $currentTag);
            $isCloseTag = (substr($currentTag, 0, 1) === '/');
            if ($isCloseTag) {
                $tagName = substr($tagName, 1);
            }

            // Drop "non-regular" or empty tag names (this includes "<>"), and
            // blacklisted tags while xssAuto is on. The D modifier stops "$" from
            // matching before a trailing newline, which would let "script\n"
            // pass validation and then miss the blacklist.
            if ((!preg_match('/^[a-z][a-z0-9]*$/iD', (string) $tagName))
                || ($this->xssAuto && in_array(strtolower($tagName), $this->tagBlacklist, true))
            ) {
                $postTag = substr($postTag, ($tagLength + 2));
                $tagOpen_start = strpos($postTag, '<');
                // don't append this tag
                continue;
            }

            // split the rest of the tag into attribute chunks; a chunk with a
            // double-quoted value runs to the closing quote so it may contain spaces
            while ($currentSpace !== false) {
                $fromSpace = substr($tagLeft, ($currentSpace + 1));
                $nextSpace = strpos($fromSpace, ' ');
                $openQuotes = strpos($fromSpace, '"');
                $closeQuotes = ($openQuotes === false)
                    ? false
                    : strpos($fromSpace, '"', $openQuotes + 1);

                if ((strpos($fromSpace, '=') !== false) && ($closeQuotes !== false)) {
                    // an equals sign plus opening and closing quotes exist
                    $attr = substr($fromSpace, 0, ($closeQuotes + 1));
                } else {
                    // unquoted or valueless: the chunk ends at the next space
                    $attr = ($nextSpace === false) ? '' : substr($fromSpace, 0, $nextSpace);
                }
                // last attr pair
                if (!$attr) {
                    $attr = $fromSpace;
                }
                $attrSet[] = $attr;

                $tagLeft = substr($fromSpace, strlen($attr));
                $currentSpace = strpos($tagLeft, ' ');
            }

            // appears in array specified by user
            $tagFound = in_array(strtolower($tagName), $this->tagsArray, true);
            // keep this tag when the white/black list mode says so
            if ((!$tagFound && $this->tagsMethod) || ($tagFound && !$this->tagsMethod)) {
                if (!$isCloseTag) {
                    // reconstruct tag with allowed attributes
                    $preTag .= '<' . $tagName;
                    foreach ($this->filterAttr($attrSet) as $keptAttr) {
                        $preTag .= ' ' . $keptAttr;
                    }
                    // reformat single tags to XHTML
                    if (strpos($fromTagOpen, '</' . $tagName)) {
                        $preTag .= '>';
                    } else {
                        $preTag .= ' />';
                    }
                } else {
                    // just the tagname
                    $preTag .= '</' . $tagName . '>';
                }
            }

            // find next tag's start
            $postTag = substr($postTag, ($tagLength + 2));
            $tagOpen_start = strpos($postTag, '<');
        }

        // append any text after the last tag
        $preTag .= $postTag;

        return $preTag;
    }

    /**
     * Internal method to strip a tag of certain attributes
     *
     * @param array $attrSet raw attribute chunks as split by filterTags()
     *
     * @return array accepted attributes, each rebuilt as name="value"
     */
    protected function filterAttr($attrSet)
    {
        $newSet = array();

        foreach ($attrSet as $rawAttr) {
            // skip blank spaces in tag
            if (!$rawAttr) {
                continue;
            }

            // split into attr name and value; the limit keeps "=" inside the value
            $attrSubSet = explode('=', trim($rawAttr), 2);
            list($attrName) = explode(' ', $attrSubSet[0]);
            $attrValue = isset($attrSubSet[1]) ? $attrSubSet[1] : '';
            $lowerName = strtolower($attrName);

            // remove "non-regular" attr names, and with xssAuto on also the
            // blacklisted names and every event handler (on*, in any letter case)
            if ((!preg_match('/^[a-z_:][a-z0-9_:.\-]*$/iD', $attrName))
                || ($this->xssAuto
                    && (in_array($lowerName, $this->attrBlacklist, true)
                        || (substr($lowerName, 0, 2) === 'on')))
            ) {
                continue;
            }

            // xss attr value filtering
            if ($attrValue) {
                // strips unicode, hex, etc
                $attrValue = str_replace('&#', '', $attrValue);
                // strip all whitespace (including newlines) within attr value
                $attrValue = preg_replace('/\s+/', '', $attrValue);
                // strip double quotes
                $attrValue = str_replace('"', '', $attrValue);
                // drop single quotes that wrap the whole value
                if ((substr($attrValue, 0, 1) === "'") && (substr($attrValue, -1) === "'")) {
                    $attrValue = (string) substr($attrValue, 1, (strlen($attrValue) - 2));
                }
                // strip slashes
                $attrValue = stripslashes($attrValue);
            }

            // drop attributes carrying script or CSS-expression payloads
            $lowerValue = strtolower($attrValue);
            if (((strpos($lowerValue, 'expression') !== false) && ($lowerName === 'style'))
                || (strpos($lowerValue, 'javascript:') !== false)
                || (strpos($lowerValue, 'behavior:') !== false)
                || (strpos($lowerValue, 'behaviour:') !== false)
                || (strpos($lowerValue, 'vbscript:') !== false)
                || (strpos($lowerValue, 'mocha:') !== false)
                || (strpos($lowerValue, 'livescript:') !== false)
            ) {
                continue;
            }

            // if matches user defined array
            $attrFound = in_array($lowerName, $this->attrArray, true);
            // keep this attr when the white/black list mode says so
            if ((!$attrFound && $this->attrMethod) || ($attrFound && !$this->attrMethod)) {
                if ($attrValue) {
                    // attr has value
                    $newSet[] = $attrName . '="' . $attrValue . '"';
                } elseif ($attrValue === '0') {
                    // attr has decimal zero as value
                    $newSet[] = $attrName . '="0"';
                } else {
                    // reformat single attributes to XHTML
                    $newSet[] = $attrName . '="' . $attrName . '"';
                }
            }
        }

        return $newSet;
    }

    /**
     * Try to convert to plaintext
     *
     * Decodes HTML entities, then converts any numeric character references
     * that are still present (for example after double encoding) to a single
     * byte, using the reference number modulo 256 as chr() does.
     *
     * @param string $source string to decode
     *
     * @return string $source decoded
     */
    protected function decode($source)
    {
        // entity decode
        $charset = defined('_CHARSET') ? constant('_CHARSET') : 'utf-8';
        $source = html_entity_decode($source, ENT_QUOTES, $charset);

        // Reduce in floating point so that absurdly long digit runs cannot
        // overflow an integer; a non-finite number yields an empty string.
        $toByte = function ($number) {
            $byte = fmod((float) $number, 256);
            return is_nan($byte) ? '' : chr((int) $byte);
        };

        // convert decimal
        $source = preg_replace_callback(
            '/&#(\d+);/m',
            function ($matches) use ($toByte) {
                return $toByte($matches[1]);
            },
            $source
        );
        // convert hex; hexdec() is required because "0x.." strings are not numeric
        $source = preg_replace_callback(
            '/&#x([a-f0-9]+);/mi',
            function ($matches) use ($toByte) {
                return $toByte(hexdec($matches[1]));
            },
            $source
        );

        return $source;
    }

    /**
     * gather - gather input from a source
     *
     * @param string $source    name of source superglobal, get, post or cookie
     * @param array  $input_map each element of the array is an array consisting of
     *                          elements to gather and clean from source
     *                            - name - key in source superglobal, no default
     *                            - type - FilterInput::clean type, default string
     *                            - default - default value, default ''
     *                            - trim - true to trim spaces from input, default true
     *                            - max length - maximum length to accept, 0=no limit, default 0
     *                          Example: array('op','string','view',true)
     *                          Trimming and the length limit are applied to scalar
     *                          values only; array input is handed to clean() as is.
     * @param mixed  $require   name of required element, or false for nothing
     *                          required name. The default of the required element is
     *                          ignored, and false is returned when its cleaned value
     *                          is empty (this includes '' and '0').
     *
     * @return array|false array of cleaned elements as specified by input_map, or
     *                     false if require key specified but empty
     */
    public static function gather($source, $input_map, $require = false)
    {
        $output = array();

        if (!empty($source)) {
            $source = strtolower($source);
            foreach ($input_map as $input) {
                if (!isset($input[0])) {
                    continue;
                }
                // set defaults
                $name = $input[0];
                $type = isset($input[1]) ? $input[1] : 'string';
                $default = '';
                if (isset($input[2]) && !($require && $require == $name)) {
                    $default = $input[2];
                }
                $trim = isset($input[3]) ? $input[3] : true;
                $maxlen = isset($input[4]) ? $input[4] : 0;

                $value = $default;
                switch ($source) {
                    case 'get':
                        if (isset($_GET[$name])) {
                            $value = $_GET[$name];
                        }
                        break;
                    case 'post':
                        if (isset($_POST[$name])) {
                            $value = $_POST[$name];
                        }
                        break;
                    case 'cookie':
                        if (isset($_COOKIE[$name])) {
                            $value = $_COOKIE[$name];
                        }
                        break;
                }

                // request values can be arrays (name[]=..); string functions
                // must only ever see scalars
                if (is_scalar($value)) {
                    if ($trim) {
                        $value = trim((string) $value);
                    }
                    if ($maxlen > 0) {
                        $value = (string) $value;
                        if (function_exists('mb_strlen')) {
                            if (mb_strlen($value) > $maxlen) {
                                $value = mb_substr($value, 0, $maxlen);
                            }
                        } else {
                            $value = substr($value, 0, $maxlen);
                        }
                        // cutting may have exposed trailing whitespace
                        if ($trim) {
                            $value = trim($value);
                        }
                    }
                }

                $output[$name] = self::clean($value, $type);
            }
        }

        if ($require && empty($output[$require])) {
            $output = false;
        }

        return $output;
    }
}
613: