XOOPS  2.6.0
FilterInput.php
Go to the documentation of this file.
1 <?php
2 /*
3  You may not change or alter any portion of this comment or credits
4  of supporting developers from this source code or any supporting source code
5  which is considered copyrighted (c) material of the original comment or credit authors.
6 
7  This program is distributed in the hope that it will be useful,
8  but WITHOUT ANY WARRANTY; without even the implied warranty of
9  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
10  */
11 
12 namespace Xoops\Core;
13 
38 {
/**
 * @var array user-supplied tag names; lowercased by the constructor
 */
protected $tagsArray;

/**
 * @var array user-supplied attribute names; lowercased by the constructor
 */
protected $attrArray;

/**
 * @var int 0 keeps only tags listed in $tagsArray, non-zero keeps all tags except those listed
 */
protected $tagsMethod;

/**
 * @var int 0 keeps only attributes listed in $attrArray, non-zero keeps all except those listed
 */
protected $attrMethod;

/**
 * @var int when non-zero, blacklisted tags and attributes are always removed
 */
protected $xssAuto;

/**
 * @var array names removed from tag position whenever $xssAuto is on
 */
protected $tagBlacklist = array(
    'applet',
    'body',
    'bgsound',
    'base',
    'basefont',
    'embed',
    'frame',
    'frameset',
    'head',
    'html',
    'id',
    'iframe',
    'ilayer',
    'layer',
    'link',
    'meta',
    'name',
    'object',
    'script',
    'style',
    'title',
    'xml'
);

/**
 * @var array attributes removed whenever $xssAuto is on (event handlers are stripped as well)
 */
protected $attrBlacklist = array(
    'action',
    'background',
    'codebase',
    'dynsrc',
    'lowsrc'
);
53 
/**
 * Configure a filter instance.
 *
 * @param array $tagsArray  tag names the filter should act on
 * @param array $attrArray  attribute names the filter should act on
 * @param int   $tagsMethod 0 = keep only the listed tags, 1 = keep every tag except the listed ones
 * @param int   $attrMethod 0 = keep only the listed attributes, 1 = keep every attribute except the listed ones
 * @param int   $xssAuto    non-zero = additionally remove blacklisted tags and attributes
 */
public function __construct(
    $tagsArray = array(),
    $attrArray = array(),
    $tagsMethod = 0,
    $attrMethod = 0,
    $xssAuto = 1
) {
    // Cast before reading the values so a scalar argument is accepted, and
    // lowercase through array_map so sparse or associative lists are handled
    // without touching undefined offsets (keys are preserved).
    $this->tagsArray = array_map('strtolower', (array) $tagsArray);
    $this->attrArray = array_map('strtolower', (array) $attrArray);
    $this->tagsMethod = $tagsMethod;
    $this->attrMethod = $attrMethod;
    $this->xssAuto = $xssAuto;
}
86 
/**
 * Return a shared filter for the given configuration.
 *
 * Instances are cached per distinct argument set, so repeated calls with
 * the same arguments return the same object.
 *
 * @param array $tagsArray  tag names the filter should act on
 * @param array $attrArray  attribute names the filter should act on
 * @param int   $tagsMethod 0 = keep only the listed tags, 1 = keep every tag except the listed ones
 * @param int   $attrMethod 0 = keep only the listed attributes, 1 = keep every attribute except the listed ones
 * @param int   $xssAuto    non-zero = additionally remove blacklisted tags and attributes
 *
 * @return FilterInput
 */
public static function getInstance(
    $tagsArray = array(),
    $attrArray = array(),
    $tagsMethod = 0,
    $attrMethod = 0,
    $xssAuto = 1
) {
    // one cached filter per configuration signature
    static $pool = array();

    $key = md5(serialize(array($tagsArray, $attrArray, $tagsMethod, $attrMethod, $xssAuto)));

    if (!empty($pool[$key])) {
        return $pool[$key];
    }

    $pool[$key] = new self($tagsArray, $attrArray, $tagsMethod, $attrMethod, $xssAuto);

    return $pool[$key];
}
126 
/**
 * Filter a string, or every string inside an array, for XSS and other
 * unwanted markup.
 *
 * Arrays are walked recursively so that strings in nested arrays are
 * filtered too; array keys are left untouched. Values that are neither
 * strings nor arrays are returned exactly as given.
 *
 * @param mixed $source input string or array of strings to be cleaned
 *
 * @return mixed the cleaned version of the input
 */
public function process($source)
{
    if (is_string($source)) {
        // decode entities first so encoded markup cannot slip past the tag filter
        return $this->remove($this->decode($source));
    }

    if (is_array($source)) {
        foreach ($source as $key => $value) {
            // recurse: nested arrays must not bypass the filter
            if (is_string($value) || is_array($value)) {
                $source[$key] = $this->process($value);
            }
        }
    }

    return $source;
}
154 
/**
 * Static convenience wrapper that cleans a value according to a named type.
 *
 * Recognised types (case-insensitive): INT/INTEGER, FLOAT/DOUBLE,
 * BOOL/BOOLEAN, WORD, ALNUM, CMD, BASE64, STRING, ARRAY, PATH, USERNAME,
 * WEBURL, EMAIL and IP. Any other type runs the value through the default
 * filter's process() without a cast.
 *
 * @param mixed  $source input to be cleaned
 * @param string $type   name of the type constraint to apply
 *
 * @return mixed the cleaned value
 */
public static function clean($source, $type = 'string')
{
    static $filter = null;

    // A static method never has an object context, so a default-configured
    // shared instance is used for the calls that need the tag filter.
    if (!is_object($filter)) {
        $filter = self::getInstance();
    }

    // Handle the type constraint
    switch (strtoupper($type)) {
        case 'INT':
        case 'INTEGER':
            // Only use the first integer value; no match yields 0
            preg_match('/-?[0-9]+/', (string) $source, $matches);
            $result = isset($matches[0]) ? (int) $matches[0] : 0;
            break;

        case 'FLOAT':
        case 'DOUBLE':
            // Only use the first floating point value; no match yields 0.0
            preg_match('/-?[0-9]+(\.[0-9]+)?/', (string) $source, $matches);
            $result = isset($matches[0]) ? (float) $matches[0] : 0.0;
            break;

        case 'BOOL':
        case 'BOOLEAN':
            $result = (bool) $source;
            break;

        case 'WORD':
            $result = (string) preg_replace('/[^A-Z_]/i', '', $source);
            break;

        case 'ALNUM':
            $result = (string) preg_replace('/[^A-Z0-9]/i', '', $source);
            break;

        case 'CMD':
            $result = (string) preg_replace('/[^A-Z0-9_\.-]/i', '', $source);
            $result = strtolower($result);
            break;

        case 'BASE64':
            $result = (string) preg_replace('/[^A-Z0-9\/+=]/i', '', $source);
            break;

        case 'STRING':
            $result = (string) $filter->process($source);
            break;

        case 'ARRAY':
            $result = (array) $filter->process($source);
            break;

        case 'PATH':
            // keep only the leading run of path-safe characters
            $source = trim((string) $source);
            $pattern = '/^([-_\.\/A-Z0-9=&%?~]+)(.*)$/i';
            preg_match($pattern, $source, $matches);
            $result = isset($matches[1]) ? (string) $matches[1] : '';
            break;

        case 'USERNAME':
            $result = (string) preg_replace('/[\x00-\x1F\x7F<>"\'%&]/', '', $source);
            break;

        case 'WEBURL':
            $result = (string) $filter->process($source);
            // allow only relative, http or https
            $urlparts = parse_url($result);
            if ($urlparts === false) {
                // parse_url() gives up on seriously malformed URLs such as
                // "javascript:///..."; without this check they would skip
                // the scheme test below entirely.
                $result = '';
            } elseif (!empty($urlparts['scheme'])) {
                // URL schemes are case-insensitive
                $scheme = strtolower($urlparts['scheme']);
                if ($scheme !== 'http' && $scheme !== 'https') {
                    $result = '';
                }
            }
            // do not allow quotes, tag brackets or controls
            if (!preg_match('#^[^"<>\x00-\x1F]+$#', $result)) {
                $result = '';
            }
            break;

        case 'EMAIL':
            $result = (string) $source;
            if (!filter_var((string) $source, FILTER_VALIDATE_EMAIL)) {
                $result = '';
            }
            break;

        case 'IP':
            $result = (string) $source;
            // Any syntactically valid address is accepted, including private
            // and reserved ranges (no FILTER_FLAG_* restrictions are applied).
            if (!filter_var((string) $source, FILTER_VALIDATE_IP)) {
                $result = '';
            }
            break;

        default:
            $result = $filter->process($source);
            break;
    }

    return $result;
}
277 
/**
 * Repeatedly apply the tag filter until the string stops changing.
 *
 * A single pass can expose a new tag (for example when removing an inner
 * tag joins the text around it), so the filter is re-run until it reaches
 * a fixed point.
 *
 * @param string $source input string to be cleaned
 *
 * @return string the cleaned version of the input
 */
protected function remove($source)
{
    do {
        $previous = $source;
        // one filter pass per iteration; the result doubles as the loop test
        $source = $this->filterTags($previous);
    } while ($source !== $previous);

    return $source;
}
296 
/**
 * Run one filtering pass over the string, dropping or rebuilding each tag.
 *
 * Tags with an invalid or empty name (including "<>") and, when xssAuto is
 * on, blacklisted tags are removed outright. Remaining tags are kept or
 * removed according to tagsArray/tagsMethod; kept opening tags are rebuilt
 * from their name plus the attributes that survive filterAttr().
 * An unterminated "<" ends the pass and the rest of the string is appended
 * unchanged.
 *
 * @param string $source input string to be cleaned
 *
 * @return string the string after a single pass
 */
protected function filterTags($source)
{
    // filter pass setup
    $preTag = '';
    $postTag = $source;
    // find initial tag's position
    $tagOpen_start = strpos($source, '<');
    // iterate through string until no tags left
    while ($tagOpen_start !== false) {
        // process tag iteratively
        $preTag .= substr($postTag, 0, $tagOpen_start);
        $postTag = substr($postTag, $tagOpen_start);
        $fromTagOpen = substr($postTag, 1);
        // end of tag
        $tagOpen_end = strpos($fromTagOpen, '>');
        if ($tagOpen_end === false) {
            break;
        }
        // next start of tag (for nested tag assessment)
        $tagOpen_nested = strpos($fromTagOpen, '<');
        if (($tagOpen_nested !== false) && ($tagOpen_nested < $tagOpen_end)) {
            // another "<" opens before this tag closes: emit the text up to
            // it verbatim and restart the scan from the inner "<"
            $preTag .= substr($postTag, 0, ($tagOpen_nested+1));
            $postTag = substr($postTag, ($tagOpen_nested+1));
            $tagOpen_start = strpos($postTag, '<');
            continue;
        }
        $currentTag = substr($fromTagOpen, 0, $tagOpen_end);
        $tagLength = strlen($currentTag);
        // An empty tag "<>" needs no special handling here: its empty name
        // fails the tag-name check below and the tag is dropped. Copying the
        // remaining text at this point would emit it twice, making the
        // output grow on every pass.

        // iterate through tag finding attribute pairs - setup
        $tagLeft = $currentTag;
        $attrSet = array();
        $currentSpace = strpos($tagLeft, ' ');
        if (substr($currentTag, 0, 1) == "/") {
            // is end tag
            $isCloseTag = true;
            list($tagName) = explode(' ', $currentTag);
            $tagName = substr($tagName, 1);
        } else {
            // is start tag
            $isCloseTag = false;
            list($tagName) = explode(' ', $currentTag);
        }
        // excludes all "non-regular" tagnames OR no tagname OR remove if xssauto is on and tag is blacklisted
        if ((!preg_match("/^[a-z][a-z0-9]*$/i", $tagName))
            || (!$tagName)
            || ((in_array(strtolower($tagName), $this->tagBlacklist))
            && ($this->xssAuto))
        ) {
            // skip past "<" + tag text + ">"
            $postTag = substr($postTag, ($tagLength + 2));
            $tagOpen_start = strpos($postTag, '<');
            // don't append this tag
            continue;
        }
        // this while is needed to support attribute values with spaces in!
        while ($currentSpace !== false) {
            $fromSpace = substr($tagLeft, ($currentSpace+1));
            $nextSpace = strpos($fromSpace, ' ');
            $openQuotes = strpos($fromSpace, '"');
            $closeQuotes = strpos(substr($fromSpace, ($openQuotes+1)), '"') + $openQuotes + 1;
            // another equals exists
            if (strpos($fromSpace, '=') !== false) {
                // opening and closing quotes exists
                if (($openQuotes !== false)
                    && (strpos(substr($fromSpace, ($openQuotes+1)), '"') !== false)
                ) {
                    $attr = substr($fromSpace, 0, ($closeQuotes+1));
                } else {
                    // one or neither exist
                    $attr = substr($fromSpace, 0, $nextSpace);
                }
            } else {
                // no more equals exist
                $attr = substr($fromSpace, 0, $nextSpace);
            }
            // last attr pair
            if (!$attr) {
                $attr = $fromSpace;
            }
            // add to attribute pairs array
            $attrSet[] = $attr;
            // next inc
            $tagLeft = substr($fromSpace, strlen($attr));
            $currentSpace = strpos($tagLeft, ' ');
        }
        // appears in array specified by user
        $tagFound = in_array(strtolower($tagName), $this->tagsArray);
        // keep this tag on condition
        if ((!$tagFound && $this->tagsMethod) || ($tagFound && !$this->tagsMethod)) {
            // reconstruct tag with allowed attributes
            if (!$isCloseTag) {
                $attrSet = $this->filterAttr($attrSet);
                $preTag .= '<' . $tagName;
                $countAttrSet = count($attrSet);
                for ($i = 0; $i < $countAttrSet; ++$i) {
                    $preTag .= ' ' . $attrSet[$i];
                }
                // reformat single tags to XHTML
                if (strpos($fromTagOpen, "</" . $tagName)) {
                    $preTag .= '>';
                } else {
                    $preTag .= ' />';
                }
            } else {
                // just the tagname
                $preTag .= '</' . $tagName . '>';
            }
        }
        // find next tag's start
        $postTag = substr($postTag, ($tagLength + 2));
        $tagOpen_start = strpos($postTag, '<');
    }
    // append any code after end of tags
    $preTag .= $postTag;

    return $preTag;
}
426 
/**
 * Filter a tag's attribute strings and rebuild the ones that are kept.
 *
 * Each entry is split into a name and an optional value. Entries are
 * dropped when the name is not a regular attribute name, when xssAuto is on
 * and the name is blacklisted or is an event handler ("on..."), or when the
 * value contains a script-style scheme or a CSS expression. What remains is
 * kept or dropped according to attrArray/attrMethod and returned as
 * name="value" (valueless attributes become name="name").
 *
 * @param array $attrSet attribute strings as split out of a tag
 *
 * @return array the rebuilt attribute strings that passed the filter
 */
protected function filterAttr($attrSet)
{
    $newSet = array();
    // process attributes
    foreach ($attrSet as $attr) {
        // skip blank spaces in tag
        if (!$attr) {
            continue;
        }
        // split into attr name and value; the limit keeps any '=' that is
        // part of the value (e.g. a URL query string) intact
        $attrSubSet = explode('=', trim($attr), 2);
        list($attrName) = explode(' ', $attrSubSet[0]);
        // valueless attributes (e.g. "checked") have no second element
        $attrValue = isset($attrSubSet[1]) ? $attrSubSet[1] : '';
        // attribute names are case-insensitive in HTML
        $attrNameLower = strtolower($attrName);

        // removes all "non-regular" attr names AND also attr blacklisted
        // (the pattern is anchored at both ends so it can actually reject)
        if ((!preg_match('/^[a-z][a-z0-9:_.-]*$/i', $attrName))
            || (($this->xssAuto)
            && ((in_array($attrNameLower, $this->attrBlacklist))
            || (substr($attrNameLower, 0, 2) === 'on')))
        ) {
            continue;
        }
        // xss attr value filtering
        if ($attrValue !== '') {
            // strips unicode, hex, etc
            $attrValue = str_replace('&#', '', $attrValue);
            // strip all whitespace (including newlines) within attr value
            $attrValue = (string) preg_replace('/\s+/', '', $attrValue);
            // strip double quotes
            $attrValue = str_replace('"', '', $attrValue);
            // [requested feature] convert single quotes from either side to doubles
            // (Single quotes shouldn't be used to pad attr value)
            if ((substr($attrValue, 0, 1) == "'")
                && (substr($attrValue, (strlen($attrValue) - 1), 1) == "'")
            ) {
                $attrValue = substr($attrValue, 1, (strlen($attrValue) - 2));
            }
            // strip slashes
            $attrValue = stripslashes((string) $attrValue);
        }
        // auto strip attr's with "javascript:" and similar script schemes
        $attrValueLower = strtolower($attrValue);
        if (((strpos($attrValueLower, 'expression') !== false)
            && ($attrNameLower == 'style')) ||
            (strpos($attrValueLower, 'javascript:') !== false) ||
            (strpos($attrValueLower, 'behaviour:') !== false) ||
            // the CSS property that loads script behaviours is spelled "behavior"
            (strpos($attrValueLower, 'behavior:') !== false) ||
            (strpos($attrValueLower, 'vbscript:') !== false) ||
            (strpos($attrValueLower, 'mocha:') !== false) ||
            (strpos($attrValueLower, 'livescript:') !== false)
        ) {
            continue;
        }

        // if matches user defined array
        $attrFound = in_array($attrNameLower, $this->attrArray);
        // keep this attr on condition
        if ((!$attrFound && $this->attrMethod) || ($attrFound && !$this->attrMethod)) {
            if ($attrValue !== '') {
                // attr has value (including a literal "0")
                $newSet[] = $attrName . '="' . $attrValue . '"';
            } else {
                // reformat single attributes to XHTML
                $newSet[] = $attrName . '="' . $attrName . '"';
            }
        }
    }

    return $newSet;
}
504 
/**
 * Convert HTML entities in the input back to plain characters.
 *
 * Named and numeric entities are decoded with html_entity_decode() using
 * the _CHARSET constant when defined (utf-8 otherwise). Numeric references
 * that are still present afterwards are replaced with the single byte
 * whose value is the reference modulo 256.
 *
 * @param string $source string to decode
 *
 * @return string the decoded string
 */
protected function decode($source)
{
    // entity decode
    $charset = defined('_CHARSET') ? constant('_CHARSET') : 'utf-8';
    $source = html_entity_decode($source, ENT_QUOTES, $charset);
    // convert leftover decimal references
    $source = preg_replace_callback(
        '/&#(\d+);/m',
        function ($matches) {
            return chr((int) $matches[1]);
        },
        $source
    );
    // convert leftover hex references
    $source = preg_replace_callback(
        '/&#x([a-f0-9]+);/mi',
        function ($matches) {
            // chr() needs an integer, not a "0x.." string; the last two hex
            // digits are the value modulo 256, which is all chr() uses
            return chr(hexdec(substr($matches[1], -2)));
        },
        $source
    );

    return $source;
}
536 
/**
 * Collect and clean a set of request variables in one call.
 *
 * Each entry of $input_map is an indexed array:
 *   [0] variable name (required; entries without it are skipped)
 *   [1] clean() type, default 'string'
 *   [2] default value when the variable is absent, default ''
 *   [3] whether to trim the value, default true
 *   [4] maximum length, 0 for no limit, default 0
 *
 * Trimming and the length limit apply only to non-array values; array
 * values (e.g. from "name[]" fields) are handed to clean() unchanged.
 *
 * @param string      $source    'get', 'post' or 'cookie' (case-insensitive)
 * @param array       $input_map list of variable definitions as described above
 * @param string|bool $require   name of a variable that must be non-empty, or false
 *
 * @return array|bool cleaned values keyed by name, or false when the
 *                    required variable is missing or empty
 */
public static function gather($source, $input_map, $require = false)
{
    $output = array();

    if (!empty($source)) {
        $source = strtolower($source);
        foreach ($input_map as $input) {
            if (!isset($input[0])) {
                continue;
            }
            // set defaults
            $name = $input[0];
            $type = isset($input[1]) ? $input[1] : 'string';
            // the required variable never falls back to its default
            $default = isset($input[2]) ?
                (($require && $require==$name) ? '': $input[2]) : '';
            $trim = isset($input[3]) ? $input[3] : true;
            $maxlen = isset($input[4]) ? $input[4] : 0;
            $value = $default;
            switch ($source) {
                case 'get':
                    if (isset($_GET[$name])) {
                        $value = $_GET[$name];
                    }
                    break;
                case 'post':
                    if (isset($_POST[$name])) {
                        $value = $_POST[$name];
                    }
                    break;
                case 'cookie':
                    if (isset($_COOKIE[$name])) {
                        $value = $_COOKIE[$name];
                    }
                    break;
            }
            // string functions cannot take an array; leave arrays to clean()
            if (!is_array($value)) {
                if ($trim) {
                    $value = trim($value);
                }
                if ($maxlen > 0) {
                    if (function_exists('mb_strlen')) {
                        if (mb_strlen($value) > $maxlen) {
                            $value = mb_substr($value, 0, $maxlen);
                        }
                    } else {
                        $value = substr($value, 0, $maxlen);
                    }
                    // truncation may expose new trailing whitespace
                    if ($trim) {
                        $value = trim($value);
                    }
                }
            }
            $output[$name] = self::clean($value, $type);
        }
    }

    if ($require && empty($output[$require])) {
        $output = false;
    }

    return $output;
}
616 }
static getInstance($tagsArray=array(), $attrArray=array(), $tagsMethod=0, $attrMethod=0, $xssAuto=1)
$i
Definition: dialog.php:68
$result
Definition: pda.php:33
static gather($source, $input_map, $require=false)
else $filter
Definition: dialog.php:95
$type
Definition: misc.php:33
static clean($source, $type= 'string')
__construct($tagsArray=array(), $attrArray=array(), $tagsMethod=0, $attrMethod=0, $xssAuto=1)
Definition: FilterInput.php:63