| 1: | <?php |
| 2: | |
| 3: | /** |
| 4: | * Base class for all validating attribute definitions. |
| 5: | * |
| 6: | * This family of classes forms the core for not only HTML attribute validation, |
| 7: | * but also any sort of string that needs to be validated or cleaned (which |
| 8: | * means CSS properties and composite definitions are defined here too). |
| 9: | * Besides defining (through code) what precisely makes the string valid, |
| 10: | * subclasses are also responsible for cleaning the code if possible. |
| 11: | */ |
| 12: | |
abstract class HTMLPurifier_AttrDef
{

    /**
     * Tells us whether or not an HTML attribute is minimized.
     * Has no meaning in other contexts.
     * @type bool
     */
    public $minimized = false;

    /**
     * Tells us whether or not an HTML attribute is required.
     * Has no meaning in other contexts.
     * @type bool
     */
    public $required = false;

    /**
     * Validates and cleans passed string according to a definition.
     *
     * @param string $string String to be validated and cleaned.
     * @param HTMLPurifier_Config $config Mandatory HTMLPurifier_Config object.
     * @param HTMLPurifier_Context $context Mandatory HTMLPurifier_Context object.
     */
    abstract public function validate($string, $config, $context);

    /**
     * Convenience method that parses a string as if it were CDATA.
     *
     * This method processes a string roughly in the manner specified at
     * <http://www.w3.org/TR/html4/types.html#h-6.2>: leading and trailing
     * whitespace is removed, and every remaining line feed, tab and
     * carriage return is replaced with a single space. While most useful
     * for HTML attributes specified as CDATA, it can also be applied to
     * most CSS values.
     *
     * @note This method is not entirely standards compliant: the spec asks
     *       for line feeds to be ignored, whereas this implementation
     *       replaces them with spaces, and trim() removes more types of
     *       whitespace than specified in the spec. In practice,
     *       this is rarely a problem, as those extra characters usually have
     *       already been removed by HTMLPurifier_Encoder.
     *
     * @warning This processing is inconsistent with XML's whitespace handling
     *          as specified by section 3.3.3 and referenced XHTML 1.0 section
     *          4.7.  However, note that we are NOT necessarily
     *          parsing XML, thus, this behavior may still be correct. We
     *          assume that newlines have been normalized.
     *
     * @param string $string Raw attribute/CSS value.
     * @return string Trimmed value with inner "\n", "\t", "\r" turned into spaces.
     */
    public function parseCDATA($string)
    {
        $string = trim($string);
        $string = str_replace(array("\n", "\t", "\r"), ' ', $string);
        return $string;
    }

    /**
     * Factory method for creating this class from a string.
     * @param string $string String construction info
     * @return HTMLPurifier_AttrDef Created AttrDef object corresponding to $string
     */
    public function make($string)
    {
        // default implementation, return a flyweight of this object.
        // If $string has an effect on the returned object (i.e. you
        // need to overload this method), it is best
        // to clone or instantiate new copies. (Instantiation is safer.)
        return $this;
    }

    /**
     * Removes spaces from rgb(0, 0, 0) so that shorthand CSS properties work
     * properly. THIS IS A HACK!
     *
     * Both the four-component forms (rgba/hsla) and the three-component
     * forms (rgb/hsl) are compacted, even when they occur together in the
     * same string.
     *
     * @param string $string a CSS colour definition
     * @return string|null The munged string; null if PCRE reports an error
     *                     (this is what preg_replace() itself returns).
     */
    protected function mungeRgb($string)
    {
        // One colour component: optional surrounding whitespace around a
        // (possibly fractional, possibly percentage) number. Each use of
        // this fragment contributes three capture groups, which is why the
        // replacement references below advance in steps of three.
        $p = '\s*(\d+(\.\d+)?([%]?))\s*';

        // The patterns are applied in order to the same subject. The
        // rgb/hsl pattern requires "(" directly after the function name, so
        // it can never match an rgba(/hsla( occurrence handled by the first.
        return preg_replace(
            array(
                '/(rgba|hsla)\(' . $p . ',' . $p . ',' . $p . ',' . $p . '\)/',
                '/(rgb|hsl)\(' . $p . ',' . $p . ',' . $p . '\)/',
            ),
            array(
                '\1(\2,\5,\8,\11)',
                '\1(\2,\5,\8)',
            ),
            $string
        );
    }

    /**
     * Parses a possibly escaped CSS string and returns the "pure"
     * version of it.
     *
     * Handled forms:
     *  - backslash followed by one to six hex digits: replaced by the
     *    corresponding character (dropped entirely if
     *    HTMLPurifier_Encoder::cleanUTF8() rejects it); a single
     *    whitespace character directly after the digits is consumed as
     *    the escape terminator;
     *  - backslash followed by a line feed: both are removed;
     *  - backslash followed by any other character: that character is
     *    kept literally;
     *  - a lone trailing backslash is kept as-is.
     *
     * @param string $string CSS string, possibly containing escapes
     * @return string The string with escapes expanded
     */
    protected function expandCSSEscape($string)
    {
        // flexibly parse it
        $ret = '';
        for ($i = 0, $c = strlen($string); $i < $c; $i++) {
            if ($string[$i] === '\\') {
                $i++;
                if ($i >= $c) {
                    // dangling backslash at the very end: keep it
                    $ret .= '\\';
                    break;
                }
                if (ctype_xdigit($string[$i])) {
                    // collect up to six hex digits
                    $code = $string[$i];
                    for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) {
                        if (!ctype_xdigit($string[$i])) {
                            break;
                        }
                        $code .= $string[$i];
                    }
                    // $i now points at the first character after the hex
                    // digits. If it is whitespace it terminates the escape
                    // and is consumed by the loop increment; otherwise step
                    // back so the increment lands on it again. This must
                    // happen regardless of whether the decoded character is
                    // kept, or the character following a rejected escape
                    // would be silently swallowed.
                    if ($i < $c && trim($string[$i]) !== '') {
                        $i--;
                    }
                    // We have to be extremely careful when adding
                    // new characters, to make sure we're not breaking
                    // the encoding.
                    $char = HTMLPurifier_Encoder::unichr(hexdec($code));
                    if (HTMLPurifier_Encoder::cleanUTF8($char) !== '') {
                        $ret .= $char;
                    }
                    continue;
                }
                if ($string[$i] === "\n") {
                    // escaped line feed: line continuation, emit nothing
                    continue;
                }
            }
            $ret .= $string[$i];
        }
        return $ret;
    }
}
| 143: | |
| 144: | // vim: et sw=4 sts=4 |
| 145: |