1: | <?php |
2: | |
3: | /** |
4: | * Base class for all validating attribute definitions. |
5: | * |
6: | * This family of classes forms the core for not only HTML attribute validation, |
7: | * but also any sort of string that needs to be validated or cleaned (which |
8: | * means CSS properties and composite definitions are defined here too). |
9: | * Besides defining (through code) what precisely makes the string valid, |
10: | * subclasses are also responsible for cleaning the code if possible. |
11: | */ |
12: | |
abstract class HTMLPurifier_AttrDef
{

    /**
     * Tells us whether or not an HTML attribute is minimized.
     * Has no meaning in other contexts.
     * @type bool
     */
    public $minimized = false;

    /**
     * Tells us whether or not an HTML attribute is required.
     * Has no meaning in other contexts.
     * @type bool
     */
    public $required = false;

    /**
     * Validates and cleans passed string according to a definition.
     *
     * This method is abstract: the actual validation rules are supplied
     * by each concrete subclass.
     *
     * @param string $string String to be validated and cleaned.
     * @param HTMLPurifier_Config $config Mandatory HTMLPurifier_Config object.
     * @param HTMLPurifier_Context $context Mandatory HTMLPurifier_Context object.
     */
    abstract public function validate($string, $config, $context);

    /**
     * Convenience method that parses a string as if it were CDATA.
     *
     * This method processes a string roughly in the manner specified at
     * <http://www.w3.org/TR/html4/types.html#h-6.2>: leading and trailing
     * whitespace is removed, and every remaining line feed, carriage
     * return and tab is replaced with a single space. While most useful
     * for HTML attributes specified as CDATA, it can also be applied to
     * most CSS values.
     *
     * @note This method is not entirely standards compliant: the spec asks
     *       for line feeds to be ignored, whereas this implementation
     *       turns them into spaces, and trim() removes more types of
     *       whitespace than specified in the spec. In practice, this is
     *       rarely a problem, as those extra characters usually have
     *       already been removed by HTMLPurifier_Encoder.
     *
     * @warning This processing is inconsistent with XML's whitespace handling
     *          as specified by section 3.3.3 and referenced XHTML 1.0 section
     *          4.7. However, note that we are NOT necessarily
     *          parsing XML, thus, this behavior may still be correct. We
     *          assume that newlines have been normalized.
     *
     * @param string $string String to be processed
     * @return string Trimmed string with interior newlines/tabs as spaces
     */
    public function parseCDATA($string)
    {
        // Trim first, so only whitespace *inside* the value becomes a space.
        $string = trim($string);
        $string = str_replace(array("\n", "\t", "\r"), ' ', $string);
        return $string;
    }

    /**
     * Factory method for creating this class from a string.
     * @param string $string String construction info
     * @return HTMLPurifier_AttrDef Created AttrDef object corresponding to $string
     */
    public function make($string)
    {
        // default implementation, return a flyweight of this object.
        // If $string has an effect on the returned object (i.e. you
        // need to overload this method), it is best
        // to clone or instantiate new copies. (Instantiation is safer.)
        return $this;
    }

    /**
     * Removes spaces from rgb(0, 0, 0) so that shorthand CSS properties work
     * properly. THIS IS A HACK!
     *
     * Both the four-component forms (rgba/hsla) and the three-component
     * forms (rgb/hsl) are rewritten, including when they appear together
     * in the same string. Text that does not match either form is left
     * untouched.
     *
     * @param string $string a CSS colour definition
     * @return string The string with whitespace stripped from inside
     *                matching colour functions
     */
    protected function mungeRgb($string)
    {
        // One numeric component: integer or decimal, optional percent sign,
        // optional surrounding whitespace. Each use contributes three
        // capture groups, hence the \2, \5, \8, \11 back-references below.
        $p = '\s*(\d+(\.\d+)?([%]?))\s*';

        // The three-component pattern requires a literal "rgb(" or "hsl(",
        // so it can never match inside "rgba(" / "hsla(". Applying both
        // rewrites in sequence is therefore safe.
        $rewrites = array(
            '/(rgba|hsla)\(' . $p . ',' . $p . ',' . $p . ',' . $p . '\)/' => '\1(\2,\5,\8,\11)',
            '/(rgb|hsl)\(' . $p . ',' . $p . ',' . $p . '\)/' => '\1(\2,\5,\8)',
        );

        foreach ($rewrites as $pattern => $replacement) {
            $munged = preg_replace($pattern, $replacement, $string);
            // preg_replace() yields null on a PCRE error (e.g. backtrack
            // limit); keep the unmodified text rather than losing it.
            if ($munged !== null) {
                $string = $munged;
            }
        }

        return $string;
    }

    /**
     * Parses a possibly escaped CSS string and returns the "pure"
     * version of it.
     *
     * Handles three backslash forms: a hexadecimal escape of one to six
     * digits (optionally terminated by a single whitespace character), a
     * backslash followed by a newline (both are dropped), and a backslash
     * followed by any other character (that character is kept literally).
     * A lone trailing backslash is preserved as-is.
     *
     * @param string $string Possibly escaped CSS string
     * @return string The string with escapes expanded
     */
    protected function expandCSSEscape($string)
    {
        // flexibly parse it
        $ret = '';
        for ($i = 0, $c = strlen($string); $i < $c; $i++) {
            if ($string[$i] === '\\') {
                $i++;
                if ($i >= $c) {
                    // Backslash was the very last byte: keep it literally.
                    $ret .= '\\';
                    break;
                }
                if (ctype_xdigit($string[$i])) {
                    // Hexadecimal escape: gather at most six hex digits.
                    $code = $string[$i];
                    for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) {
                        if (!ctype_xdigit($string[$i])) {
                            break;
                        }
                        $code .= $string[$i];
                    }
                    // $i now sits on the first byte after the digits. A
                    // whitespace byte there terminates the escape and is
                    // consumed by the loop increment; anything else must be
                    // processed normally, so step back one position. This
                    // is done regardless of whether the code point turns
                    // out to be usable, so a rejected escape never swallows
                    // the unrelated character that follows it.
                    if ($i < $c && trim($string[$i]) !== '') {
                        $i--;
                    }
                    // We have to be extremely careful when adding
                    // new characters, to make sure we're not breaking
                    // the encoding: only append the character when
                    // cleanUTF8() does not reduce it to an empty string.
                    $char = HTMLPurifier_Encoder::unichr(hexdec($code));
                    if (HTMLPurifier_Encoder::cleanUTF8($char) !== '') {
                        $ret .= $char;
                    }
                    continue;
                }
                if ($string[$i] === "\n") {
                    // Backslash-newline: drop both characters.
                    continue;
                }
            }
            $ret .= $string[$i];
        }
        return $ret;
    }
}
143: | |
144: | // vim: et sw=4 sts=4 |
145: |