1: <?php
2:
3: /**
4: * Base class for all validating attribute definitions.
5: *
6: * This family of classes forms the core for not only HTML attribute validation,
7: * but also any sort of string that needs to be validated or cleaned (which
8: * means CSS properties and composite definitions are defined here too).
9: * Besides defining (through code) what precisely makes the string valid,
10: * subclasses are also responsible for cleaning the code if possible.
11: */
12:
abstract class HTMLPurifier_AttrDef
{

    /**
     * Tells us whether or not an HTML attribute is minimized.
     * Has no meaning in other contexts.
     * @type bool
     */
    public $minimized = false;

    /**
     * Tells us whether or not an HTML attribute is required.
     * Has no meaning in other contexts.
     * @type bool
     */
    public $required = false;

    /**
     * Validates and cleans passed string according to a definition.
     *
     * The return contract is defined by the implementing subclasses.
     *
     * @param string $string String to be validated and cleaned.
     * @param HTMLPurifier_Config $config Mandatory HTMLPurifier_Config object.
     * @param HTMLPurifier_Context $context Mandatory HTMLPurifier_Context object.
     */
    abstract public function validate($string, $config, $context);

    /**
     * Convenience method that parses a string as if it were CDATA.
     *
     * This method processes a string in the manner described at
     * <http://www.w3.org/TR/html4/types.html#h-6.2>: leading and trailing
     * whitespace is removed first, and then every remaining line feed,
     * tab and carriage return is replaced with a single space. While most
     * useful for HTML attributes specified as CDATA, it can also be
     * applied to most CSS values.
     *
     * @note This method is not entirely standards compliant: trim() removes
     *       more types of whitespace than specified in the spec, and
     *       interior line feeds become spaces instead of being dropped.
     *       In practice, this is rarely a problem, as those extra
     *       characters usually have already been removed by
     *       HTMLPurifier_Encoder.
     *
     * @warning This processing is inconsistent with XML's whitespace handling
     *          as specified by section 3.3.3 and referenced XHTML 1.0 section
     *          4.7. However, note that we are NOT necessarily
     *          parsing XML, thus, this behavior may still be correct. We
     *          assume that newlines have been normalized.
     *
     * @param string $string String to normalize.
     * @return string Trimmed string with interior "\n", "\t" and "\r"
     *                each replaced by a space.
     */
    public function parseCDATA($string)
    {
        $string = trim($string);
        $string = str_replace(array("\n", "\t", "\r"), ' ', $string);
        return $string;
    }

    /**
     * Factory method for creating this class from a string.
     *
     * The default implementation ignores $string and hands back this very
     * instance as a flyweight. If $string has an effect on the returned
     * object (i.e. you need to overload this method), it is best to clone
     * or instantiate new copies. (Instantiation is safer.)
     *
     * @param string $string String construction info
     * @return HTMLPurifier_AttrDef Created AttrDef object corresponding to $string
     */
    public function make($string)
    {
        return $this;
    }

    /**
     * Removes spaces from rgb(0, 0, 0) so that shorthand CSS properties work
     * properly. THIS IS A HACK!
     *
     * Handles rgb()/hsl() with three components and rgba()/hsla() with
     * four. Each component is an unsigned integer or decimal with an
     * optional trailing percent sign. Every matching colour function in
     * the string is rewritten, so values that mix both notations (e.g.
     * "rgba(0, 0, 0, 0.5) rgb(1, 2, 3)") are fully normalized.
     *
     * @param string $string a CSS colour definition
     * @return string The input with whitespace stripped from inside each
     *                matching colour function; the input unchanged if
     *                nothing matches or the regex engine reports an error.
     */
    protected function mungeRgb($string)
    {
        // One colour component surrounded by optional whitespace.
        // It contributes three capture groups, hence the \2, \5, \8, \11
        // back-references below (group 1 is the function name).
        $p = '\s*(\d+(\.\d+)?([%]?))\s*';

        // The three-argument pattern cannot match inside "rgba("/"hsla("
        // because it requires "(" directly after "rgb"/"hsl", so applying
        // both patterns in sequence is safe.
        $munged = preg_replace(
            array(
                '/(rgba|hsla)\('.$p.','.$p.','.$p.','.$p.'\)/',
                '/(rgb|hsl)\('.$p.','.$p.','.$p.'\)/',
            ),
            array(
                '\1(\2,\5,\8,\11)',
                '\1(\2,\5,\8)',
            ),
            $string
        );

        // preg_replace() yields null on a PCRE error; never leak that to
        // callers that expect a string.
        return $munged === null ? $string : $munged;
    }

    /**
     * Parses a possibly escaped CSS string and returns the "pure"
     * version of it.
     *
     * Handled forms:
     *  - backslash + 1 to 6 hex digits: replaced by the corresponding
     *    character; one directly following whitespace character is
     *    consumed as the escape terminator. If the decoded character is
     *    rejected by HTMLPurifier_Encoder::cleanUTF8() the escape is
     *    dropped, but the text after it is kept.
     *  - backslash + line feed: both are removed.
     *  - backslash + any other character: that character is kept literally.
     *  - a lone trailing backslash is kept as-is.
     *
     * @param string $string CSS string, possibly containing escapes
     * @return string The string with escapes expanded
     */
    protected function expandCSSEscape($string)
    {
        // flexibly parse it
        $ret = '';
        for ($i = 0, $c = strlen($string); $i < $c; $i++) {
            if ($string[$i] === '\\') {
                $i++;
                if ($i >= $c) {
                    // Backslash was the last byte; nothing to escape.
                    $ret .= '\\';
                    break;
                }
                if (ctype_xdigit($string[$i])) {
                    // Collect at most six hex digits.
                    $code = $string[$i];
                    for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) {
                        if (!ctype_xdigit($string[$i])) {
                            break;
                        }
                        $code .= $string[$i];
                    }
                    // $i now sits just past the hex digits. A whitespace
                    // character there terminates the escape and is
                    // consumed; anything else belongs to the following
                    // text, so step back and let the loop increment land
                    // on it. This must happen before the validity check
                    // so that a rejected escape does not swallow the
                    // next character.
                    if ($i < $c && trim($string[$i]) !== '') {
                        $i--;
                    }
                    // We have to be extremely careful when adding
                    // new characters, to make sure we're not breaking
                    // the encoding.
                    $char = HTMLPurifier_Encoder::unichr(hexdec($code));
                    if (HTMLPurifier_Encoder::cleanUTF8($char) !== '') {
                        $ret .= $char;
                    }
                    continue;
                }
                if ($string[$i] === "\n") {
                    // Escaped line feed: drop backslash and newline.
                    continue;
                }
                // Any other escaped character falls through and is
                // appended literally, without the backslash.
            }
            $ret .= $string[$i];
        }
        return $ret;
    }
}
143:
144: // vim: et sw=4 sts=4
145: