1: | <?php
|
2: |
|
3: |
|
4: |
|
5: |
|
6: |
|
7: | |
8: | |
9: |
|
/**
 * Replaces HTML character references (numeric and named entities) in a
 * string with the characters they stand for.
 *
 * Two families of methods are offered:
 *  - substituteTextEntities() / substituteAttrEntities(), driven by the
 *    regexes assembled in the constructor, and
 *  - substituteNonSpecialEntities() / substituteSpecialEntities(), driven by
 *    the fixed $_substituteEntitiesRegex pattern and the two "special"
 *    lookup tables declared on this class.
 */
class HTMLPurifier_EntityParser
{

    /**
     * Entity lookup object, created on first use through
     * HTMLPurifier_EntityLookup::instance(). Its ->table property is read as
     * a map from entity name to replacement string.
     * @var HTMLPurifier_EntityLookup
     */
    protected $_entity_lookup;

    /**
     * Pattern used by substituteTextEntities().
     * Capture groups: 1 = hex digits, 2 = decimal digits, 3 = name terminated
     * by a semicolon, 4 = semi-optional name (no semicolon required).
     * @var string
     */
    protected $_textEntitiesRegex;

    /**
     * Pattern used by substituteAttrEntities(). Same groups as the text
     * pattern, except that a semi-optional name only matches when it is not
     * followed by "=", ";" or an ASCII letter or digit.
     * @var string
     */
    protected $_attrEntitiesRegex;

    /**
     * Pattern that finds a semi-optional name directly after "&". Three empty
     * groups pad it so the name lands in group 4, the same slot it occupies
     * in the text/attribute patterns, letting entityCallback() be reused.
     * @var string
     */
    protected $_semiOptionalPrefixRegex;

    /**
     * Assembles the three regexes that depend on the list of entity names
     * accepted without a terminating semicolon.
     */
    public function __construct() {
        // Names that are recognised even when the trailing ";" is missing.
        $semi_optional = "quot|QUOT|lt|LT|gt|GT|amp|AMP|AElig|Aacute|Acirc|Agrave|Aring|Atilde|Auml|COPY|Ccedil|ETH|Eacute|Ecirc|Egrave|Euml|Iacute|Icirc|Igrave|Iuml|Ntilde|Oacute|Ocirc|Ograve|Oslash|Otilde|Ouml|REG|THORN|Uacute|Ucirc|Ugrave|Uuml|Yacute|aacute|acirc|acute|aelig|agrave|aring|atilde|auml|brvbar|ccedil|cedil|cent|copy|curren|deg|divide|eacute|ecirc|egrave|eth|euml|frac12|frac14|frac34|iacute|icirc|iexcl|igrave|iquest|iuml|laquo|macr|micro|middot|nbsp|not|ntilde|oacute|ocirc|ograve|ordf|ordm|oslash|otilde|ouml|para|plusmn|pound|raquo|reg|sect|shy|sup1|sup2|sup3|szlig|thorn|times|uacute|ucirc|ugrave|uml|uuml|yacute|yen|yuml";

        // Groups 1-3 are empty on purpose; see the property documentation.
        $this->_semiOptionalPrefixRegex = "/&()()()($semi_optional)/";

        // Alternatives shared by the text and attribute patterns:
        $common_alternatives =
            // hexadecimal reference, semicolon optional (group 1)
            '[#]x([a-fA-F0-9]+);?|' .
            // decimal reference, leading zeros skipped, semicolon optional (group 2)
            '[#]0*(\d+);?|' .
            // named reference, semicolon required (group 3)
            '([A-Za-z_:][A-Za-z0-9.\-_:]*);|';

        $this->_textEntitiesRegex =
            '/&(?:' .
            $common_alternatives .
            // semi-optional name, accepted unconditionally (group 4)
            "($semi_optional)" .
            ')/';

        $this->_attrEntitiesRegex =
            '/&(?:' .
            $common_alternatives .
            // semi-optional name, rejected when followed by "=", ";" or an
            // alphanumeric character (group 4)
            "($semi_optional)(?![=;A-Za-z0-9])" .
            ')/';
    }

    /**
     * Substitutes entities in $string using the text pattern.
     *
     * @param string $string String to have entities parsed.
     * @return string Result of preg_replace_callback() with entityCallback().
     */
    public function substituteTextEntities($string)
    {
        return preg_replace_callback(
            $this->_textEntitiesRegex,
            array($this, 'entityCallback'),
            $string
        );
    }

    /**
     * Substitutes entities in $string using the attribute pattern, which is
     * stricter about semi-optional names (see $_attrEntitiesRegex).
     *
     * @param string $string String to have entities parsed.
     * @return string Result of preg_replace_callback() with entityCallback().
     */
    public function substituteAttrEntities($string)
    {
        return preg_replace_callback(
            $this->_attrEntitiesRegex,
            array($this, 'entityCallback'),
            $string
        );
    }

    /**
     * Callback for the text, attribute and semi-optional-prefix patterns.
     *
     * @param array $matches PCRE match array: 0 = whole entity, 1 = hex
     *                       digits, 2 = decimal digits, 3 = name with
     *                       semicolon, 4 = semi-optional name. Trailing
     *                       groups that did not participate may be absent.
     * @return string Replacement text for the matched entity.
     */
    protected function entityCallback($matches)
    {
        $entity = $matches[0];

        // PCRE omits trailing groups that did not participate in the match,
        // so probe for them explicitly instead of silencing notices with "@".
        $hex_part = isset($matches[1]) ? $matches[1] : null;
        $dec_part = isset($matches[2]) ? $matches[2] : null;

        if (!empty($matches[3])) {
            $named_part = $matches[3];
        } elseif (!empty($matches[4])) {
            $named_part = $matches[4];
        } else {
            $named_part = "";
        }

        if ($hex_part !== null && $hex_part !== "") {
            return HTMLPurifier_Encoder::unichr(hexdec($hex_part));
        }
        if ($dec_part !== null && $dec_part !== "") {
            return HTMLPurifier_Encoder::unichr((int) $dec_part);
        }

        if (!$this->_entity_lookup) {
            $this->_entity_lookup = HTMLPurifier_EntityLookup::instance();
        }
        if (isset($this->_entity_lookup->table[$named_part])) {
            return $this->_entity_lookup->table[$named_part];
        }

        // Unknown name. If it came from the semicolon-terminated alternative
        // (group 3), re-scan the entity text for a semi-optional name right
        // after the "&" and substitute just that prefix. The nested call
        // arrives here with group 3 empty, so it cannot recurse any further.
        if (!empty($matches[3])) {
            return preg_replace_callback(
                $this->_semiOptionalPrefixRegex,
                array($this, 'entityCallback'),
                $entity
            );
        }
        return $entity;
    }

    /**
     * Pattern shared by substituteNonSpecialEntities() and
     * substituteSpecialEntities(): group 1 = hex digits, 2 = decimal digits,
     * 3 = name; the trailing semicolon is optional for every form.
     * @var string
     */
    protected $_substituteEntitiesRegex =
        '/&(?:[#]x([a-fA-F0-9]+)|[#]0*(\d+)|([A-Za-z_:][A-Za-z0-9.\-_:]*));?/';

    /**
     * Code point => character for the characters treated as special.
     * @var array
     */
    protected $_special_dec2str =
        array(
            34 => '"',
            38 => '&',
            39 => "'",
            60 => '<',
            62 => '>'
        );

    /**
     * Entity name => code point for the named entities treated as special.
     * @var array
     */
    protected $_special_ent2dec =
        array(
            'quot' => 34,
            'amp'  => 38,
            'lt'   => 60,
            'gt'   => 62
        );

    /**
     * Substitutes every entity matched by $_substituteEntitiesRegex except
     * the special ones, which are left exactly as written.
     *
     * @param string $string String to have non-special entities parsed.
     * @return string Result of preg_replace_callback() with
     *                nonSpecialEntityCallback().
     */
    public function substituteNonSpecialEntities($string)
    {
        return preg_replace_callback(
            $this->_substituteEntitiesRegex,
            array($this, 'nonSpecialEntityCallback'),
            $string
        );
    }

    /**
     * Callback for substituteNonSpecialEntities().
     *
     * @param array $matches PCRE match array: 0 = whole entity, 1 = hex
     *                       digits, 2 = decimal digits, 3 = name.
     * @return string The entity itself when it is special or unknown,
     *                otherwise its replacement.
     */
    protected function nonSpecialEntityCallback($matches)
    {
        $entity = $matches[0];

        // Every match is "&" plus at least one more character, and a numeric
        // match is at least "&#" plus one more, so these offsets always exist.
        if ($entity[1] === '#') {
            $code = ($entity[2] === 'x') ? hexdec($matches[1]) : (int) $matches[2];

            // hexdec() yields a float once the value no longer fits in an
            // integer; such a value cannot equal any of the integer keys, so
            // it is never used as an array offset.
            if (is_int($code) && isset($this->_special_dec2str[$code])) {
                return $entity;
            }
            return HTMLPurifier_Encoder::unichr($code);
        }

        $name = $matches[3];
        if (isset($this->_special_ent2dec[$name])) {
            return $entity;
        }
        if (!$this->_entity_lookup) {
            $this->_entity_lookup = HTMLPurifier_EntityLookup::instance();
        }
        if (isset($this->_entity_lookup->table[$name])) {
            return $this->_entity_lookup->table[$name];
        }
        return $entity;
    }

    /**
     * Substitutes only the special entities (numeric references to the code
     * points in $_special_dec2str and the names in $_special_ent2dec); every
     * other entity is left exactly as written.
     *
     * @param string $string String to have special entities parsed.
     * @return string Result of preg_replace_callback() with
     *                specialEntityCallback().
     */
    public function substituteSpecialEntities($string)
    {
        return preg_replace_callback(
            $this->_substituteEntitiesRegex,
            array($this, 'specialEntityCallback'),
            $string
        );
    }

    /**
     * Callback for substituteSpecialEntities().
     *
     * @param array $matches PCRE match array: 0 = whole entity, 1 = hex
     *                       digits, 2 = decimal digits, 3 = name.
     * @return string The special character, or the entity itself when it is
     *                not one of the special ones.
     */
    protected function specialEntityCallback($matches)
    {
        $entity = $matches[0];

        // Offsets 1 and 2 are guaranteed by the pattern (see
        // nonSpecialEntityCallback), so no error suppression is needed.
        if ($entity[1] === '#') {
            $int = ($entity[2] === 'x') ? hexdec($matches[1]) : (int) $matches[2];

            // A float from an oversized hex reference is never a valid key.
            if (is_int($int) && isset($this->_special_dec2str[$int])) {
                return $this->_special_dec2str[$int];
            }
            return $entity;
        }

        if (isset($this->_special_ent2dec[$matches[3]])) {
            return $this->_special_dec2str[$this->_special_ent2dec[$matches[3]]];
        }
        return $entity;
    }
}
|
284: |
|
285: |
|
286: | |