1: | <?php
|
2: |
|
3: | |
4: | |
5: | |
6: | |
7: | |
8: | |
9: | |
10: | |
11: | |
12: | |
13: |
|
/**
 * Strategy that rewrites a token stream so that its tags are well formed:
 * start/empty tags are normalised against the HTML definition, elements the
 * current parent does not allow cause that parent to be auto-closed (or the
 * token to be dropped), unmatched end tags are removed or turned into text,
 * and anything still open at the end of the document is closed.
 *
 * While walking the stream, every configured injector is given the chance to
 * inspect and replace text, element and end tokens.
 */
class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
{

    /**
     * Token list handed to execute(); bound by reference for the duration of
     * the run.
     * @type array
     */
    protected $tokens;

    /**
     * Token currently under the cursor; bound by reference to the local
     * $token used by execute(), so the helper methods always see the
     * current value.
     * @type mixed
     */
    protected $token;

    /**
     * Zipper built from the input tokens via HTMLPurifier_Zipper::fromArray();
     * all insertions and deletions go through it.
     * @type HTMLPurifier_Zipper
     */
    protected $zipper;

    /**
     * Stack of start tokens that are currently open (innermost last).
     * @type array
     */
    protected $stack;

    /**
     * Injectors active for this run, as a list indexed 0..n-1. The index is
     * what gets recorded in a token's ->skip and ->rewind fields.
     * @type array
     */
    protected $injectors;

    /**
     * Configuration object passed to execute().
     * @type mixed
     */
    protected $config;

    /**
     * Context object passed to execute().
     * @type mixed
     */
    protected $context;

    /**
     * Runs the strategy over a list of tokens.
     *
     * @param array $tokens  Input token list.
     * @param mixed $config  Configuration; must provide getHTMLDefinition(),
     *                       get() and getBatch().
     * @param mixed $context Context; must provide get(), register() and
     *                       destroy().
     * @return array The rewritten token list (empty array for empty input).
     * @throws HTMLPurifier_Exception If an unexpected token type is met.
     */
    public function execute($tokens, $config, $context)
    {
        $definition = $config->getHTMLDefinition();

        // local variables
        $generator = new HTMLPurifier_Generator($config, $context);
        $escape_invalid_tags = $config->get('Core.EscapeInvalidTags');
        // elements allowed directly under the document's parent element;
        // consulted when deciding whether an auto-close is worthwhile
        $global_parent_allowed_elements = $definition->info_parent_def->child->getAllowedElements($config);
        $e = $context->get('ErrorCollector', true);
        // index of the injector most recently visited by one of the injector
        // loops below, or false when there is none
        $i = false;
        list($zipper, $token) = HTMLPurifier_Zipper::fromArray($tokens);
        if ($token === null) {
            // nothing to process
            return array();
        }
        // when true, the next loop iteration looks at $token again instead of
        // advancing the zipper
        $reprocess = false;
        $stack = array();

        // member variables: bound by reference so that processToken(),
        // insertBefore() and remove() operate on the same state as this loop
        $this->stack =& $stack;
        $this->tokens =& $tokens;
        $this->token =& $token;
        $this->zipper =& $zipper;
        $this->config = $config;
        $this->context = $context;

        // context variables, made available to injectors
        $context->register('CurrentNesting', $stack);
        $context->register('InputZipper', $zipper);
        $context->register('CurrentToken', $token);

        // -- begin injector setup --

        $this->injectors = array();

        $injectors = $config->getBatch('AutoFormat');
        $def_injectors = $definition->info_injector;
        $custom_injectors = $injectors['Custom'];
        unset($injectors['Custom']); // handled separately below
        foreach ($injectors as $injector => $b) {
            // keys containing a dot are not injector names; skip them
            if (strpos($injector, '.') !== false) {
                continue;
            }
            $injector = "HTMLPurifier_Injector_$injector";
            if (!$b) {
                continue;
            }
            $this->injectors[] = new $injector;
        }
        foreach ($def_injectors as $injector) {
            // used as-is, so these are expected to be injector objects already
            $this->injectors[] = $injector;
        }
        foreach ($custom_injectors as $injector) {
            if (!$injector) {
                continue;
            }
            if (is_string($injector)) {
                $injector = "HTMLPurifier_Injector_$injector";
                $injector = new $injector;
            }
            $this->injectors[] = $injector;
        }

        // Let every injector prepare itself. A truthy return value from
        // prepare() means the injector cannot be enabled; the value is
        // reported in the warning.
        //
        // The surviving injectors are collected into a fresh list instead of
        // being spliced out of $this->injectors inside the loop: foreach
        // iterates over a snapshot, so after one removal the snapshot's
        // indices no longer match the live (reindexed) list and a later
        // removal would hit the wrong injector or none at all.
        $enabled_injectors = array();
        foreach ($this->injectors as $injector) {
            $error = $injector->prepare($config, $context);
            if (!$error) {
                $enabled_injectors[] = $injector;
                continue;
            }
            trigger_error("Cannot enable {$injector->name} injector because $error is not allowed", E_USER_WARNING);
        }
        $this->injectors = $enabled_injectors;

        // -- end injector setup --

        // Main loop. Whenever the stream is modified, $reprocess is set and
        // the loop `continue`s so that the (possibly new) current token is
        // examined from the top again; otherwise the zipper is advanced.
        for (;;
             // only advance if we don't need to reprocess
             $reprocess ? $reprocess = false : $token = $zipper->next($token)) {

            // check whether the last visited injector asked for a rewind
            if (is_int($i)) {
                $rewind_offset = $this->injectors[$i]->getRewindOffset();
                if (is_int($rewind_offset)) {
                    for ($j = 0; $j < $rewind_offset; $j++) {
                        if (empty($zipper->front)) {
                            // already at the start of the stream
                            break;
                        }
                        $token = $zipper->prev($token);
                        // let injector $i see this token again, and mark it
                        // so that only injector $i handles it
                        unset($token->skip[$i]);
                        $token->rewind = $i;
                        // undo the effect the token had on the nesting stack
                        if ($token instanceof HTMLPurifier_Token_Start) {
                            array_pop($this->stack);
                        } elseif ($token instanceof HTMLPurifier_Token_End) {
                            $this->stack[] = $token->start;
                        }
                    }
                }
                $i = false;
            }

            // handle end of document
            if ($token === null) {
                // nothing left open: we are done
                if (empty($this->stack)) {
                    break;
                }

                // peek at the innermost open element
                $top_nesting = array_pop($this->stack);
                $this->stack[] = $top_nesting;

                // report, unless the error is suppressed for this token
                if ($e && !isset($top_nesting->armor['MakeWellFormed_TagClosedError'])) {
                    $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $top_nesting);
                }

                // synthesise the missing end tag as the current token
                $token = new HTMLPurifier_Token_End($top_nesting->name);

                // and run it through the normal end-tag handling
                $reprocess = true;
                continue;
            }

            // non-tag tokens: only text is offered to the injectors
            if (empty($token->is_tag)) {
                if ($token instanceof HTMLPurifier_Token_Text) {
                    foreach ($this->injectors as $i => $injector) {
                        if (isset($token->skip[$i])) {
                            // this injector has already handled the token
                            continue;
                        }
                        if ($token->rewind !== null && $token->rewind !== $i) {
                            // token was rewound to on behalf of another injector
                            continue;
                        }
                        // the injector modifies $r through its parameter
                        $r = $token;
                        $injector->handleText($r);
                        $token = $this->processToken($r, $i);
                        $reprocess = true;
                        break;
                    }
                }
                // any other non-tag token is passed through untouched
                continue;
            }

            if (isset($definition->info[$token->name])) {
                $type = $definition->info[$token->name]->child->type;
            } else {
                $type = false; // element unknown to the definition
            }

            // checks for anything that is *not* an end tag
            $ok = false;
            if ($type === 'empty' && $token instanceof HTMLPurifier_Token_Start) {
                // written as a start tag, but the element is defined as empty
                $token = new HTMLPurifier_Token_Empty(
                    $token->name,
                    $token->attr,
                    $token->line,
                    $token->col,
                    $token->armor
                );
                $ok = true;
            } elseif ($type && $type !== 'empty' && $token instanceof HTMLPurifier_Token_Empty) {
                // written as an empty tag, but the element is not empty:
                // turn the current token into the end tag and insert the
                // matching start tag in front of it
                $old_token = $token;
                $token = new HTMLPurifier_Token_End($token->name);
                $token = $this->insertBefore(
                    new HTMLPurifier_Token_Start($old_token->name, $old_token->attr, $old_token->line, $old_token->col, $old_token->armor)
                );
                // the stream changed, so start over at the inserted token
                $reprocess = true;
                continue;
            } elseif ($token instanceof HTMLPurifier_Token_Empty) {
                // genuine empty tag
                $ok = true;
            } elseif ($token instanceof HTMLPurifier_Token_Start) {
                // start tag: make sure the current parent allows it
                if (!empty($this->stack)) {

                    // peek at the innermost open element
                    $parent = array_pop($this->stack);
                    $this->stack[] = $parent;

                    $parent_def = null;
                    $parent_elements = null;
                    $autoclose = false;
                    if (isset($definition->info[$parent->name])) {
                        $parent_def = $definition->info[$parent->name];
                        $parent_elements = $parent_def->child->getAllowedElements($config);
                        // parent must be closed if it cannot contain this element
                        $autoclose = !isset($parent_elements[$token->name]);
                    }

                    if ($autoclose && $definition->info[$token->name]->wrap) {
                        // The element declares a wrapper. If the wrapper may
                        // contain the element and the parent may contain the
                        // wrapper, open the wrapper instead of closing the
                        // parent.
                        $wrapname = $definition->info[$token->name]->wrap;
                        $wrapdef = $definition->info[$wrapname];
                        $elements = $wrapdef->child->getAllowedElements($config);
                        if (isset($elements[$token->name]) && isset($parent_elements[$wrapname])) {
                            $newtoken = new HTMLPurifier_Token_Start($wrapname);
                            $token = $this->insertBefore($newtoken);
                            $reprocess = true;
                            continue;
                        }
                    }

                    // formatting parents are re-opened after the new element
                    $carryover = false;
                    if ($autoclose && $parent_def->formatting) {
                        $carryover = true;
                    }

                    if ($autoclose) {
                        // Closing the parent only helps if the element is
                        // allowed somewhere further up: under the document
                        // parent, or under some open ancestor (directly or
                        // through its wrapper).
                        $autoclose_ok = isset($global_parent_allowed_elements[$token->name]);
                        if (!$autoclose_ok) {
                            foreach ($this->stack as $ancestor) {
                                $elements = $definition->info[$ancestor->name]->child->getAllowedElements($config);
                                if (isset($elements[$token->name])) {
                                    $autoclose_ok = true;
                                    break;
                                }
                                if ($definition->info[$token->name]->wrap) {
                                    $wrapname = $definition->info[$token->name]->wrap;
                                    $wrapdef = $definition->info[$wrapname];
                                    $wrap_elements = $wrapdef->child->getAllowedElements($config);
                                    if (isset($wrap_elements[$token->name]) && isset($elements[$wrapname])) {
                                        $autoclose_ok = true;
                                        break;
                                    }
                                }
                            }
                        }
                        if ($autoclose_ok) {
                            // build the end tag that closes the parent
                            $new_token = new HTMLPurifier_Token_End($parent->name);
                            $new_token->start = $parent;
                            // report, unless the error is suppressed for this token
                            if ($e && !isset($parent->armor['MakeWellFormed_TagClosedError'])) {
                                if (!$carryover) {
                                    $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent);
                                } else {
                                    $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag carryover', $parent);
                                }
                            }
                            if ($carryover) {
                                // close parent, keep the element, then reopen
                                // a copy of the parent inside it
                                $element = clone $parent;
                                // the copy must not trigger another "closed" report
                                $element->armor['MakeWellFormed_TagClosedError'] = true;
                                $element->carryover = true;
                                $token = $this->processToken(array($new_token, $token, $element));
                            } else {
                                $token = $this->insertBefore($new_token);
                            }
                        } else {
                            // the element is not allowed anywhere: drop it
                            $token = $this->remove();
                        }
                        $reprocess = true;
                        continue;
                    }

                }
                $ok = true;
            }

            if ($ok) {
                // offer the start/empty token to the injectors
                foreach ($this->injectors as $i => $injector) {
                    if (isset($token->skip[$i])) {
                        // this injector has already handled the token
                        continue;
                    }
                    if ($token->rewind !== null && $token->rewind !== $i) {
                        // token was rewound to on behalf of another injector
                        continue;
                    }
                    $r = $token;
                    $injector->handleElement($r);
                    $token = $this->processToken($r, $i);
                    $reprocess = true;
                    break;
                }
                if (!$reprocess) {
                    // no injector touched it: record the newly opened element
                    if ($token instanceof HTMLPurifier_Token_Start) {
                        $this->stack[] = $token;
                    } elseif ($token instanceof HTMLPurifier_Token_End) {
                        throw new HTMLPurifier_Exception(
                            'Improper handling of end tag in start code; possible error in MakeWellFormed'
                        );
                    }
                }
                continue;
            }

            // sanity check: everything left must be an end tag
            if (!$token instanceof HTMLPurifier_Token_End) {
                throw new HTMLPurifier_Exception('Unaccounted for tag token in input stream, bug in HTML Purifier');
            }

            // end tag with nothing open: it cannot match anything
            if (empty($this->stack)) {
                if ($escape_invalid_tags) {
                    if ($e) {
                        $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag to text');
                    }
                    $token = new HTMLPurifier_Token_Text($generator->generateFromToken($token));
                } else {
                    if ($e) {
                        $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag removed');
                    }
                    $token = $this->remove();
                }
                $reprocess = true;
                continue;
            }

            // common case: the end tag closes the innermost open element
            $current_parent = array_pop($this->stack);
            if ($current_parent->name == $token->name) {
                $token->start = $current_parent;
                foreach ($this->injectors as $i => $injector) {
                    if (isset($token->skip[$i])) {
                        // this injector has already handled the token
                        continue;
                    }
                    if ($token->rewind !== null && $token->rewind !== $i) {
                        // token was rewound to on behalf of another injector
                        continue;
                    }
                    $r = $token;
                    $injector->handleEnd($r);
                    $token = $this->processToken($r, $i);
                    // the token will be reprocessed, so the element counts
                    // as still open for now
                    $this->stack[] = $current_parent;
                    $reprocess = true;
                    break;
                }
                continue;
            }

            // mismatch: put the element back and look further up the stack
            $this->stack[] = $current_parent;

            $size = count($this->stack);
            // the innermost element ($size - 1) was already checked above
            $skipped_tags = false;
            for ($j = $size - 2; $j >= 0; $j--) {
                if ($this->stack[$j]->name == $token->name) {
                    // matching element plus everything opened inside it
                    $skipped_tags = array_slice($this->stack, $j);
                    break;
                }
            }

            // no open element matches this end tag at all
            if ($skipped_tags === false) {
                if ($escape_invalid_tags) {
                    if ($e) {
                        $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag to text');
                    }
                    $token = new HTMLPurifier_Token_Text($generator->generateFromToken($token));
                } else {
                    if ($e) {
                        $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag removed');
                    }
                    $token = $this->remove();
                }
                $reprocess = true;
                continue;
            }

            // report every element that gets closed implicitly; index 0 is
            // the element the end tag actually matches, so it is excluded
            $c = count($skipped_tags);
            if ($e) {
                for ($j = $c - 1; $j > 0; $j--) {
                    // unless the error is suppressed for this token
                    if (!isset($skipped_tags[$j]->armor['MakeWellFormed_TagClosedError'])) {
                        $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$j]);
                    }
                }
            }

            // Build the replacement: end tags for the skipped elements
            // (innermost first) in front of the current end tag, and copies
            // of skipped formatting elements after it so they are reopened.
            $replace = array($token);
            for ($j = 1; $j < $c; $j++) {
                $new_token = new HTMLPurifier_Token_End($skipped_tags[$j]->name);
                $new_token->start = $skipped_tags[$j];
                array_unshift($replace, $new_token);
                if (isset($definition->info[$new_token->name]) && $definition->info[$new_token->name]->formatting) {
                    $element = clone $skipped_tags[$j];
                    $element->carryover = true;
                    $element->armor['MakeWellFormed_TagClosedError'] = true;
                    $replace[] = $element;
                }
            }
            $token = $this->processToken($replace);
            $reprocess = true;
            continue;
        }

        $context->destroy('CurrentToken');
        $context->destroy('CurrentNesting');
        $context->destroy('InputZipper');

        unset($this->injectors, $this->stack, $this->tokens);
        return $zipper->toArray($token);
    }

    /**
     * Applies a substitution value to the stream at the current token.
     *
     * The value is normalised to the form array(int $delete, token...):
     *
     * - a single token object replaces the current token (delete 1);
     * - an integer deletes that many tokens and inserts nothing;
     * - false deletes the current token;
     * - an array of tokens replaces the current token, unless its first
     *   element is an integer, which is then taken as the number of tokens
     *   to delete.
     *
     * When $injector is a non-negative index, every inserted token inherits
     * the skip marks of the first removed token and is additionally marked
     * as already handled by that injector.
     *
     * @param mixed $token    Substitution value as described above.
     * @param int   $injector Index of the injector that produced the value,
     *                        or -1 if no injector is involved.
     * @return mixed Second element of the zipper's splice() result, which
     *               the caller uses as the new current token.
     * @throws HTMLPurifier_Exception If the value has an unsupported type or
     *                                asks for zero tokens to be deleted.
     */
    protected function processToken($token, $injector = -1)
    {
        // normalise the different forms of replacement
        if (is_object($token)) {
            $tmp = $token;
            $token = array(1, $tmp);
        }
        if (is_int($token)) {
            $tmp = $token;
            $token = array($tmp);
        }
        if ($token === false) {
            $token = array(1);
        }
        if (!is_array($token)) {
            throw new HTMLPurifier_Exception('Invalid token type from injector');
        }
        if (!is_int($token[0])) {
            array_unshift($token, 1);
        }
        if ($token[0] === 0) {
            throw new HTMLPurifier_Exception('Deleting zero tokens is not valid');
        }

        // $token is now array(int $delete, token...)
        $delete = array_shift($token);
        list($old, $r) = $this->zipper->splice($this->token, $delete, $token);

        if ($injector > -1) {
            // carry the skip marks over to the replacement tokens so that
            // injectors which already saw the original do not see them again
            $oldskip = isset($old[0]) ? $old[0]->skip : array();
            foreach ($token as $object) {
                $object->skip = $oldskip;
                $object->skip[$injector] = true;
            }
        }

        return $r;
    }

    /**
     * Inserts a token in front of the current token without deleting
     * anything.
     *
     * @param mixed $token Token to insert.
     * @return mixed Second element of the zipper's splice() result, which
     *               the caller uses as the new current token.
     */
    private function insertBefore($token)
    {
        // a splice that deletes zero tokens is a pure insertion
        $splice = $this->zipper->splice($this->token, 0, array($token));

        return $splice[1];
    }

    /**
     * Deletes at the zipper's current position.
     *
     * @return mixed Whatever the zipper's delete() returns; the caller uses
     *               it as the new current token.
     */
    private function remove()
    {
        return $this->zipper->delete();
    }
}
|
612: |
|
613: |
|
614: |
|
615: |
|
616: |
|
617: |
|
618: |
|
619: |
|
620: |
|
621: |
|
622: |
|
623: |
|
624: |
|
625: |
|
626: |
|
627: |
|
628: |
|
629: |
|
630: |
|
631: |
|
632: |
|
633: |
|
634: |
|
635: |
|
636: |
|
637: |
|
638: |
|
639: |
|
640: |
|
641: |
|
642: |
|
643: |
|
644: |
|
645: |
|
646: |
|
647: |
|
648: |
|
649: |
|
650: |
|
651: |
|
652: |
|
653: |
|
654: |
|
655: |
|
656: |
|
657: |
|
658: |
|
659: |
|
660: | |