1: | <?php
|
2: |
|
3: | |
4: | |
5: | |
6: | |
7: | |
8: | |
9: | |
10: | |
11: |
|
12: |
|
/**
 * Lexer that runs the input through the HTML5 parser defined in this file and
 * converts the resulting DOM into a token list.
 */
class HTMLPurifier_Lexer_PH5P extends HTMLPurifier_Lexer_DOMLex
{
    /**
     * Tokenizes an HTML string.
     *
     * The input is normalized and wrapped, parsed by HTML5, and the <body>
     * element of the resulting document is handed to tokenizeDOM(). If the
     * parser throws a DOMException, the exception is registered in the
     * context under 'PH5PError' and the original input is tokenized with
     * HTMLPurifier_Lexer_DirectLex instead.
     *
     * @param string $html
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array tokens produced by tokenizeDOM() or by the fallback lexer
     */
    public function tokenizeHTML($html, $config, $context)
    {
        $prepared = $this->wrapHTML(
            $this->normalize($html, $config, $context),
            $config,
            $context,
            false
        );

        try {
            $parser = new HTML5($prepared);
            $doc = $parser->save();
        } catch (DOMException $e) {
            $fallback = new HTMLPurifier_Lexer_DirectLex();
            $context->register('PH5PError', $e);
            return $fallback->tokenizeHTML($html, $config, $context);
        }

        // First <html> element, then its first <body> element.
        $body = $doc->getElementsByTagName('html')->item(0)
            ->getElementsByTagName('body')->item(0);

        $tokens = array();
        $this->tokenizeDOM($body, $tokens, $config);
        return $tokens;
    }
}
|
44: |
|
45: | |
46: | |
47: | |
48: | |
49: | |
50: | |
51: | |
52: | |
53: | |
54: | |
55: | |
56: | |
57: | |
58: | |
59: | |
60: | |
61: | |
62: | |
63: | |
64: | |
65: | |
66: | |
67: | |
68: |
|
69: |
|
70: | class HTML5
|
71: | {
|
    // Input string being tokenized.
    private $data;
    // Index of the current character in $data; the constructor starts it at -1.
    private $char;
    // strlen($data); a cursor equal to this value means end of input.
    private $EOF;
    // Current state name (method name minus the "State" suffix); null stops the loop.
    private $state;
    // HTML5TreeConstructer instance that receives every emitted token.
    private $tree;
    // Token currently being assembled by the tag/comment/doctype states.
    private $token;
    // Current content model: one of the PCDATA/RCDATA/CDATA/PLAINTEXT constants.
    private $content_model;
    // Escape flag toggled by dataState() in the RCDATA/CDATA content models.
    private $escape = false;
|
80: | private $entities = array(
|
81: | 'AElig;',
|
82: | 'AElig',
|
83: | 'AMP;',
|
84: | 'AMP',
|
85: | 'Aacute;',
|
86: | 'Aacute',
|
87: | 'Acirc;',
|
88: | 'Acirc',
|
89: | 'Agrave;',
|
90: | 'Agrave',
|
91: | 'Alpha;',
|
92: | 'Aring;',
|
93: | 'Aring',
|
94: | 'Atilde;',
|
95: | 'Atilde',
|
96: | 'Auml;',
|
97: | 'Auml',
|
98: | 'Beta;',
|
99: | 'COPY;',
|
100: | 'COPY',
|
101: | 'Ccedil;',
|
102: | 'Ccedil',
|
103: | 'Chi;',
|
104: | 'Dagger;',
|
105: | 'Delta;',
|
106: | 'ETH;',
|
107: | 'ETH',
|
108: | 'Eacute;',
|
109: | 'Eacute',
|
110: | 'Ecirc;',
|
111: | 'Ecirc',
|
112: | 'Egrave;',
|
113: | 'Egrave',
|
114: | 'Epsilon;',
|
115: | 'Eta;',
|
116: | 'Euml;',
|
117: | 'Euml',
|
118: | 'GT;',
|
119: | 'GT',
|
120: | 'Gamma;',
|
121: | 'Iacute;',
|
122: | 'Iacute',
|
123: | 'Icirc;',
|
124: | 'Icirc',
|
125: | 'Igrave;',
|
126: | 'Igrave',
|
127: | 'Iota;',
|
128: | 'Iuml;',
|
129: | 'Iuml',
|
130: | 'Kappa;',
|
131: | 'LT;',
|
132: | 'LT',
|
133: | 'Lambda;',
|
134: | 'Mu;',
|
135: | 'Ntilde;',
|
136: | 'Ntilde',
|
137: | 'Nu;',
|
138: | 'OElig;',
|
139: | 'Oacute;',
|
140: | 'Oacute',
|
141: | 'Ocirc;',
|
142: | 'Ocirc',
|
143: | 'Ograve;',
|
144: | 'Ograve',
|
145: | 'Omega;',
|
146: | 'Omicron;',
|
147: | 'Oslash;',
|
148: | 'Oslash',
|
149: | 'Otilde;',
|
150: | 'Otilde',
|
151: | 'Ouml;',
|
152: | 'Ouml',
|
153: | 'Phi;',
|
154: | 'Pi;',
|
155: | 'Prime;',
|
156: | 'Psi;',
|
157: | 'QUOT;',
|
158: | 'QUOT',
|
159: | 'REG;',
|
160: | 'REG',
|
161: | 'Rho;',
|
162: | 'Scaron;',
|
163: | 'Sigma;',
|
164: | 'THORN;',
|
165: | 'THORN',
|
166: | 'TRADE;',
|
167: | 'Tau;',
|
168: | 'Theta;',
|
169: | 'Uacute;',
|
170: | 'Uacute',
|
171: | 'Ucirc;',
|
172: | 'Ucirc',
|
173: | 'Ugrave;',
|
174: | 'Ugrave',
|
175: | 'Upsilon;',
|
176: | 'Uuml;',
|
177: | 'Uuml',
|
178: | 'Xi;',
|
179: | 'Yacute;',
|
180: | 'Yacute',
|
181: | 'Yuml;',
|
182: | 'Zeta;',
|
183: | 'aacute;',
|
184: | 'aacute',
|
185: | 'acirc;',
|
186: | 'acirc',
|
187: | 'acute;',
|
188: | 'acute',
|
189: | 'aelig;',
|
190: | 'aelig',
|
191: | 'agrave;',
|
192: | 'agrave',
|
193: | 'alefsym;',
|
194: | 'alpha;',
|
195: | 'amp;',
|
196: | 'amp',
|
197: | 'and;',
|
198: | 'ang;',
|
199: | 'apos;',
|
200: | 'aring;',
|
201: | 'aring',
|
202: | 'asymp;',
|
203: | 'atilde;',
|
204: | 'atilde',
|
205: | 'auml;',
|
206: | 'auml',
|
207: | 'bdquo;',
|
208: | 'beta;',
|
209: | 'brvbar;',
|
210: | 'brvbar',
|
211: | 'bull;',
|
212: | 'cap;',
|
213: | 'ccedil;',
|
214: | 'ccedil',
|
215: | 'cedil;',
|
216: | 'cedil',
|
217: | 'cent;',
|
218: | 'cent',
|
219: | 'chi;',
|
220: | 'circ;',
|
221: | 'clubs;',
|
222: | 'cong;',
|
223: | 'copy;',
|
224: | 'copy',
|
225: | 'crarr;',
|
226: | 'cup;',
|
227: | 'curren;',
|
228: | 'curren',
|
229: | 'dArr;',
|
230: | 'dagger;',
|
231: | 'darr;',
|
232: | 'deg;',
|
233: | 'deg',
|
234: | 'delta;',
|
235: | 'diams;',
|
236: | 'divide;',
|
237: | 'divide',
|
238: | 'eacute;',
|
239: | 'eacute',
|
240: | 'ecirc;',
|
241: | 'ecirc',
|
242: | 'egrave;',
|
243: | 'egrave',
|
244: | 'empty;',
|
245: | 'emsp;',
|
246: | 'ensp;',
|
247: | 'epsilon;',
|
248: | 'equiv;',
|
249: | 'eta;',
|
250: | 'eth;',
|
251: | 'eth',
|
252: | 'euml;',
|
253: | 'euml',
|
254: | 'euro;',
|
255: | 'exist;',
|
256: | 'fnof;',
|
257: | 'forall;',
|
258: | 'frac12;',
|
259: | 'frac12',
|
260: | 'frac14;',
|
261: | 'frac14',
|
262: | 'frac34;',
|
263: | 'frac34',
|
264: | 'frasl;',
|
265: | 'gamma;',
|
266: | 'ge;',
|
267: | 'gt;',
|
268: | 'gt',
|
269: | 'hArr;',
|
270: | 'harr;',
|
271: | 'hearts;',
|
272: | 'hellip;',
|
273: | 'iacute;',
|
274: | 'iacute',
|
275: | 'icirc;',
|
276: | 'icirc',
|
277: | 'iexcl;',
|
278: | 'iexcl',
|
279: | 'igrave;',
|
280: | 'igrave',
|
281: | 'image;',
|
282: | 'infin;',
|
283: | 'int;',
|
284: | 'iota;',
|
285: | 'iquest;',
|
286: | 'iquest',
|
287: | 'isin;',
|
288: | 'iuml;',
|
289: | 'iuml',
|
290: | 'kappa;',
|
291: | 'lArr;',
|
292: | 'lambda;',
|
293: | 'lang;',
|
294: | 'laquo;',
|
295: | 'laquo',
|
296: | 'larr;',
|
297: | 'lceil;',
|
298: | 'ldquo;',
|
299: | 'le;',
|
300: | 'lfloor;',
|
301: | 'lowast;',
|
302: | 'loz;',
|
303: | 'lrm;',
|
304: | 'lsaquo;',
|
305: | 'lsquo;',
|
306: | 'lt;',
|
307: | 'lt',
|
308: | 'macr;',
|
309: | 'macr',
|
310: | 'mdash;',
|
311: | 'micro;',
|
312: | 'micro',
|
313: | 'middot;',
|
314: | 'middot',
|
315: | 'minus;',
|
316: | 'mu;',
|
317: | 'nabla;',
|
318: | 'nbsp;',
|
319: | 'nbsp',
|
320: | 'ndash;',
|
321: | 'ne;',
|
322: | 'ni;',
|
323: | 'not;',
|
324: | 'not',
|
325: | 'notin;',
|
326: | 'nsub;',
|
327: | 'ntilde;',
|
328: | 'ntilde',
|
329: | 'nu;',
|
330: | 'oacute;',
|
331: | 'oacute',
|
332: | 'ocirc;',
|
333: | 'ocirc',
|
334: | 'oelig;',
|
335: | 'ograve;',
|
336: | 'ograve',
|
337: | 'oline;',
|
338: | 'omega;',
|
339: | 'omicron;',
|
340: | 'oplus;',
|
341: | 'or;',
|
342: | 'ordf;',
|
343: | 'ordf',
|
344: | 'ordm;',
|
345: | 'ordm',
|
346: | 'oslash;',
|
347: | 'oslash',
|
348: | 'otilde;',
|
349: | 'otilde',
|
350: | 'otimes;',
|
351: | 'ouml;',
|
352: | 'ouml',
|
353: | 'para;',
|
354: | 'para',
|
355: | 'part;',
|
356: | 'permil;',
|
357: | 'perp;',
|
358: | 'phi;',
|
359: | 'pi;',
|
360: | 'piv;',
|
361: | 'plusmn;',
|
362: | 'plusmn',
|
363: | 'pound;',
|
364: | 'pound',
|
365: | 'prime;',
|
366: | 'prod;',
|
367: | 'prop;',
|
368: | 'psi;',
|
369: | 'quot;',
|
370: | 'quot',
|
371: | 'rArr;',
|
372: | 'radic;',
|
373: | 'rang;',
|
374: | 'raquo;',
|
375: | 'raquo',
|
376: | 'rarr;',
|
377: | 'rceil;',
|
378: | 'rdquo;',
|
379: | 'real;',
|
380: | 'reg;',
|
381: | 'reg',
|
382: | 'rfloor;',
|
383: | 'rho;',
|
384: | 'rlm;',
|
385: | 'rsaquo;',
|
386: | 'rsquo;',
|
387: | 'sbquo;',
|
388: | 'scaron;',
|
389: | 'sdot;',
|
390: | 'sect;',
|
391: | 'sect',
|
392: | 'shy;',
|
393: | 'shy',
|
394: | 'sigma;',
|
395: | 'sigmaf;',
|
396: | 'sim;',
|
397: | 'spades;',
|
398: | 'sub;',
|
399: | 'sube;',
|
400: | 'sum;',
|
401: | 'sup1;',
|
402: | 'sup1',
|
403: | 'sup2;',
|
404: | 'sup2',
|
405: | 'sup3;',
|
406: | 'sup3',
|
407: | 'sup;',
|
408: | 'supe;',
|
409: | 'szlig;',
|
410: | 'szlig',
|
411: | 'tau;',
|
412: | 'there4;',
|
413: | 'theta;',
|
414: | 'thetasym;',
|
415: | 'thinsp;',
|
416: | 'thorn;',
|
417: | 'thorn',
|
418: | 'tilde;',
|
419: | 'times;',
|
420: | 'times',
|
421: | 'trade;',
|
422: | 'uArr;',
|
423: | 'uacute;',
|
424: | 'uacute',
|
425: | 'uarr;',
|
426: | 'ucirc;',
|
427: | 'ucirc',
|
428: | 'ugrave;',
|
429: | 'ugrave',
|
430: | 'uml;',
|
431: | 'uml',
|
432: | 'upsih;',
|
433: | 'upsilon;',
|
434: | 'uuml;',
|
435: | 'uuml',
|
436: | 'weierp;',
|
437: | 'xi;',
|
438: | 'yacute;',
|
439: | 'yacute',
|
440: | 'yen;',
|
441: | 'yen',
|
442: | 'yuml;',
|
443: | 'yuml',
|
444: | 'zeta;',
|
445: | 'zwj;',
|
446: | 'zwnj;'
|
447: | );
|
448: |
|
    // Content models stored in $content_model.
    const PCDATA = 0;
    const RCDATA = 1;
    const CDATA = 2;
    const PLAINTEXT = 3;

    // Token types stored under each token's 'type' key.
    const DOCTYPE = 0;
    const STARTTAG = 1;
    const ENDTAG = 2;
    const COMMENT = 3;
    const CHARACTR = 4;
    const EOF = 5;
|
460: |
|
/**
 * Stores the input and immediately runs the tokenizer over it, feeding
 * every token to a freshly created HTML5TreeConstructer.
 *
 * @param string $data HTML source to tokenize.
 */
public function __construct($data)
{
    $this->data = $data;
    $this->EOF = strlen($data);
    $this->char = -1;
    $this->content_model = self::PCDATA;
    $this->tree = new HTML5TreeConstructer;
    $this->state = 'data';

    // Every state method selects the next state; EOF() sets it to null.
    do {
        $handler = $this->state . 'State';
        $this->$handler();
    } while ($this->state !== null);
}
|
475: |
|
/**
 * Returns whatever the tree constructor's save() returns.
 */
public function save()
{
    $builder = $this->tree;
    return $builder->save();
}
|
480: |
|
/**
 * Returns the character under the cursor, or false once the cursor has
 * reached the end of the input.
 */
private function char()
{
    if ($this->char >= $this->EOF) {
        return false;
    }

    return $this->data[$this->char];
}
|
487: |
|
/**
 * Reads from the input without moving the cursor.
 *
 * @param int $s Offset of the first character to read.
 * @param int $l Number of characters to read; 0 means a single character.
 * @return string|null The requested character(s), or null when the
 *                     requested range is not entirely inside the input.
 */
private function character($s, $l = 0)
{
    // A negative offset would make PHP count from the end of the string,
    // which is never what a look-behind near the start of input wants.
    if ($s < 0) {
        return null;
    }

    if ($l === 0) {
        return ($s < $this->EOF) ? $this->data[$s] : null;
    }

    // A range that ends exactly on the last character is still in bounds,
    // hence <= rather than <.
    if ($s + $l <= $this->EOF) {
        return substr($this->data, $s, $l);
    }

    return null;
}
|
498: |
|
/**
 * Returns the leading run of characters, beginning at $start, that belong
 * to the given character class.
 *
 * @param string $char_class Body of a PCRE character class (e.g. 'A-Za-z'
 *                           or '^>'); it is placed between [ and ].
 * @param int $start Offset in the input at which to begin.
 * @return string The matching run, or '' when the character at $start is
 *                not in the class (or $start is past the end of the input).
 */
private function characters($char_class, $start)
{
    // substr() can return false on PHP < 8 when $start is out of range.
    $rest = (string)substr($this->data, $start);

    if (preg_match('#^[' . $char_class . ']+#', $rest, $found)) {
        return $found[0];
    }

    // No leading match: return an empty run rather than the whole remainder.
    return '';
}
|
503: |
|
/**
 * Data state: consumes the next character and either switches to the entity
 * or tag-open state, finishes at end of input, or emits character tokens.
 *
 * In the RCDATA/CDATA content models the escape flag is set when the last
 * four characters read are "<!--" and cleared when the last three are
 * "-->"; while it is set, "<" is treated as text.
 */
private function dataState()
{
    $this->char++;
    $char = $this->char();

    $raw_text = ($this->content_model === self::RCDATA || $this->content_model === self::CDATA);

    if ($char === '&' && ($this->content_model === self::PCDATA || $this->content_model === self::RCDATA)) {
        $this->state = 'entityData';

    } elseif ($char === '-') {
        // The "<!--" being tested ends on the current character, so it
        // starts three characters back.
        if ($raw_text && $this->escape === false &&
            $this->char >= 3 && substr($this->data, $this->char - 3, 4) === '<!--'
        ) {
            $this->escape = true;
        }

        $this->emitToken(
            array(
                'type' => self::CHARACTR,
                'data' => $char
            )
        );

    } elseif ($char === '<' && ($this->content_model === self::PCDATA ||
            ($raw_text && $this->escape === false))
    ) {
        $this->state = 'tagOpen';

    } elseif ($char === '>') {
        // The "-->" being tested ends on the current character, so it
        // starts two characters back.
        if ($raw_text && $this->escape === true &&
            $this->char >= 2 && substr($this->data, $this->char - 2, 3) === '-->'
        ) {
            $this->escape = false;
        }

        $this->emitToken(
            array(
                'type' => self::CHARACTR,
                'data' => $char
            )
        );

    } elseif ($this->char === $this->EOF) {
        $this->EOF();

    } elseif ($this->content_model === self::PLAINTEXT) {
        // Everything that remains is a single character token.
        $this->emitToken(
            array(
                'type' => self::CHARACTR,
                'data' => substr($this->data, $this->char)
            )
        );

        $this->EOF();

    } else {
        // Optimization: emit a whole run of plain characters as one token.
        // In RCDATA/CDATA the run also stops before "-" and ">" so that the
        // escape-flag checks above get to see those characters.
        $stops = $raw_text ? '<&->' : '<&';

        // The current character is always consumed. It can itself be "<"
        // or "&" when the content model makes those literal text, and a
        // zero-length run would leave the cursor stuck on it forever.
        $len = 1;
        if ($this->char + 1 < $this->EOF) {
            $len += strcspn($this->data, $stops, $this->char + 1);
        }

        $char = substr($this->data, $this->char, $len);
        $this->char += $len - 1;

        $this->emitToken(
            array(
                'type' => self::CHARACTR,
                'data' => $char
            )
        );

        $this->state = 'data';
    }
}
|
615: |
|
/**
 * Entity data state: tries to consume an entity after "&" and emits either
 * the decoded text or a literal "&" as a character token, then returns to
 * the data state.
 */
private function entityDataState()
{
    $entity = $this->entity();

    // entity() signals failure with false; compare strictly so that a
    // decoded string such as "0" is not mistaken for a failure.
    $char = ($entity === false) ? '&' : $entity;

    $this->emitToken(
        array(
            'type' => self::CHARACTR,
            'data' => $char
        )
    );

    $this->state = 'data';
}
|
634: |
|
/**
 * Tag open state: decides what a "<" introduces, depending on the content
 * model and on the character that follows it.
 */
private function tagOpenState()
{
    $model = $this->content_model;

    if ($model === self::RCDATA || $model === self::CDATA) {
        // Only "</" can start markup here; anything else is a literal "<".
        if ($this->character($this->char + 1) === '/') {
            $this->char++;
            $this->state = 'closeTagOpen';
            return;
        }

        $this->emitToken(
            array(
                'type' => self::CHARACTR,
                'data' => '<'
            )
        );
        $this->state = 'data';
        return;
    }

    if ($model !== self::PCDATA) {
        return;
    }

    $this->char++;
    $char = $this->char();

    if ($char === '!') {
        $this->state = 'markupDeclarationOpen';
        return;
    }

    if ($char === '/') {
        $this->state = 'closeTagOpen';
        return;
    }

    if (preg_match('/^[A-Za-z]$/', $char)) {
        // Start a new start-tag token named after the lowercased letter.
        $this->token = array(
            'name' => strtolower($char),
            'type' => self::STARTTAG,
            'attr' => array()
        );
        $this->state = 'tagName';
        return;
    }

    if ($char === '>') {
        $this->emitToken(
            array(
                'type' => self::CHARACTR,
                'data' => '<>'
            )
        );
        $this->state = 'data';
        return;
    }

    if ($char === '?') {
        $this->state = 'bogusComment';
        return;
    }

    // Anything else: emit a literal "<" and let the data state re-read
    // the current character.
    $this->emitToken(
        array(
            'type' => self::CHARACTR,
            'data' => '<'
        )
    );
    $this->char--;
    $this->state = 'data';
}
|
726: |
|
/**
 * Close tag open state: handles the characters following "</".
 *
 * In RCDATA/CDATA the "</" is emitted as text unless the letters after it
 * match the node name on top of the tree constructor's stack and are
 * followed by whitespace, ">" or "/".
 */
private function closeTagOpenState()
{
    $candidate = strtolower($this->characters('A-Za-z', $this->char + 1));
    $matches_current = count($this->tree->stack) > 0 && $candidate === end($this->tree->stack)->nodeName;

    $literal = false;
    if ($this->content_model === self::RCDATA || $this->content_model === self::CDATA) {
        if (!$matches_current) {
            $literal = true;
        } else {
            $after_name = $this->character($this->char + 1 + strlen($candidate));
            $literal = !preg_match('/[\t\n\x0b\x0c >\/]/', $after_name)
                || $this->EOF === $this->char;
        }
    }

    if ($literal) {
        $this->emitToken(
            array(
                'type' => self::CHARACTR,
                'data' => '</'
            )
        );
        $this->state = 'data';
        return;
    }

    $this->char++;
    $char = $this->char();

    if (preg_match('/^[A-Za-z]$/', $char)) {
        // Start a new end-tag token named after the lowercased letter.
        $this->token = array(
            'name' => strtolower($char),
            'type' => self::ENDTAG
        );
        $this->state = 'tagName';
        return;
    }

    if ($char === '>') {
        $this->state = 'data';
        return;
    }

    if ($this->char === $this->EOF) {
        $this->emitToken(
            array(
                'type' => self::CHARACTR,
                'data' => '</'
            )
        );
        $this->char--;
        $this->state = 'data';
        return;
    }

    $this->state = 'bogusComment';
}
|
807: |
|
/**
 * Tag name state: extends the current tag token's name until whitespace,
 * "/", ">" or end of input is reached.
 */
private function tagNameState()
{
    $this->char++;
    $char = $this->character($this->char);

    // Whitespace and "/" both lead to the before-attribute-name state.
    if (preg_match('/^[\t\n\x0b\x0c ]$/', $char) || $char === '/') {
        $this->state = 'beforeAttributeName';
        return;
    }

    if ($char === '>') {
        $this->emitToken($this->token);
        $this->state = 'data';
        return;
    }

    if ($this->char === $this->EOF) {
        // Emit what we have and let the data state see the end of input.
        $this->emitToken($this->token);
        $this->char--;
        $this->state = 'data';
        return;
    }

    $this->token['name'] .= strtolower($char);
    $this->state = 'tagName';
}
|
852: |
|
/**
 * Before attribute name state: skips whitespace and "/" inside a tag and
 * starts a new attribute on any other character.
 */
private function beforeAttributeNameState()
{
    $this->char++;
    $char = $this->character($this->char);

    // Whitespace and "/" keep us in this state.
    if (preg_match('/^[\t\n\x0b\x0c ]$/', $char) || $char === '/') {
        $this->state = 'beforeAttributeName';
        return;
    }

    if ($char === '>') {
        $this->emitToken($this->token);
        $this->state = 'data';
        return;
    }

    if ($this->char === $this->EOF) {
        $this->emitToken($this->token);
        $this->char--;
        $this->state = 'data';
        return;
    }

    // Begin a new attribute whose name starts with the lowercased character.
    $this->token['attr'][] = array(
        'name' => strtolower($char),
        'value' => null
    );
    $this->state = 'attributeName';
}
|
902: |
|
/**
 * Attribute name state: extends the name of the most recently added
 * attribute until a delimiter is found.
 */
private function attributeNameState()
{
    $this->char++;
    $char = $this->character($this->char);

    if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
        $this->state = 'afterAttributeName';
        return;
    }

    if ($char === '=') {
        $this->state = 'beforeAttributeValue';
        return;
    }

    if ($char === '>') {
        $this->emitToken($this->token);
        $this->state = 'data';
        return;
    }

    // A "/" that is not immediately followed by ">" ends the name.
    if ($char === '/' && $this->character($this->char + 1) !== '>') {
        $this->state = 'beforeAttributeName';
        return;
    }

    if ($this->char === $this->EOF) {
        $this->emitToken($this->token);
        $this->char--;
        $this->state = 'data';
        return;
    }

    $current = count($this->token['attr']) - 1;
    $this->token['attr'][$current]['name'] .= strtolower($char);
    $this->state = 'attributeName';
}
|
954: |
|
/**
 * After attribute name state: skips whitespace after an attribute name and
 * then expects "=", the end of the tag, or the start of another attribute.
 */
private function afterAttributeNameState()
{
    $this->char++;
    $char = $this->character($this->char);

    if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
        $this->state = 'afterAttributeName';
        return;
    }

    if ($char === '=') {
        $this->state = 'beforeAttributeValue';
        return;
    }

    if ($char === '>') {
        $this->emitToken($this->token);
        $this->state = 'data';
        return;
    }

    // A "/" that is not immediately followed by ">" is skipped.
    if ($char === '/' && $this->character($this->char + 1) !== '>') {
        $this->state = 'beforeAttributeName';
        return;
    }

    if ($this->char === $this->EOF) {
        $this->emitToken($this->token);
        $this->char--;
        $this->state = 'data';
        return;
    }

    // Any other character starts a new attribute.
    $this->token['attr'][] = array(
        'name' => strtolower($char),
        'value' => null
    );
    $this->state = 'attributeName';
}
|
1009: |
|
/**
 * Before attribute value state: skips whitespace after "=" and selects the
 * quoted or unquoted value state based on the first value character.
 */
private function beforeAttributeValueState()
{
    $this->char++;
    $char = $this->character($this->char);

    if (preg_match('/^[\t\n\x0b\x0c ]$/', (string)$char)) {
        $this->state = 'beforeAttributeValue';

    } elseif ($char === '"') {
        $this->state = 'attributeValueDoubleQuoted';

    } elseif ($char === '&') {
        // Let the unquoted-value state re-read the "&".
        $this->char--;
        $this->state = 'attributeValueUnquoted';

    } elseif ($char === '\'') {
        $this->state = 'attributeValueSingleQuoted';

    } elseif ($char === '>') {
        $this->emitToken($this->token);
        $this->state = 'data';

    } elseif ($this->char === $this->EOF) {
        // Input ended right after "=": emit the tag and let the data state
        // see the end of input, as the other tag states do. Without this
        // branch the tokenizer would move into the unquoted-value state
        // with the cursor already at the end and never terminate.
        $this->emitToken($this->token);

        $this->char--;
        $this->state = 'data';

    } else {
        // First character of an unquoted value.
        $last = count($this->token['attr']) - 1;
        $this->token['attr'][$last]['value'] .= $char;

        $this->state = 'attributeValueUnquoted';
    }
}
|
1058: |
|
/**
 * Attribute value (double-quoted) state: appends characters to the current
 * attribute's value until the closing double quote.
 */
private function attributeValueDoubleQuotedState()
{
    $this->char++;
    $char = $this->character($this->char);

    if ($char === '"') {
        $this->state = 'beforeAttributeName';
        return;
    }

    if ($char === '&') {
        $this->entityInAttributeValueState('double');
        return;
    }

    if ($this->char === $this->EOF) {
        $this->emitToken($this->token);
        $this->char--;
        $this->state = 'data';
        return;
    }

    $current = count($this->token['attr']) - 1;
    $this->token['attr'][$current]['value'] .= $char;
    $this->state = 'attributeValueDoubleQuoted';
}
|
1094: |
|
/**
 * Attribute value (single-quoted) state: appends characters to the current
 * attribute's value until the closing single quote.
 */
private function attributeValueSingleQuotedState()
{
    $this->char++;
    $char = $this->character($this->char);

    if ($char === '\'') {
        $this->state = 'beforeAttributeName';
        return;
    }

    if ($char === '&') {
        $this->entityInAttributeValueState('single');
        return;
    }

    if ($this->char === $this->EOF) {
        $this->emitToken($this->token);
        $this->char--;
        $this->state = 'data';
        return;
    }

    $current = count($this->token['attr']) - 1;
    $this->token['attr'][$current]['value'] .= $char;
    $this->state = 'attributeValueSingleQuoted';
}
|
1130: |
|
/**
 * Attribute value (unquoted) state: appends characters to the current
 * attribute's value until whitespace, ">" or end of input.
 */
private function attributeValueUnquotedState()
{
    $this->char++;
    $char = $this->character($this->char);

    if ($this->char >= $this->EOF) {
        // Input ended inside the value: emit the tag and position the
        // cursor so that the data state's next increment lands on the end
        // of input. Without this branch the state would keep advancing
        // past the end and never terminate.
        $this->emitToken($this->token);

        $this->char = $this->EOF - 1;
        $this->state = 'data';

    } elseif (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
        $this->state = 'beforeAttributeName';

    } elseif ($char === '&') {
        $this->entityInAttributeValueState();

    } elseif ($char === '>') {
        $this->emitToken($this->token);
        $this->state = 'data';

    } else {
        $last = count($this->token['attr']) - 1;
        $this->token['attr'][$last]['value'] .= $char;

        $this->state = 'attributeValueUnquoted';
    }
}
|
1167: |
|
/**
 * Tries to consume an entity inside an attribute value and appends either
 * the decoded text or a literal "&" to the current attribute's value.
 *
 * The state is left unchanged, so the calling attribute-value state resumes.
 */
private function entityInAttributeValueState()
{
    $entity = $this->entity();

    // entity() signals failure with false; compare strictly so that a
    // decoded string such as "0" is not mistaken for a failure.
    $char = ($entity === false)
        ? '&'
        : $entity;

    $last = count($this->token['attr']) - 1;
    $this->token['attr'][$last]['value'] .= $char;
}
|
1183: |
|
/**
 * Bogus comment state: emits everything from the cursor up to (but not
 * including) the next ">" as a comment token, then returns to the data
 * state with the cursor on that ">" (or on the last character when the
 * input ends first).
 */
private function bogusCommentState()
{
    // Measure the span directly. When the cursor already sits on ">"
    // (e.g. "<!>") the comment must be empty, not the rest of the document.
    $length = ($this->char < $this->EOF)
        ? strcspn($this->data, '>', $this->char)
        : 0;
    $data = ($length > 0) ? substr($this->data, $this->char, $length) : '';

    $this->emitToken(
        array(
            'data' => $data,
            'type' => self::COMMENT
        )
    );

    $this->char += $length;

    $this->state = 'data';

    // If the end was reached, step back so the data state's increment
    // lands exactly on the end of input.
    if ($this->char === $this->EOF) {
        $this->char = $this->EOF - 1;
    }
}
|
1212: |
|
/**
 * Markup declaration open state: after "<!", recognises "--" (comment) and
 * a case-insensitive "doctype"; anything else becomes a bogus comment.
 */
private function markupDeclarationOpenState()
{
    $next = $this->char + 1;

    if ($this->character($next, 2) === '--') {
        $this->char += 2;
        $this->state = 'comment';
        $this->token = array(
            'data' => null,
            'type' => self::COMMENT
        );
        return;
    }

    if (strtolower($this->character($next, 7)) === 'doctype') {
        $this->char += 7;
        $this->state = 'doctype';
        return;
    }

    $this->char++;
    $this->state = 'bogusComment';
}
|
1241: |
|
/**
 * Comment state: appends characters to the comment token until a "-" or
 * the end of input is seen.
 */
private function commentState()
{
    $this->char++;
    $char = $this->char();

    if ($char === '-') {
        $this->state = 'commentDash';
        return;
    }

    if ($this->char === $this->EOF) {
        // Emit the unfinished comment and let the data state see the end.
        $this->emitToken($this->token);
        $this->char--;
        $this->state = 'data';
        return;
    }

    $this->token['data'] .= $char;
}
|
1268: |
|
/**
 * Comment dash state: one "-" has been seen inside a comment; a second one
 * moves to the comment end state, anything else goes back to the comment
 * state with the "-" kept as comment data.
 */
private function commentDashState()
{
    $this->char++;
    $char = $this->char();

    if ($char === '-') {
        $this->state = 'commentEnd';
        return;
    }

    if ($this->char === $this->EOF) {
        $this->emitToken($this->token);
        $this->char--;
        $this->state = 'data';
        return;
    }

    $this->token['data'] .= '-' . $char;
    $this->state = 'comment';
}
|
1296: |
|
/**
 * Comment end state: "--" has been seen inside a comment; ">" closes the
 * comment, further "-" characters are kept as data, and anything else
 * returns to the comment state with the "--" kept as data.
 */
private function commentEndState()
{
    $this->char++;
    $char = $this->char();

    if ($char === '>') {
        $this->emitToken($this->token);
        $this->state = 'data';
        return;
    }

    if ($char === '-') {
        $this->token['data'] .= '-';
        return;
    }

    if ($this->char === $this->EOF) {
        $this->emitToken($this->token);
        $this->char--;
        $this->state = 'data';
        return;
    }

    $this->token['data'] .= '--' . $char;
    $this->state = 'comment';
}
|
1320: |
|
/**
 * Doctype state: consumes one whitespace character after "doctype" if
 * there is one, then moves to the before-doctype-name state.
 */
private function doctypeState()
{
    $this->char++;
    $char = $this->char();

    // A non-whitespace character is handed back to the next state.
    if (!preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
        $this->char--;
    }

    $this->state = 'beforeDoctypeName';
}
|
1335: |
|
/**
 * Before doctype name state: skips whitespace, then either starts a doctype
 * token from the first name character or, on ">" / end of input, emits a
 * nameless doctype token flagged as an error.
 */
private function beforeDoctypeNameState()
{
    $this->char++;
    $char = $this->char();

    if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
        // Stay in this state.
        return;
    }

    $at_eof = ($this->char === $this->EOF);

    if ($char === '>' || $at_eof) {
        $this->emitToken(
            array(
                'name' => null,
                'type' => self::DOCTYPE,
                'error' => true
            )
        );

        if ($at_eof) {
            $this->char--;
        }
        $this->state = 'data';
        return;
    }

    // Lowercase ASCII letters are uppercased; other characters are kept.
    $first = preg_match('/^[a-z]$/', $char) ? strtoupper($char) : $char;

    $this->token = array(
        'name' => $first,
        'type' => self::DOCTYPE,
        'error' => true
    );
    $this->state = 'doctypeName';
}
|
1387: |
|
/**
 * Doctype name state: extends the doctype token's name and, after every
 * step, sets the token's error flag to whether the name differs from "HTML".
 */
private function doctypeNameState()
{
    $this->char++;
    $char = $this->char();

    $at_eof = ($this->char === $this->EOF);

    if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
        $this->state = 'AfterDoctypeName';

    } elseif ($char === '>' || $at_eof) {
        $this->emitToken($this->token);
        if ($at_eof) {
            $this->char--;
        }
        $this->state = 'data';

    } else {
        // Lowercase ASCII letters are uppercased; other characters are kept.
        $this->token['name'] .= preg_match('/^[a-z]$/', $char)
            ? strtoupper($char)
            : $char;
    }

    // Runs on every path, including after the token has been emitted.
    $this->token['error'] = ($this->token['name'] !== 'HTML');
}
|
1417: |
|
/**
 * After doctype name state: skips whitespace; ">" or end of input emits the
 * doctype token, anything else flags an error and enters the bogus doctype
 * state.
 */
private function afterDoctypeNameState()
{
    $this->char++;
    $char = $this->char();

    if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
        // Stay in this state.
        return;
    }

    if ($char === '>') {
        $this->emitToken($this->token);
        $this->state = 'data';
        return;
    }

    if ($this->char === $this->EOF) {
        $this->emitToken($this->token);
        $this->char--;
        $this->state = 'data';
        return;
    }

    $this->token['error'] = true;
    $this->state = 'bogusDoctype';
}
|
1441: |
|
/**
 * Bogus doctype state: discards characters until ">" or end of input, then
 * emits the doctype token and returns to the data state.
 */
private function bogusDoctypeState()
{
    $this->char++;
    $char = $this->char();

    if ($char === '>') {
        $this->emitToken($this->token);
        $this->state = 'data';
        return;
    }

    if ($this->char === $this->EOF) {
        $this->emitToken($this->token);
        $this->char--;
        $this->state = 'data';
    }

    // Any other character is ignored and the state is unchanged.
}
|
1461: |
|
/**
 * Tries to consume a character reference. On entry the cursor is on "&".
 *
 * Handles decimal ("&#65;") and hexadecimal ("&#x41;") references as well
 * as names listed in $this->entities; a trailing ";" is consumed when
 * present. For names, the longest identifier in the table that matches the
 * input is used.
 *
 * On success the cursor is left on the last consumed character. On failure
 * the cursor is restored to the "&".
 *
 * @return string|false The result of html_entity_decode() for the
 *                      reference, or false when nothing could be consumed.
 */
private function entity()
{
    $start = $this->char;

    if ($this->character($start + 1) === '#') {
        // Numeric reference: "&#" digits, or "&#x" / "&#X" hex digits.
        $marker = $this->character($start + 2);
        $is_hex = ($marker === 'x' || $marker === 'X');
        $digits_at = $start + ($is_hex ? 3 : 2);
        $allowed = $is_hex ? '0123456789ABCDEFabcdef' : '0123456789';

        $count = ($digits_at < $this->EOF)
            ? strspn($this->data, $allowed, $digits_at)
            : 0;

        if ($count === 0) {
            // No digits: nothing is consumed.
            $this->char = $start;
            return false;
        }

        // Move to the last digit, then take an optional ";".
        $this->char = $digits_at + $count - 1;
        if ($this->character($this->char + 1) === ';') {
            $this->char++;
        }

        $reference = '&#' . ($is_hex ? 'x' : '')
            . substr($this->data, $digits_at, $count) . ';';

        return html_entity_decode($reference, ENT_QUOTES, 'UTF-8');
    }

    // Named reference. Only as many characters as the longest identifier
    // in the table can possibly matter.
    $name_at = $start + 1;
    $run = 0;
    if ($name_at < $this->EOF) {
        $longest = max(array_map('strlen', $this->entities));
        $run = strspn(
            $this->data,
            '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz;',
            $name_at,
            min($longest, $this->EOF - $name_at)
        );
    }

    // Try the longest candidate first so that e.g. "notin;" wins over "not".
    $entity = null;
    for ($c = $run; $c >= 1; $c--) {
        $id = substr($this->data, $name_at, $c);
        if (in_array($id, $this->entities, true)) {
            $entity = $id;
            break;
        }
    }

    if ($entity === null) {
        // No identifier matched: nothing is consumed.
        $this->char = $start;
        return false;
    }

    // Move to the last character of the identifier, then take an optional
    // ";" if the identifier did not already include it.
    $this->char = $start + strlen($entity);
    if (substr($entity, -1) !== ';' && $this->character($this->char + 1) === ';') {
        $this->char++;
    }

    return html_entity_decode('&' . rtrim($entity, ';') . ';', ENT_QUOTES, 'UTF-8');
}
|
1553: |
|
/**
 * Passes a token to the tree constructor and updates the tokenizer's content
 * model from the outcome.
 *
 * @param array $token Token to emit; its 'type' entry is inspected.
 */
private function emitToken($token)
{
    $requested = $this->tree->emitToken($token);

    if (is_int($requested)) {
        // An integer result from the tree constructor becomes the new
        // content model.
        $this->content_model = $requested;
        return;
    }

    if ($token['type'] === self::ENDTAG) {
        // Otherwise an end tag resets the content model to PCDATA.
        $this->content_model = self::PCDATA;
    }
}
1565: |
|
/**
 * Clears the tokenizer state and sends an end-of-file token to the tree
 * constructor.
 */
private function EOF()
{
    $eof_token = array('type' => self::EOF);

    $this->state = null;
    $this->tree->emitToken($eof_token);
}
1575: | }
|
1576: |
|
1577: | class HTML5TreeConstructer
|
1578: | {
|
/**
 * Stack of DOM nodes; methods of this class push nodes onto it as they are
 * inserted and pop them again when they are closed.
 */
public $stack = array();

/** Current phase (one of the *_PHASE constants); selects the handler in emitToken(). */
private $phase;

/** Current insertion mode (one of the mode constants); selects the handler in mainPhase(). */
private $mode;

/** The DOMDocument under construction. */
private $dom;

// NOTE(review): where this is read or written is not visible in this part of the file.
private $foster_parent = null;

/** List holding formatting elements and self::MARKER entries. */
private $a_formatting = array();

/** The inserted 'head' element, or null while there is none. */
private $head_pointer = null;

/** The inserted 'form' element, or null while there is none. */
private $form_pointer = null;

// Tag-name lists for the element categories below.
// NOTE(review): presumably consulted by getElementCategory() - confirm.
private $scoping = array('button', 'caption', 'html', 'marquee', 'object', 'table', 'td', 'th');

private $formatting = array(
    'a', 'b', 'big', 'em', 'font', 'i', 'nobr', 's', 'small', 'strike', 'strong', 'tt', 'u'
);

private $special = array(
    'address', 'area', 'base', 'basefont', 'bgsound', 'blockquote', 'body', 'br',
    'center', 'col', 'colgroup', 'dd', 'dir', 'div', 'dl', 'dt',
    'embed', 'fieldset', 'form', 'frame', 'frameset',
    'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'head', 'hr',
    'iframe', 'image', 'img', 'input', 'isindex', 'li', 'link', 'listing',
    'menu', 'meta', 'noembed', 'noframes', 'noscript', 'ol', 'optgroup', 'option',
    'p', 'param', 'plaintext', 'pre', 'script', 'select', 'spacer', 'style',
    'tbody', 'textarea', 'tfoot', 'thead', 'title', 'tr', 'ul', 'wbr'
);

// Phases, stored in $this->phase.
const INIT_PHASE = 0;
const ROOT_PHASE = 1;
const MAIN_PHASE = 2;
const END_PHASE = 3;

// Insertion modes, stored in $this->mode. Their values overlap with the
// phase values above; the two sets live in separate properties.
const BEFOR_HEAD = 0;
const IN_HEAD = 1;
const AFTER_HEAD = 2;
const IN_BODY = 3;
const IN_TABLE = 4;
const IN_CAPTION = 5;
const IN_CGROUP = 6;
const IN_TBODY = 7;
const IN_ROW = 8;
const IN_CELL = 9;
const IN_SELECT = 10;
const AFTER_BODY = 11;
const IN_FRAME = 12;
const AFTR_FRAME = 13;

// Element categories.
const SPECIAL = 0;
const SCOPING = 1;
const FORMATTING = 2;
const PHRASING = 3;

// Marker entry stored in $this->a_formatting.
const MARKER = 0;
1699: |
|
/**
 * Starts in the initial phase and the before-head insertion mode, with an
 * empty UTF-8 DOMDocument to build into.
 */
public function __construct()
{
    $document = new DOMDocument();
    $document->encoding = 'UTF-8';
    $document->preserveWhiteSpace = true;
    $document->substituteEntities = true;
    $document->strictErrorChecking = false;

    $this->phase = self::INIT_PHASE;
    $this->mode = self::BEFOR_HEAD;
    $this->dom = $document;
}
1711: |
|
1712: |
|
/**
 * Entry point for tokens: forwards the token to the handler of the current
 * phase and returns whatever that handler returns.
 *
 * @param array $token Token produced by the tokenizer.
 * @return mixed Result of the phase handler; null when no phase matches.
 */
public function emitToken($token)
{
    $phase = $this->phase;

    // Loose comparisons on purpose: they mirror how a switch statement
    // would match the phase value.
    if ($phase == self::INIT_PHASE) {
        return $this->initPhase($token);
    }

    if ($phase == self::ROOT_PHASE) {
        return $this->rootElementPhase($token);
    }

    if ($phase == self::MAIN_PHASE) {
        return $this->mainPhase($token);
    }

    if ($phase == self::END_PHASE) {
        return $this->trailingEndPhase($token);
    }
}
1730: |
|
/**
 * Handles a token while the constructor is in its initial phase.
 *
 *  - A token flagged with a truthy 'error', any comment, start tag, end tag
 *    or end-of-file token, and any character token that is not pure
 *    whitespace switch to the root-element phase and are reprocessed there.
 *  - A token carrying an 'error' entry that is falsy switches to the
 *    root-element phase without being reprocessed (presumably a well-formed
 *    DOCTYPE - confirm against the tokenizer). Nothing is added to the
 *    document for it.
 *  - Pure whitespace is appended to the document as a text node and the
 *    phase is left unchanged.
 *
 * @param array $token Token produced by the tokenizer.
 * @return mixed Result of rootElementPhase() when the token is reprocessed,
 *               otherwise null.
 */
private function initPhase($token)
{
    $whitespace = '/^[\t\n\x0b\x0c ]+$/';
    $type = $token['type'];
    $has_error_flag = isset($token['error']);

    $reprocess_in_root_phase =
        ($has_error_flag && $token['error'])
        || $type === HTML5::COMMENT
        || $type === HTML5::STARTTAG
        || $type === HTML5::ENDTAG
        || $type === HTML5::EOF
        || ($type === HTML5::CHARACTR
            && isset($token['data'])
            && !preg_match($whitespace, $token['data']));

    if ($reprocess_in_root_phase) {
        $this->phase = self::ROOT_PHASE;
        return $this->rootElementPhase($token);
    }

    if ($has_error_flag) {
        // The flag exists but is falsy here, otherwise the test above would
        // have matched. Only the phase changes.
        $this->phase = self::ROOT_PHASE;
        return;
    }

    if (isset($token['data']) && preg_match($whitespace, $token['data'])) {
        $this->dom->appendChild($this->dom->createTextNode($token['data']));
    }
}
1785: |
|
/**
 * Handles a token while waiting for the root element.
 *
 * DOCTYPE tokens are dropped. Comments and pure-whitespace text are appended
 * to the document itself. Any other text, a start tag, an end tag or
 * end-of-file creates the 'html' element, pushes it on the stack, switches
 * to the main phase and reprocesses the token there.
 *
 * @param array $token Token produced by the tokenizer.
 * @return mixed Result of mainPhase() when the token is reprocessed,
 *               otherwise null.
 */
private function rootElementPhase($token)
{
    $type = $token['type'];

    if ($type === HTML5::DOCTYPE) {
        return;
    }

    if ($type === HTML5::COMMENT) {
        $this->dom->appendChild($this->dom->createComment($token['data']));
        return;
    }

    if ($type === HTML5::CHARACTR
        && preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
    ) {
        $this->dom->appendChild($this->dom->createTextNode($token['data']));
        return;
    }

    // A character token reaching this point is known not to be whitespace.
    $needs_root = $type === HTML5::CHARACTR
        || $type === HTML5::STARTTAG
        || $type === HTML5::ENDTAG
        || $type === HTML5::EOF;

    if (!$needs_root) {
        return;
    }

    $root = $this->dom->createElement('html');
    $this->dom->appendChild($root);
    $this->stack[] = $root;

    $this->phase = self::MAIN_PHASE;

    return $this->mainPhase($token);
}
1835: |
|
/**
 * Handles a token in the main phase.
 *
 * DOCTYPE tokens are dropped. An 'html' start tag only contributes the
 * attributes the root element (bottom of the stack) does not have yet.
 * End-of-file generates implied end tags. Everything else is forwarded to
 * the handler of the current insertion mode.
 *
 * The mode switch has no case for self::END_PHASE: that constant has the
 * same value (3) as self::IN_BODY, so such a case could never be selected
 * after the IN_BODY case.
 *
 * @param array $token Token produced by the tokenizer.
 * @return mixed Result of the insertion-mode handler, otherwise null.
 */
private function mainPhase($token)
{
    $type = $token['type'];

    if ($type === HTML5::DOCTYPE) {
        return;
    }

    if ($type === HTML5::STARTTAG && $token['name'] === 'html') {
        foreach ($token['attr'] as $attr) {
            if (!$this->stack[0]->hasAttribute($attr['name'])) {
                $this->stack[0]->setAttribute($attr['name'], $attr['value']);
            }
        }
        return;
    }

    if ($type === HTML5::EOF) {
        $this->generateImpliedEndTags();
        return;
    }

    switch ($this->mode) {
        case self::BEFOR_HEAD:
            return $this->beforeHead($token);
        case self::IN_HEAD:
            return $this->inHead($token);
        case self::AFTER_HEAD:
            return $this->afterHead($token);
        case self::IN_BODY:
            return $this->inBody($token);
        case self::IN_TABLE:
            return $this->inTable($token);
        case self::IN_CAPTION:
            return $this->inCaption($token);
        case self::IN_CGROUP:
            return $this->inColumnGroup($token);
        case self::IN_TBODY:
            return $this->inTableBody($token);
        case self::IN_ROW:
            return $this->inRow($token);
        case self::IN_CELL:
            return $this->inCell($token);
        case self::IN_SELECT:
            return $this->inSelect($token);
        case self::AFTER_BODY:
            return $this->afterBody($token);
        case self::IN_FRAME:
            return $this->inFrameset($token);
        case self::AFTR_FRAME:
            return $this->afterFrameset($token);
    }
}
1916: |
|
/**
 * Handles a token in the "before head" insertion mode.
 *
 *  - Pure-whitespace text and comments are inserted as they are.
 *  - A 'head' start tag is inserted, remembered in $this->head_pointer, and
 *    the mode becomes IN_HEAD.
 *  - Any other start tag, an 'html' end tag and any other text act as if a
 *    'head' start tag had been seen first; the token is then reprocessed by
 *    inHead().
 *  - Any other end tag is ignored.
 *
 * Text that fails the whitespace test always takes the "imply head" route,
 * so no second whitespace test is made for it.
 *
 * @param array $token Token produced by the tokenizer.
 * @return mixed Result of inHead() when the token is reprocessed, otherwise
 *               null.
 */
private function beforeHead($token)
{
    $type = $token['type'];

    if ($type === HTML5::CHARACTR) {
        if (preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
            $this->insertText($token['data']);
            return;
        }
        // Non-whitespace text: continue below and imply a head element.
    } elseif ($type === HTML5::COMMENT) {
        $this->insertComment($token['data']);
        return;
    } elseif ($type === HTML5::STARTTAG) {
        if ($token['name'] === 'head') {
            $this->head_pointer = $this->insertElement($token);
            $this->mode = self::IN_HEAD;
            return;
        }
        // Any other start tag: continue below and imply a head element.
    } elseif ($type === HTML5::ENDTAG) {
        if ($token['name'] !== 'html') {
            // End tags other than 'html' are ignored.
            return;
        }
    } else {
        // No handling for other token types in this mode.
        return;
    }

    // Act as if a 'head' start tag with no attributes had been seen ...
    $this->beforeHead(
        array(
            'name' => 'head',
            'type' => HTML5::STARTTAG,
            'attr' => array()
        )
    );

    // ... then reprocess the current token in the new mode.
    return $this->inHead($token);
}
1977: |
|
/**
 * Handles a token in the "in head" insertion mode.
 *
 *  - Text is inserted when it is pure whitespace or when the node on top of
 *    the stack is a 'title', 'style' or 'script' element.
 *  - Comments are inserted.
 *  - A 'title', 'style' or 'script' end tag pops the stack and returns
 *    HTML5::PCDATA.
 *  - A 'title', 'style' or 'script' start tag inserts the element (under the
 *    head element when one is known) and returns the content model for its
 *    contents: RCDATA for 'title', CDATA for 'style' and 'script'.
 *  - A 'base', 'link' or 'meta' start tag inserts the element.
 *  - A 'head' end tag pops the head element if it is on top of the stack and
 *    switches to AFTER_HEAD.
 *  - A 'head' start tag and every end tag except 'html' are ignored.
 *  - Anything else closes the head (or just switches to AFTER_HEAD) and is
 *    reprocessed by afterHead().
 *
 * A null $this->head_pointer is tolerated everywhere in this method, so the
 * 'script' branch and the head-closing branches behave like the 'title' and
 * 'style' branches instead of failing on a null dereference.
 *
 * @param array $token Token produced by the tokenizer.
 * @return mixed A content-model constant, the result of afterHead(), or null.
 */
private function inHead($token)
{
    $type = $token['type'];
    $text_hosts = array('title', 'style', 'script');

    if ($type === HTML5::CHARACTR && (
        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
        || in_array(end($this->stack)->nodeName, $text_hosts)
    )) {
        $this->insertText($token['data']);
        return;
    }

    if ($type === HTML5::COMMENT) {
        $this->insertComment($token['data']);
        return;
    }

    if ($type === HTML5::ENDTAG && in_array($token['name'], $text_hosts)) {
        array_pop($this->stack);
        return HTML5::PCDATA;
    }

    if ($type === HTML5::STARTTAG) {
        $name = $token['name'];

        // Content model the tokenizer must use for the element's contents.
        $content_models = array(
            'title' => HTML5::RCDATA,
            'style' => HTML5::CDATA,
            'script' => HTML5::CDATA
        );

        if (isset($content_models[$name])) {
            if ($this->head_pointer !== null) {
                // Create without attaching, then attach under the head.
                $element = $this->insertElement($token, false);
                $this->head_pointer->appendChild($element);
            } else {
                $this->insertElement($token);
            }

            return $content_models[$name];
        }

        if (in_array($name, array('base', 'link', 'meta'))) {
            if ($this->head_pointer !== null) {
                $element = $this->insertElement($token, false);
                $this->head_pointer->appendChild($element);
                array_pop($this->stack);
            } else {
                // NOTE(review): unlike the branch above, nothing is popped
                // from the stack here - confirm this is intended.
                $this->insertElement($token);
            }

            return;
        }
    }

    $head_is_current = $this->head_pointer !== null
        && $this->head_pointer->isSameNode(end($this->stack));

    if ($type === HTML5::ENDTAG && $token['name'] === 'head') {
        if ($head_is_current) {
            array_pop($this->stack);
        }

        $this->mode = self::AFTER_HEAD;
        return;
    }

    if (($type === HTML5::STARTTAG && $token['name'] === 'head')
        || ($type === HTML5::ENDTAG && $token['name'] !== 'html')
    ) {
        // Ignored.
        return;
    }

    // Anything else: leave the head, then reprocess the token.
    if ($head_is_current) {
        $this->inHead(
            array(
                'name' => 'head',
                'type' => HTML5::ENDTAG
            )
        );
    } else {
        $this->mode = self::AFTER_HEAD;
    }

    return $this->afterHead($token);
}
2112: |
|
/**
 * Handles a token in the "after head" insertion mode.
 *
 *  - Pure-whitespace text and comments are inserted as they are.
 *  - A 'body' start tag is inserted and the mode becomes IN_BODY.
 *  - A 'frameset' start tag is inserted and the mode becomes IN_FRAME.
 *  - A 'base', 'link', 'meta', 'script', 'style' or 'title' start tag
 *    switches back to IN_HEAD and is reprocessed by inHead().
 *  - Anything else acts as if a 'body' start tag had been seen first and is
 *    then reprocessed by inBody().
 *
 * @param array $token Token produced by the tokenizer.
 * @return mixed Result of inHead() / inBody() when the token is reprocessed,
 *               otherwise null.
 */
private function afterHead($token)
{
    $type = $token['type'];

    if ($type === HTML5::CHARACTR
        && preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
    ) {
        $this->insertText($token['data']);
        return;
    }

    if ($type === HTML5::COMMENT) {
        $this->insertComment($token['data']);
        return;
    }

    if ($type === HTML5::STARTTAG) {
        $name = $token['name'];

        if ($name === 'body' || $name === 'frameset') {
            $this->insertElement($token);
            $this->mode = ($name === 'body') ? self::IN_BODY : self::IN_FRAME;
            return;
        }

        if (in_array($name, array('base', 'link', 'meta', 'script', 'style', 'title'))) {
            $this->mode = self::IN_HEAD;
            return $this->inHead($token);
        }
    }

    // Act as if a 'body' start tag with no attributes had been seen ...
    $implied_body = array(
        'name' => 'body',
        'type' => HTML5::STARTTAG,
        'attr' => array()
    );
    $this->afterHead($implied_body);

    // ... then reprocess the current token in the body.
    return $this->inBody($token);
}
2175: |
|
2176: | private function inBody($token)
|
2177: | {
|
2178: |
|
2179: |
|
2180: | switch ($token['type']) {
|
2181: |
|
2182: | case HTML5::CHARACTR:
|
2183: |
|
2184: | $this->reconstructActiveFormattingElements();
|
2185: |
|
2186: |
|
2187: | $this->insertText($token['data']);
|
2188: | break;
|
2189: |
|
2190: |
|
2191: | case HTML5::COMMENT:
|
2192: | |
2193: |
|
2194: | $this->insertComment($token['data']);
|
2195: | break;
|
2196: |
|
2197: | case HTML5::STARTTAG:
|
2198: | switch ($token['name']) {
|
2199: | |
2200: |
|
2201: | case 'script':
|
2202: | case 'style':
|
2203: | |
2204: |
|
2205: | return $this->inHead($token);
|
2206: | break;
|
2207: |
|
2208: | |
2209: |
|
2210: | case 'base':
|
2211: | case 'link':
|
2212: | case 'meta':
|
2213: | case 'title':
|
2214: | |
2215: |
|
2216: | return $this->inHead($token);
|
2217: | break;
|
2218: |
|
2219: |
|
2220: | case 'body':
|
2221: | |
2222: | |
2223: | |
2224: |
|
2225: | if (count($this->stack) === 1 || $this->stack[1]->nodeName !== 'body') {
|
2226: |
|
2227: |
|
2228: | |
2229: | |
2230: | |
2231: | |
2232: |
|
2233: | } else {
|
2234: | foreach ($token['attr'] as $attr) {
|
2235: | if (!$this->stack[1]->hasAttribute($attr['name'])) {
|
2236: | $this->stack[1]->setAttribute($attr['name'], $attr['value']);
|
2237: | }
|
2238: | }
|
2239: | }
|
2240: | break;
|
2241: |
|
2242: | |
2243: | |
2244: |
|
2245: | case 'address':
|
2246: | case 'blockquote':
|
2247: | case 'center':
|
2248: | case 'dir':
|
2249: | case 'div':
|
2250: | case 'dl':
|
2251: | case 'fieldset':
|
2252: | case 'listing':
|
2253: | case 'menu':
|
2254: | case 'ol':
|
2255: | case 'p':
|
2256: | case 'ul':
|
2257: | |
2258: | |
2259: |
|
2260: | if ($this->elementInScope('p')) {
|
2261: | $this->emitToken(
|
2262: | array(
|
2263: | 'name' => 'p',
|
2264: | 'type' => HTML5::ENDTAG
|
2265: | )
|
2266: | );
|
2267: | }
|
2268: |
|
2269: |
|
2270: | $this->insertElement($token);
|
2271: | break;
|
2272: |
|
2273: |
|
2274: | case 'form':
|
2275: | |
2276: |
|
2277: | if ($this->form_pointer !== null) {
|
2278: |
|
2279: |
|
2280: |
|
2281: | } else {
|
2282: | |
2283: | |
2284: |
|
2285: | if ($this->elementInScope('p')) {
|
2286: | $this->emitToken(
|
2287: | array(
|
2288: | 'name' => 'p',
|
2289: | 'type' => HTML5::ENDTAG
|
2290: | )
|
2291: | );
|
2292: | }
|
2293: |
|
2294: | |
2295: |
|
2296: | $element = $this->insertElement($token);
|
2297: | $this->form_pointer = $element;
|
2298: | }
|
2299: | break;
|
2300: |
|
2301: |
|
2302: | case 'li':
|
2303: | case 'dd':
|
2304: | case 'dt':
|
2305: | |
2306: | |
2307: |
|
2308: | if ($this->elementInScope('p')) {
|
2309: | $this->emitToken(
|
2310: | array(
|
2311: | 'name' => 'p',
|
2312: | 'type' => HTML5::ENDTAG
|
2313: | )
|
2314: | );
|
2315: | }
|
2316: |
|
2317: | $stack_length = count($this->stack) - 1;
|
2318: |
|
2319: | for ($n = $stack_length; 0 <= $n; $n--) {
|
2320: | |
2321: |
|
2322: | $stop = false;
|
2323: | $node = $this->stack[$n];
|
2324: | $cat = $this->getElementCategory($node->tagName);
|
2325: |
|
2326: | |
2327: | |
2328: |
|
2329: | if ($token['name'] === $node->tagName || ($token['name'] !== 'li'
|
2330: | && ($node->tagName === 'dd' || $node->tagName === 'dt'))
|
2331: | ) {
|
2332: | for ($x = $stack_length; $x >= $n; $x--) {
|
2333: | array_pop($this->stack);
|
2334: | }
|
2335: |
|
2336: | break;
|
2337: | }
|
2338: |
|
2339: | |
2340: | |
2341: |
|
2342: | if ($cat !== self::FORMATTING && $cat !== self::PHRASING &&
|
2343: | $node->tagName !== 'address' && $node->tagName !== 'div'
|
2344: | ) {
|
2345: | break;
|
2346: | }
|
2347: | }
|
2348: |
|
2349: | |
2350: |
|
2351: | $this->insertElement($token);
|
2352: | break;
|
2353: |
|
2354: |
|
2355: | case 'plaintext':
|
2356: | |
2357: | |
2358: |
|
2359: | if ($this->elementInScope('p')) {
|
2360: | $this->emitToken(
|
2361: | array(
|
2362: | 'name' => 'p',
|
2363: | 'type' => HTML5::ENDTAG
|
2364: | )
|
2365: | );
|
2366: | }
|
2367: |
|
2368: |
|
2369: | $this->insertElement($token);
|
2370: |
|
2371: | return HTML5::PLAINTEXT;
|
2372: | break;
|
2373: |
|
2374: | |
2375: |
|
2376: | case 'h1':
|
2377: | case 'h2':
|
2378: | case 'h3':
|
2379: | case 'h4':
|
2380: | case 'h5':
|
2381: | case 'h6':
|
2382: | |
2383: |
|
2384: | if ($this->elementInScope('p')) {
|
2385: | $this->emitToken(
|
2386: | array(
|
2387: | 'name' => 'p',
|
2388: | 'type' => HTML5::ENDTAG
|
2389: | )
|
2390: | );
|
2391: | }
|
2392: |
|
2393: | |
2394: | |
2395: | |
2396: | |
2397: |
|
2398: | while ($this->elementInScope(array('h1', 'h2', 'h3', 'h4', 'h5', 'h6'))) {
|
2399: | array_pop($this->stack);
|
2400: | }
|
2401: |
|
2402: |
|
2403: | $this->insertElement($token);
|
2404: | break;
|
2405: |
|
2406: |
|
2407: | case 'a':
|
2408: | |
2409: | |
2410: | |
2411: | |
2412: | |
2413: | |
2414: | |
2415: | |
2416: |
|
2417: | $leng = count($this->a_formatting);
|
2418: |
|
2419: | for ($n = $leng - 1; $n >= 0; $n--) {
|
2420: | if ($this->a_formatting[$n] === self::MARKER) {
|
2421: | break;
|
2422: |
|
2423: | } elseif ($this->a_formatting[$n]->nodeName === 'a') {
|
2424: | $this->emitToken(
|
2425: | array(
|
2426: | 'name' => 'a',
|
2427: | 'type' => HTML5::ENDTAG
|
2428: | )
|
2429: | );
|
2430: | break;
|
2431: | }
|
2432: | }
|
2433: |
|
2434: |
|
2435: | $this->reconstructActiveFormattingElements();
|
2436: |
|
2437: |
|
2438: | $el = $this->insertElement($token);
|
2439: |
|
2440: | |
2441: |
|
2442: | $this->a_formatting[] = $el;
|
2443: | break;
|
2444: |
|
2445: | |
2446: |
|
2447: | case 'b':
|
2448: | case 'big':
|
2449: | case 'em':
|
2450: | case 'font':
|
2451: | case 'i':
|
2452: | case 'nobr':
|
2453: | case 's':
|
2454: | case 'small':
|
2455: | case 'strike':
|
2456: | case 'strong':
|
2457: | case 'tt':
|
2458: | case 'u':
|
2459: |
|
2460: | $this->reconstructActiveFormattingElements();
|
2461: |
|
2462: |
|
2463: | $el = $this->insertElement($token);
|
2464: |
|
2465: | |
2466: |
|
2467: | $this->a_formatting[] = $el;
|
2468: | break;
|
2469: |
|
2470: |
|
2471: | case 'button':
|
2472: | |
2473: | |
2474: | |
2475: |
|
2476: | if ($this->elementInScope('button')) {
|
2477: | $this->inBody(
|
2478: | array(
|
2479: | 'name' => 'button',
|
2480: | 'type' => HTML5::ENDTAG
|
2481: | )
|
2482: | );
|
2483: | }
|
2484: |
|
2485: |
|
2486: | $this->reconstructActiveFormattingElements();
|
2487: |
|
2488: |
|
2489: | $this->insertElement($token);
|
2490: |
|
2491: | |
2492: |
|
2493: | $this->a_formatting[] = self::MARKER;
|
2494: | break;
|
2495: |
|
2496: |
|
2497: | case 'marquee':
|
2498: | case 'object':
|
2499: |
|
2500: | $this->reconstructActiveFormattingElements();
|
2501: |
|
2502: |
|
2503: | $this->insertElement($token);
|
2504: |
|
2505: | |
2506: |
|
2507: | $this->a_formatting[] = self::MARKER;
|
2508: | break;
|
2509: |
|
2510: |
|
2511: | case 'xmp':
|
2512: |
|
2513: | $this->reconstructActiveFormattingElements();
|
2514: |
|
2515: |
|
2516: | $this->insertElement($token);
|
2517: |
|
2518: |
|
2519: | return HTML5::CDATA;
|
2520: | break;
|
2521: |
|
2522: |
|
2523: | case 'table':
|
2524: | |
2525: |
|
2526: | if ($this->elementInScope('p')) {
|
2527: | $this->emitToken(
|
2528: | array(
|
2529: | 'name' => 'p',
|
2530: | 'type' => HTML5::ENDTAG
|
2531: | )
|
2532: | );
|
2533: | }
|
2534: |
|
2535: |
|
2536: | $this->insertElement($token);
|
2537: |
|
2538: |
|
2539: | $this->mode = self::IN_TABLE;
|
2540: | break;
|
2541: |
|
2542: | |
2543: |
|
2544: | case 'area':
|
2545: | case 'basefont':
|
2546: | case 'bgsound':
|
2547: | case 'br':
|
2548: | case 'embed':
|
2549: | case 'img':
|
2550: | case 'param':
|
2551: | case 'spacer':
|
2552: | case 'wbr':
|
2553: |
|
2554: | $this->reconstructActiveFormattingElements();
|
2555: |
|
2556: |
|
2557: | $this->insertElement($token);
|
2558: |
|
2559: |
|
2560: | array_pop($this->stack);
|
2561: | break;
|
2562: |
|
2563: |
|
2564: | case 'hr':
|
2565: | |
2566: |
|
2567: | if ($this->elementInScope('p')) {
|
2568: | $this->emitToken(
|
2569: | array(
|
2570: | 'name' => 'p',
|
2571: | 'type' => HTML5::ENDTAG
|
2572: | )
|
2573: | );
|
2574: | }
|
2575: |
|
2576: |
|
2577: | $this->insertElement($token);
|
2578: |
|
2579: |
|
2580: | array_pop($this->stack);
|
2581: | break;
|
2582: |
|
2583: |
|
2584: | case 'image':
|
2585: | |
2586: |
|
2587: | $token['name'] = 'img';
|
2588: | return $this->inBody($token);
|
2589: | break;
|
2590: |
|
2591: |
|
2592: | case 'input':
|
2593: |
|
2594: | $this->reconstructActiveFormattingElements();
|
2595: |
|
2596: |
|
2597: | $element = $this->insertElement($token, false);
|
2598: |
|
2599: | |
2600: | |
2601: |
|
2602: | $this->form_pointer !== null
|
2603: | ? $this->form_pointer->appendChild($element)
|
2604: | : end($this->stack)->appendChild($element);
|
2605: |
|
2606: |
|
2607: | array_pop($this->stack);
|
2608: | break;
|
2609: |
|
2610: |
|
2611: | case 'isindex':
|
2612: |
|
2613: |
|
2614: |
|
2615: | |
2616: |
|
2617: | if ($this->form_pointer === null) {
|
2618: | |
2619: |
|
2620: | $this->inBody(
|
2621: | array(
|
2622: | 'name' => 'body',
|
2623: | 'type' => HTML5::STARTTAG,
|
2624: | 'attr' => array()
|
2625: | )
|
2626: | );
|
2627: |
|
2628: | |
2629: |
|
2630: | $this->inBody(
|
2631: | array(
|
2632: | 'name' => 'hr',
|
2633: | 'type' => HTML5::STARTTAG,
|
2634: | 'attr' => array()
|
2635: | )
|
2636: | );
|
2637: |
|
2638: | |
2639: |
|
2640: | $this->inBody(
|
2641: | array(
|
2642: | 'name' => 'p',
|
2643: | 'type' => HTML5::STARTTAG,
|
2644: | 'attr' => array()
|
2645: | )
|
2646: | );
|
2647: |
|
2648: | |
2649: |
|
2650: | $this->inBody(
|
2651: | array(
|
2652: | 'name' => 'label',
|
2653: | 'type' => HTML5::STARTTAG,
|
2654: | 'attr' => array()
|
2655: | )
|
2656: | );
|
2657: |
|
2658: |
|
2659: | $this->insertText(
|
2660: | 'This is a searchable index. ' .
|
2661: | 'Insert your search keywords here: '
|
2662: | );
|
2663: |
|
2664: | |
2665: | |
2666: | |
2667: |
|
2668: | $attr = $token['attr'];
|
2669: | $attr[] = array('name' => 'name', 'value' => 'isindex');
|
2670: |
|
2671: | $this->inBody(
|
2672: | array(
|
2673: | 'name' => 'input',
|
2674: | 'type' => HTML5::STARTTAG,
|
2675: | 'attr' => $attr
|
2676: | )
|
2677: | );
|
2678: |
|
2679: | |
2680: |
|
2681: | $this->insertText(
|
2682: | 'This is a searchable index. ' .
|
2683: | 'Insert your search keywords here: '
|
2684: | );
|
2685: |
|
2686: | |
2687: |
|
2688: | $this->inBody(
|
2689: | array(
|
2690: | 'name' => 'label',
|
2691: | 'type' => HTML5::ENDTAG
|
2692: | )
|
2693: | );
|
2694: |
|
2695: | |
2696: |
|
2697: | $this->inBody(
|
2698: | array(
|
2699: | 'name' => 'p',
|
2700: | 'type' => HTML5::ENDTAG
|
2701: | )
|
2702: | );
|
2703: |
|
2704: | |
2705: |
|
2706: | $this->inBody(
|
2707: | array(
|
2708: | 'name' => 'hr',
|
2709: | 'type' => HTML5::ENDTAG
|
2710: | )
|
2711: | );
|
2712: |
|
2713: | |
2714: |
|
2715: | $this->inBody(
|
2716: | array(
|
2717: | 'name' => 'form',
|
2718: | 'type' => HTML5::ENDTAG
|
2719: | )
|
2720: | );
|
2721: | }
|
2722: | break;
|
2723: |
|
2724: |
|
2725: | case 'textarea':
|
2726: | $this->insertElement($token);
|
2727: |
|
2728: | |
2729: |
|
2730: | return HTML5::RCDATA;
|
2731: | break;
|
2732: |
|
2733: | |
2734: |
|
2735: | case 'iframe':
|
2736: | case 'noembed':
|
2737: | case 'noframes':
|
2738: | $this->insertElement($token);
|
2739: |
|
2740: |
|
2741: | return HTML5::CDATA;
|
2742: | break;
|
2743: |
|
2744: |
|
2745: | case 'select':
|
2746: |
|
2747: | $this->reconstructActiveFormattingElements();
|
2748: |
|
2749: |
|
2750: | $this->insertElement($token);
|
2751: |
|
2752: |
|
2753: | $this->mode = self::IN_SELECT;
|
2754: | break;
|
2755: |
|
2756: | |
2757: | |
2758: |
|
2759: | case 'caption':
|
2760: | case 'col':
|
2761: | case 'colgroup':
|
2762: | case 'frame':
|
2763: | case 'frameset':
|
2764: | case 'head':
|
2765: | case 'option':
|
2766: | case 'optgroup':
|
2767: | case 'tbody':
|
2768: | case 'td':
|
2769: | case 'tfoot':
|
2770: | case 'th':
|
2771: | case 'thead':
|
2772: | case 'tr':
|
2773: |
|
2774: | break;
|
2775: |
|
2776: | |
2777: | |
2778: |
|
2779: | case 'event-source':
|
2780: | case 'section':
|
2781: | case 'nav':
|
2782: | case 'article':
|
2783: | case 'aside':
|
2784: | case 'header':
|
2785: | case 'footer':
|
2786: | case 'datagrid':
|
2787: | case 'command':
|
2788: |
|
2789: | break;
|
2790: |
|
2791: |
|
2792: | default:
|
2793: |
|
2794: | $this->reconstructActiveFormattingElements();
|
2795: |
|
2796: | $this->insertElement($token, true, true);
|
2797: | break;
|
2798: | }
|
2799: | break;
|
2800: |
|
2801: | case HTML5::ENDTAG:
|
2802: | switch ($token['name']) {
|
2803: |
|
2804: | case 'body':
|
2805: | |
2806: | |
2807: |
|
2808: | if (count($this->stack) < 2 || $this->stack[1]->nodeName !== 'body') {
|
2809: |
|
2810: |
|
2811: | |
2812: |
|
2813: | } elseif (end($this->stack)->nodeName !== 'body') {
|
2814: |
|
2815: | }
|
2816: |
|
2817: |
|
2818: | $this->mode = self::AFTER_BODY;
|
2819: | break;
|
2820: |
|
2821: |
|
2822: | case 'html':
|
2823: | |
2824: | |
2825: |
|
2826: | $this->inBody(
|
2827: | array(
|
2828: | 'name' => 'body',
|
2829: | 'type' => HTML5::ENDTAG
|
2830: | )
|
2831: | );
|
2832: |
|
2833: | return $this->afterBody($token);
|
2834: | break;
|
2835: |
|
2836: | |
2837: | |
2838: |
|
2839: | case 'address':
|
2840: | case 'blockquote':
|
2841: | case 'center':
|
2842: | case 'dir':
|
2843: | case 'div':
|
2844: | case 'dl':
|
2845: | case 'fieldset':
|
2846: | case 'listing':
|
2847: | case 'menu':
|
2848: | case 'ol':
|
2849: | case 'pre':
|
2850: | case 'ul':
|
2851: | |
2852: | |
2853: |
|
2854: | if ($this->elementInScope($token['name'])) {
|
2855: | $this->generateImpliedEndTags();
|
2856: |
|
2857: | |
2858: | |
2859: |
|
2860: |
|
2861: |
|
2862: | |
2863: | |
2864: | |
2865: |
|
2866: | for ($n = count($this->stack) - 1; $n >= 0; $n--) {
|
2867: | if ($this->stack[$n]->nodeName === $token['name']) {
|
2868: | $n = -1;
|
2869: | }
|
2870: |
|
2871: | array_pop($this->stack);
|
2872: | }
|
2873: | }
|
2874: | break;
|
2875: |
|
2876: |
|
2877: | case 'form':
|
2878: | |
2879: | |
2880: |
|
2881: | if ($this->elementInScope($token['name'])) {
|
2882: | $this->generateImpliedEndTags();
|
2883: |
|
2884: | }
|
2885: |
|
2886: | if (end($this->stack)->nodeName !== $token['name']) {
|
2887: | |
2888: | |
2889: |
|
2890: |
|
2891: |
|
2892: | } else {
|
2893: | |
2894: | |
2895: |
|
2896: | array_pop($this->stack);
|
2897: | }
|
2898: |
|
2899: |
|
2900: | $this->form_pointer = null;
|
2901: | break;
|
2902: |
|
2903: |
|
2904: | case 'p':
|
2905: | |
2906: |
|
2907: | if ($this->elementInScope('p')) {
|
2908: | $this->generateImpliedEndTags(array('p'));
|
2909: |
|
2910: | |
2911: |
|
2912: |
|
2913: |
|
2914: | |
2915: | |
2916: |
|
2917: | for ($n = count($this->stack) - 1; $n >= 0; $n--) {
|
2918: | if ($this->elementInScope('p')) {
|
2919: | array_pop($this->stack);
|
2920: |
|
2921: | } else {
|
2922: | break;
|
2923: | }
|
2924: | }
|
2925: | }
|
2926: | break;
|
2927: |
|
2928: |
|
2929: | case 'dd':
|
2930: | case 'dt':
|
2931: | case 'li':
|
2932: | |
2933: | |
2934: | |
2935: |
|
2936: | if ($this->elementInScope($token['name'])) {
|
2937: | $this->generateImpliedEndTags(array($token['name']));
|
2938: |
|
2939: | |
2940: |
|
2941: |
|
2942: |
|
2943: | |
2944: | |
2945: | |
2946: |
|
2947: | for ($n = count($this->stack) - 1; $n >= 0; $n--) {
|
2948: | if ($this->stack[$n]->nodeName === $token['name']) {
|
2949: | $n = -1;
|
2950: | }
|
2951: |
|
2952: | array_pop($this->stack);
|
2953: | }
|
2954: | }
|
2955: | break;
|
2956: |
|
2957: | |
2958: |
|
2959: | case 'h1':
|
2960: | case 'h2':
|
2961: | case 'h3':
|
2962: | case 'h4':
|
2963: | case 'h5':
|
2964: | case 'h6':
|
2965: | $elements = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6');
|
2966: |
|
2967: | |
2968: | |
2969: |
|
2970: | if ($this->elementInScope($elements)) {
|
2971: | $this->generateImpliedEndTags();
|
2972: |
|
2973: | |
2974: |
|
2975: |
|
2976: |
|
2977: | |
2978: | |
2979: | |
2980: |
|
2981: | while ($this->elementInScope($elements)) {
|
2982: | array_pop($this->stack);
|
2983: | }
|
2984: | }
|
2985: | break;
|
2986: |
|
2987: | |
2988: |
|
2989: | case 'a':
|
2990: | case 'b':
|
2991: | case 'big':
|
2992: | case 'em':
|
2993: | case 'font':
|
2994: | case 'i':
|
2995: | case 'nobr':
|
2996: | case 's':
|
2997: | case 'small':
|
2998: | case 'strike':
|
2999: | case 'strong':
|
3000: | case 'tt':
|
3001: | case 'u':
|
3002: | |
3003: | |
3004: | |
3005: | |
3006: | |
3007: | |
3008: |
|
3009: | while (true) {
|
3010: | for ($a = count($this->a_formatting) - 1; $a >= 0; $a--) {
|
3011: | if ($this->a_formatting[$a] === self::MARKER) {
|
3012: | break;
|
3013: |
|
3014: | } elseif ($this->a_formatting[$a]->tagName === $token['name']) {
|
3015: | $formatting_element = $this->a_formatting[$a];
|
3016: | $in_stack = in_array($formatting_element, $this->stack, true);
|
3017: | $fe_af_pos = $a;
|
3018: | break;
|
3019: | }
|
3020: | }
|
3021: |
|
3022: | |
3023: | |
3024: | |
3025: |
|
3026: | if (!isset($formatting_element) || ($in_stack &&
|
3027: | !$this->elementInScope($token['name']))
|
3028: | ) {
|
3029: | break;
|
3030: |
|
3031: | |
3032: | |
3033: | |
3034: |
|
3035: | } elseif (isset($formatting_element) && !$in_stack) {
|
3036: | unset($this->a_formatting[$fe_af_pos]);
|
3037: | $this->a_formatting = array_merge($this->a_formatting);
|
3038: | break;
|
3039: | }
|
3040: |
|
3041: | |
3042: | |
3043: | |
3044: | |
3045: |
|
3046: | $fe_s_pos = array_search($formatting_element, $this->stack, true);
|
3047: | $length = count($this->stack);
|
3048: |
|
3049: | for ($s = $fe_s_pos + 1; $s < $length; $s++) {
|
3050: | $category = $this->getElementCategory($this->stack[$s]->nodeName);
|
3051: |
|
3052: | if ($category !== self::PHRASING && $category !== self::FORMATTING) {
|
3053: | $furthest_block = $this->stack[$s];
|
3054: | }
|
3055: | }
|
3056: |
|
3057: | |
3058: | |
3059: | |
3060: | |
3061: | |
3062: |
|
3063: | if (!isset($furthest_block)) {
|
3064: | for ($n = $length - 1; $n >= $fe_s_pos; $n--) {
|
3065: | array_pop($this->stack);
|
3066: | }
|
3067: |
|
3068: | unset($this->a_formatting[$fe_af_pos]);
|
3069: | $this->a_formatting = array_merge($this->a_formatting);
|
3070: | break;
|
3071: | }
|
3072: |
|
3073: | |
3074: | |
3075: |
|
3076: | $common_ancestor = $this->stack[$fe_s_pos - 1];
|
3077: |
|
3078: | |
3079: |
|
3080: | if ($furthest_block->parentNode !== null) {
|
3081: | $furthest_block->parentNode->removeChild($furthest_block);
|
3082: | }
|
3083: |
|
3084: | |
3085: | |
3086: | |
3087: |
|
3088: | $bookmark = $fe_af_pos;
|
3089: |
|
3090: | |
3091: |
|
3092: | $node = $furthest_block;
|
3093: | $last_node = $furthest_block;
|
3094: |
|
3095: | while (true) {
|
3096: | for ($n = array_search($node, $this->stack, true) - 1; $n >= 0; $n--) {
|
3097: | |
3098: |
|
3099: | $node = $this->stack[$n];
|
3100: |
|
3101: | |
3102: | |
3103: | |
3104: |
|
3105: | if (!in_array($node, $this->a_formatting, true)) {
|
3106: | unset($this->stack[$n]);
|
3107: | $this->stack = array_merge($this->stack);
|
3108: |
|
3109: | } else {
|
3110: | break;
|
3111: | }
|
3112: | }
|
3113: |
|
3114: | |
3115: | |
3116: |
|
3117: | if ($node === $formatting_element) {
|
3118: | break;
|
3119: |
|
3120: | |
3121: | |
3122: | |
3123: |
|
3124: | } elseif ($last_node === $furthest_block) {
|
3125: | $bookmark = array_search($node, $this->a_formatting, true) + 1;
|
3126: | }
|
3127: |
|
3128: | |
3129: | |
3130: | |
3131: | |
3132: | |
3133: |
|
3134: | if ($node->hasChildNodes()) {
|
3135: | $clone = $node->cloneNode();
|
3136: | $s_pos = array_search($node, $this->stack, true);
|
3137: | $a_pos = array_search($node, $this->a_formatting, true);
|
3138: |
|
3139: | $this->stack[$s_pos] = $clone;
|
3140: | $this->a_formatting[$a_pos] = $clone;
|
3141: | $node = $clone;
|
3142: | }
|
3143: |
|
3144: | |
3145: |
|
3146: | if ($last_node->parentNode !== null) {
|
3147: | $last_node->parentNode->removeChild($last_node);
|
3148: | }
|
3149: |
|
3150: | $node->appendChild($last_node);
|
3151: |
|
3152: |
|
3153: | $last_node = $node;
|
3154: | }
|
3155: |
|
3156: | |
3157: | |
3158: | |
3159: |
|
3160: | if ($last_node->parentNode !== null) {
|
3161: | $last_node->parentNode->removeChild($last_node);
|
3162: | }
|
3163: |
|
3164: | $common_ancestor->appendChild($last_node);
|
3165: |
|
3166: | |
3167: |
|
3168: | $clone = $formatting_element->cloneNode();
|
3169: |
|
3170: | |
3171: | |
3172: |
|
3173: | while ($furthest_block->hasChildNodes()) {
|
3174: | $child = $furthest_block->firstChild;
|
3175: | $furthest_block->removeChild($child);
|
3176: | $clone->appendChild($child);
|
3177: | }
|
3178: |
|
3179: |
|
3180: | $furthest_block->appendChild($clone);
|
3181: |
|
3182: | |
3183: | |
3184: | |
3185: |
|
3186: | $fe_af_pos = array_search($formatting_element, $this->a_formatting, true);
|
3187: | unset($this->a_formatting[$fe_af_pos]);
|
3188: | $this->a_formatting = array_merge($this->a_formatting);
|
3189: |
|
3190: | $af_part1 = array_slice($this->a_formatting, 0, $bookmark - 1);
|
3191: | $af_part2 = array_slice($this->a_formatting, $bookmark, count($this->a_formatting));
|
3192: | $this->a_formatting = array_merge($af_part1, array($clone), $af_part2);
|
3193: |
|
3194: | |
3195: | |
3196: | |
3197: | |
3198: |
|
3199: | $fe_s_pos = array_search($formatting_element, $this->stack, true);
|
3200: | $fb_s_pos = array_search($furthest_block, $this->stack, true);
|
3201: | unset($this->stack[$fe_s_pos]);
|
3202: |
|
3203: | $s_part1 = array_slice($this->stack, 0, $fb_s_pos);
|
3204: | $s_part2 = array_slice($this->stack, $fb_s_pos + 1, count($this->stack));
|
3205: | $this->stack = array_merge($s_part1, array($clone), $s_part2);
|
3206: |
|
3207: |
|
3208: | unset($formatting_element, $fe_af_pos, $fe_s_pos, $furthest_block);
|
3209: | }
|
3210: | break;
|
3211: |
|
3212: | |
3213: |
|
3214: | case 'button':
|
3215: | case 'marquee':
|
3216: | case 'object':
|
3217: | |
3218: | |
3219: |
|
3220: | if ($this->elementInScope($token['name'])) {
|
3221: | $this->generateImpliedEndTags();
|
3222: |
|
3223: | |
3224: |
|
3225: |
|
3226: |
|
3227: | |
3228: | |
3229: | |
3230: | |
3231: |
|
3232: | for ($n = count($this->stack) - 1; $n >= 0; $n--) {
|
3233: | if ($this->stack[$n]->nodeName === $token['name']) {
|
3234: | $n = -1;
|
3235: | }
|
3236: |
|
3237: | array_pop($this->stack);
|
3238: | }
|
3239: |
|
3240: | $marker = end(array_keys($this->a_formatting, self::MARKER, true));
|
3241: |
|
3242: | for ($n = count($this->a_formatting) - 1; $n > $marker; $n--) {
|
3243: | array_pop($this->a_formatting);
|
3244: | }
|
3245: | }
|
3246: | break;
|
3247: |
|
3248: | |
3249: | |
3250: | |
3251: |
|
3252: | case 'area':
|
3253: | case 'basefont':
|
3254: | case 'bgsound':
|
3255: | case 'br':
|
3256: | case 'embed':
|
3257: | case 'hr':
|
3258: | case 'iframe':
|
3259: | case 'image':
|
3260: | case 'img':
|
3261: | case 'input':
|
3262: | case 'isindex':
|
3263: | case 'noembed':
|
3264: | case 'noframes':
|
3265: | case 'param':
|
3266: | case 'select':
|
3267: | case 'spacer':
|
3268: | case 'table':
|
3269: | case 'textarea':
|
3270: | case 'wbr':
|
3271: |
|
3272: | break;
|
3273: |
|
3274: |
|
3275: | default:
|
3276: | for ($n = count($this->stack) - 1; $n >= 0; $n--) {
|
3277: | |
3278: |
|
3279: | $node = end($this->stack);
|
3280: |
|
3281: | |
3282: |
|
3283: | if ($token['name'] === $node->nodeName) {
|
3284: |
|
3285: | $this->generateImpliedEndTags();
|
3286: |
|
3287: | |
3288: | |
3289: |
|
3290: |
|
3291: |
|
3292: | |
3293: |
|
3294: | for ($x = count($this->stack) - $n; $x >= $n; $x--) {
|
3295: | array_pop($this->stack);
|
3296: | }
|
3297: |
|
3298: | } else {
|
3299: | $category = $this->getElementCategory($node);
|
3300: |
|
3301: | if ($category !== self::SPECIAL && $category !== self::SCOPING) {
|
3302: | |
3303: | |
3304: | |
3305: |
|
3306: | return false;
|
3307: | }
|
3308: | }
|
3309: | }
|
3310: | break;
|
3311: | }
|
3312: | break;
|
3313: | }
|
3314: | }
|
3315: |
|
/**
 * Processes one token with the table-level rules.
 *
 * Whitespace text and comments are appended to the current node, table
 * structure start tags open the matching section (inserting implied
 * colgroup/tbody elements where needed), </table> closes the table, and
 * everything else is handed to inBody() after a foster parent has been
 * selected.
 *
 * @param array $token Token with a 'type' key, plus 'name'/'attr' for tags
 *                     or 'data' for text and comments.
 * @return mixed false when a </table> without an open table is dropped, the
 *               delegated handler's result for td/th/tr/table start tags,
 *               null otherwise.
 */
private function inTable($token)
{
    $clear = array('html', 'table');
    $type = $token['type'];

    // Whitespace-only text goes straight into the current node.
    if ($type === HTML5::CHARACTR &&
        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
    ) {
        $text = $this->dom->createTextNode($token['data']);
        end($this->stack)->appendChild($text);
        return;
    }

    // Comments are attached to the current node as well.
    if ($type === HTML5::COMMENT) {
        $comment = $this->dom->createComment($token['data']);
        end($this->stack)->appendChild($comment);
        return;
    }

    if ($type === HTML5::STARTTAG) {
        $name = $token['name'];

        if ($name === 'caption') {
            $this->clearStackToTableContext($clear);
            // The marker fences off formatting elements opened inside the caption.
            $this->a_formatting[] = self::MARKER;
            $this->insertElement($token);
            $this->mode = self::IN_CAPTION;
            return;
        }

        if ($name === 'colgroup') {
            $this->clearStackToTableContext($clear);
            $this->insertElement($token);
            $this->mode = self::IN_CGROUP;
            return;
        }

        if ($name === 'col') {
            // Imply a <colgroup>, then let the column-group rules take the <col>.
            $this->inTable(
                array(
                    'name' => 'colgroup',
                    'type' => HTML5::STARTTAG,
                    'attr' => array()
                )
            );
            $this->inColumnGroup($token);
            return;
        }

        if (in_array($name, array('tbody', 'tfoot', 'thead'))) {
            $this->clearStackToTableContext($clear);
            $this->insertElement($token);
            $this->mode = self::IN_TBODY;
            return;
        }

        if (in_array($name, array('td', 'th', 'tr'))) {
            // Imply a <tbody>, then reprocess with the table-body rules.
            $this->inTable(
                array(
                    'name' => 'tbody',
                    'type' => HTML5::STARTTAG,
                    'attr' => array()
                )
            );
            return $this->inTableBody($token);
        }

        if ($name === 'table') {
            // A nested <table> start tag closes the open table first.
            $this->inTable(
                array(
                    'name' => 'table',
                    'type' => HTML5::ENDTAG
                )
            );
            return $this->mainPhase($token);
        }
    }

    if ($type === HTML5::ENDTAG) {
        $name = $token['name'];

        if ($name === 'table') {
            if (!$this->elementInScope($name, true)) {
                return false;
            }

            $this->generateImpliedEndTags();

            // Pop up to and including the table element.
            do {
                $closed = array_pop($this->stack);
            } while ($closed->nodeName !== 'table');

            $this->resetInsertionMode();
            return;
        }

        $ignored = array(
            'body',
            'caption',
            'col',
            'colgroup',
            'html',
            'tbody',
            'td',
            'tfoot',
            'th',
            'thead',
            'tr'
        );

        if (in_array($name, $ignored)) {
            // Stray end tag: dropped.
            return;
        }
    }

    // Anything else is processed with the in-body rules. When the current
    // node is a table-structure element, pick the foster parent first.
    $tableParts = array('table', 'tbody', 'tfoot', 'thead', 'tr');

    if (in_array(end($this->stack)->nodeName, $tableParts)) {
        $table = null;
        $tableIndex = null;

        // Find the innermost table on the stack of open elements.
        for ($i = count($this->stack) - 1; $i >= 0; $i--) {
            if ($this->stack[$i]->nodeName === 'table') {
                $table = $this->stack[$i];
                $tableIndex = $i;
                break;
            }
        }

        if ($table === null) {
            $this->foster_parent = $this->stack[0];
        } elseif ($table->parentNode !== null) {
            $this->foster_parent = $table->parentNode;
        } else {
            // Detached table: use the element just below it on the stack.
            $this->foster_parent = $this->stack[$tableIndex - 1];
        }
    }

    $this->inBody($token);
}
|
3527: |
|
/**
 * Processes one token with the caption rules.
 *
 * </caption> closes the caption and switches the mode back to IN_TABLE.
 * Table-structure start tags and </table> implicitly close the caption and
 * are then reprocessed by inTable(). Stray table-related end tags are
 * ignored; every other token is handled by inBody().
 *
 * @param array $token Token with a 'type' key, plus 'name'/'attr' for tags
 *                     or 'data' for text and comments.
 * @return mixed Result of inTable() when the token is reprocessed there,
 *               null otherwise.
 */
private function inCaption($token)
{
    if ($token['type'] === HTML5::ENDTAG && $token['name'] === 'caption') {
        // A </caption> with no caption in table scope is ignored.
        if ($this->elementInScope($token['name'], true)) {
            $this->generateImpliedEndTags();

            // Pop up to and including the caption element.
            while (true) {
                $node = end($this->stack)->nodeName;
                array_pop($this->stack);

                if ($node === 'caption') {
                    break;
                }
            }

            $this->clearTheActiveFormattingElementsUpToTheLastMarker();
            $this->mode = self::IN_TABLE;
        }

    } elseif (($token['type'] === HTML5::STARTTAG && in_array(
        $token['name'],
        array(
            'caption',
            'col',
            'colgroup',
            'tbody',
            'td',
            'tfoot',
            'th',
            'thead',
            'tr'
        )
    )) || ($token['type'] === HTML5::ENDTAG &&
        $token['name'] === 'table')
    ) {
        // Act as if </caption> had been seen, then reprocess the token.
        $this->inCaption(
            array(
                'name' => 'caption',
                'type' => HTML5::ENDTAG
            )
        );

        return $this->inTable($token);

    } elseif ($token['type'] === HTML5::ENDTAG && in_array(
        $token['name'],
        array(
            'body',
            'col',
            'colgroup',
            'html',
            'tbody',
            'td',
            'tfoot',
            'th',
            'thead',
            'tr'
        )
    )
    ) {
        // Stray end tag: ignored. 'td' belongs in this list alongside 'th'
        // (HTML5 "in caption" rules).

    } else {
        $this->inBody($token);
    }
}
|
3622: |
|
/**
 * Processes one token with the column-group rules.
 *
 * Whitespace text and comments are appended to the current node, <col> is
 * inserted and immediately popped, </colgroup> closes the group and returns
 * to IN_TABLE, </col> is ignored, and any other token implicitly closes the
 * group before being reprocessed by inTable().
 *
 * @param array $token Token with a 'type' key, plus 'name'/'attr' for tags
 *                     or 'data' for text and comments.
 * @return mixed Result of inTable() when the token is reprocessed there,
 *               null otherwise.
 */
private function inColumnGroup($token)
{
    $type = $token['type'];

    if ($type === HTML5::CHARACTR &&
        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
    ) {
        // Whitespace-only text is kept inside the current node.
        $whitespace = $this->dom->createTextNode($token['data']);
        end($this->stack)->appendChild($whitespace);
        return;
    }

    if ($type === HTML5::COMMENT) {
        $node = $this->dom->createComment($token['data']);
        end($this->stack)->appendChild($node);
        return;
    }

    if ($type === HTML5::STARTTAG && $token['name'] === 'col') {
        // <col> has no content, so it leaves the stack right after insertion.
        $this->insertElement($token);
        array_pop($this->stack);
        return;
    }

    if ($type === HTML5::ENDTAG && $token['name'] === 'colgroup') {
        // With the root element as current node there is no colgroup to
        // close and the tag is ignored.
        if (end($this->stack)->nodeName !== 'html') {
            array_pop($this->stack);
            $this->mode = self::IN_TABLE;
        }
        return;
    }

    if ($type === HTML5::ENDTAG && $token['name'] === 'col') {
        // Stray </col>: ignored.
        return;
    }

    // Anything else: act as if </colgroup> had been seen, then reprocess.
    $this->inColumnGroup(
        array(
            'name' => 'colgroup',
            'type' => HTML5::ENDTAG
        )
    );

    return $this->inTable($token);
}
|
3684: |
|
/**
 * Processes one token with the table-body (tbody/thead/tfoot) rules.
 *
 * <tr> opens a row, <td>/<th> imply a <tr> first, a section end tag closes
 * the current section, and a new section start tag or </table> closes the
 * current section before the token is reprocessed by mainPhase(). Stray
 * end tags are ignored; everything else is handled by inTable().
 *
 * @param array $token Token with a 'type' key, plus 'name'/'attr' for tags
 *                     or 'data' for text and comments.
 * @return mixed Result of the handler the token was reprocessed by, or null.
 */
private function inTableBody($token)
{
    $clear = array('tbody', 'tfoot', 'thead', 'html');

    if ($token['type'] === HTML5::STARTTAG && $token['name'] === 'tr') {
        $this->clearStackToTableContext($clear);
        $this->insertElement($token);
        $this->mode = self::IN_ROW;

    } elseif ($token['type'] === HTML5::STARTTAG &&
        ($token['name'] === 'th' || $token['name'] === 'td')
    ) {
        // A cell without a row: imply <tr>, then reprocess with the row rules.
        $this->inTableBody(
            array(
                'name' => 'tr',
                'type' => HTML5::STARTTAG,
                'attr' => array()
            )
        );

        return $this->inRow($token);

    } elseif ($token['type'] === HTML5::ENDTAG &&
        in_array($token['name'], array('tbody', 'tfoot', 'thead'))
    ) {
        // An end tag whose section is not in table scope is ignored.
        if ($this->elementInScope($token['name'], true)) {
            $this->clearStackToTableContext($clear);
            array_pop($this->stack);
            $this->mode = self::IN_TABLE;
        }

    } elseif (($token['type'] === HTML5::STARTTAG && in_array(
        $token['name'],
        array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead')
    )) ||
        ($token['type'] === HTML5::ENDTAG && $token['name'] === 'table')
    ) {
        // This rule covers the table END tag (a nested <table> start tag is
        // handled by inTable() through the final branch), and the section
        // name is 'tfoot'.
        if ($this->elementInScope(array('tbody', 'thead', 'tfoot'), true)) {
            $this->clearStackToTableContext($clear);

            // Close the current section (the current node after clearing),
            // then reprocess the token.
            $this->inTableBody(
                array(
                    'name' => end($this->stack)->nodeName,
                    'type' => HTML5::ENDTAG
                )
            );

            return $this->mainPhase($token);
        }

    } elseif ($token['type'] === HTML5::ENDTAG && in_array(
        $token['name'],
        array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr')
    )
    ) {
        // Stray end tag: ignored.

    } else {
        $this->inTable($token);
    }
}
|
3783: |
|
/**
 * Processes one token with the table-row rules.
 *
 * <td>/<th> open a cell, </tr> closes the row and returns to IN_TBODY, and
 * table-structure start tags or section end tags close the row first and
 * are then reprocessed by inTableBody(). Stray end tags are ignored;
 * everything else is handled by inTable().
 *
 * @param array $token Token with a 'type' key, plus 'name'/'attr' for tags
 *                     or 'data' for text and comments.
 * @return mixed Result of inTableBody() when the token is reprocessed there,
 *               null otherwise.
 */
private function inRow($token)
{
    $clear = array('tr', 'html');
    $endRow = array(
        'name' => 'tr',
        'type' => HTML5::ENDTAG
    );

    if ($token['type'] === HTML5::STARTTAG &&
        ($token['name'] === 'th' || $token['name'] === 'td')
    ) {
        $this->clearStackToTableContext($clear);
        $this->insertElement($token);
        $this->mode = self::IN_CELL;

        // The marker fences off formatting elements opened inside the cell.
        $this->a_formatting[] = self::MARKER;

    } elseif ($token['type'] === HTML5::ENDTAG && $token['name'] === 'tr') {
        // A </tr> with no row in table scope is ignored.
        if ($this->elementInScope($token['name'], true)) {
            $this->clearStackToTableContext($clear);
            array_pop($this->stack);
            $this->mode = self::IN_TBODY;
        }

    } elseif ($token['type'] === HTML5::STARTTAG && in_array(
        $token['name'],
        array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead', 'tr')
    )
    ) {
        // Act as if </tr> had been seen. Closing the row switches the mode
        // to IN_TBODY, so the token is reprocessed by the table-body rules;
        // the cell rules would drop it because no cell is open any more.
        // If the implied </tr> would itself be ignored, so is the token.
        if ($this->elementInScope('tr', true)) {
            $this->inRow($endRow);

            return $this->inTableBody($token);
        }

    } elseif ($token['type'] === HTML5::ENDTAG &&
        in_array($token['name'], array('tbody', 'tfoot', 'thead'))
    ) {
        // Ignored unless both the named section and a row are in table
        // scope; otherwise close the row and let the table-body rules close
        // the section.
        if ($this->elementInScope($token['name'], true) &&
            $this->elementInScope('tr', true)
        ) {
            $this->inRow($endRow);

            return $this->inTableBody($token);
        }

    } elseif ($token['type'] === HTML5::ENDTAG && in_array(
        $token['name'],
        array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr')
    )
    ) {
        // Stray end tag: ignored.

    } else {
        $this->inTable($token);
    }
}
|
3881: |
|
/**
 * Processes one token with the table-cell rules.
 *
 * </td> and </th> close the open cell and return to IN_ROW. Table-structure
 * start tags and table/section/row end tags close the cell via closeCell()
 * and are then reprocessed by inRow(). Stray end tags are ignored; every
 * other token is handled by inBody().
 *
 * @param array $token Token with a 'type' key, plus 'name'/'attr' for tags
 *                     or 'data' for text and comments.
 * @return mixed Result of inRow() when the token is reprocessed there,
 *               null otherwise.
 */
private function inCell($token)
{
    if ($token['type'] === HTML5::ENDTAG &&
        ($token['name'] === 'td' || $token['name'] === 'th')
    ) {
        // An end tag whose cell is not in table scope is ignored.
        if ($this->elementInScope($token['name'], true)) {
            $this->generateImpliedEndTags(array($token['name']));

            // Pop up to and including the cell element.
            while (true) {
                $node = end($this->stack)->nodeName;
                array_pop($this->stack);

                if ($node === $token['name']) {
                    break;
                }
            }

            $this->clearTheActiveFormattingElementsUpToTheLastMarker();
            $this->mode = self::IN_ROW;
        }

    } elseif ($token['type'] === HTML5::STARTTAG && in_array(
        $token['name'],
        array(
            'caption',
            'col',
            'colgroup',
            'tbody',
            'td',
            'tfoot',
            'th',
            'thead',
            'tr'
        )
    )
    ) {
        // Ignored when no cell is open; otherwise close it and reprocess.
        if ($this->elementInScope(array('td', 'th'), true)) {
            $this->closeCell();
            return $this->inRow($token);
        }

    } elseif ($token['type'] === HTML5::ENDTAG && in_array(
        $token['name'],
        array('body', 'caption', 'col', 'colgroup', 'html')
    )
    ) {
        // Stray end tag: ignored.

    } elseif ($token['type'] === HTML5::ENDTAG && in_array(
        $token['name'],
        array('table', 'tbody', 'tfoot', 'thead', 'tr')
    )
    ) {
        // Ignored unless the named element is in table scope; otherwise
        // close the cell and reprocess with the row rules.
        if ($this->elementInScope($token['name'], true)) {
            $this->closeCell();
            return $this->inRow($token);
        }

    } else {
        $this->inBody($token);
    }
}
|
4020: |
|
/**
 * Processes one token with the select rules.
 *
 * Text and comments are inserted as-is. <option>/<optgroup> start tags
 * close a still-open option (and optgroup) before inserting the new
 * element, their end tags pop the matching current node, </select> (or a
 * nested <select> start tag) closes the select, and table-related end tags
 * that match an open element close the select and are reprocessed. All
 * other tokens are ignored.
 *
 * @param array $token Token with a 'type' key, plus 'name'/'attr' for tags
 *                     or 'data' for text and comments.
 * @return void
 */
private function inSelect($token)
{
    $endOption = array(
        'name' => 'option',
        'type' => HTML5::ENDTAG
    );
    $endSelect = array(
        'name' => 'select',
        'type' => HTML5::ENDTAG
    );

    if ($token['type'] === HTML5::CHARACTR) {
        $this->insertText($token['data']);

    } elseif ($token['type'] === HTML5::COMMENT) {
        $this->insertComment($token['data']);

    } elseif ($token['type'] === HTML5::STARTTAG &&
        $token['name'] === 'option'
    ) {
        // Options do not nest: close an open one first.
        if (end($this->stack)->nodeName === 'option') {
            $this->inSelect($endOption);
        }

        $this->insertElement($token);

    } elseif ($token['type'] === HTML5::STARTTAG &&
        $token['name'] === 'optgroup'
    ) {
        // Close an open option, then an open optgroup, before inserting.
        if (end($this->stack)->nodeName === 'option') {
            $this->inSelect($endOption);
        }

        if (end($this->stack)->nodeName === 'optgroup') {
            $this->inSelect(
                array(
                    'name' => 'optgroup',
                    'type' => HTML5::ENDTAG
                )
            );
        }

        $this->insertElement($token);

    } elseif ($token['type'] === HTML5::ENDTAG &&
        $token['name'] === 'optgroup'
    ) {
        // An option directly inside an optgroup is closed implicitly.
        $depth = count($this->stack);

        if ($depth >= 2 &&
            $this->stack[$depth - 1]->nodeName === 'option' &&
            $this->stack[$depth - 2]->nodeName === 'optgroup'
        ) {
            $this->inSelect($endOption);
        }

        // Compare the node NAME of the node that is current now (the stack
        // may just have shrunk); otherwise the end tag is ignored.
        if (end($this->stack)->nodeName === 'optgroup') {
            array_pop($this->stack);
        }

    } elseif ($token['type'] === HTML5::ENDTAG &&
        $token['name'] === 'option'
    ) {
        // Only pops when the current node is an option; otherwise ignored.
        if (end($this->stack)->nodeName === 'option') {
            array_pop($this->stack);
        }

    } elseif ($token['type'] === HTML5::ENDTAG &&
        $token['name'] === 'select'
    ) {
        // A </select> with no select in table scope is ignored.
        if ($this->elementInScope($token['name'], true)) {
            // Pop up to and including the select element.
            while (true) {
                $current = end($this->stack)->nodeName;
                array_pop($this->stack);

                if ($current === 'select') {
                    break;
                }
            }

            $this->resetInsertionMode();
        }

    } elseif ($token['name'] === 'select' &&
        $token['type'] === HTML5::STARTTAG
    ) {
        // A nested <select> start tag is treated as </select>.
        $this->inSelect($endSelect);

    } elseif (in_array(
        $token['name'],
        array(
            'caption',
            'table',
            'tbody',
            'tfoot',
            'thead',
            'tr',
            'td',
            'th'
        )
    ) && $token['type'] === HTML5::ENDTAG
    ) {
        // If the named table element is open, close the select and
        // reprocess the end tag; otherwise ignore it.
        if ($this->elementInScope($token['name'], true)) {
            $this->inSelect($endSelect);
            $this->mainPhase($token);
        }

    } else {
        // Anything else: ignored.
    }
}
|
4201: |
|
/**
 * Processes one token seen after the body element has been closed.
 *
 * Whitespace is handled by inBody(), comments are appended to the first
 * element on the stack, </html> switches to the trailing end phase, and any
 * other token switches the mode back to IN_BODY and is reprocessed there.
 *
 * @param array $token Token with a 'type' key, plus 'name'/'attr' for tags
 *                     or 'data' for text and comments.
 * @return mixed Result of inBody() for tokens that reopen the body,
 *               null otherwise.
 */
private function afterBody($token)
{
    $type = $token['type'];

    if ($type === HTML5::CHARACTR &&
        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
    ) {
        // Whitespace-only text: same treatment as inside the body.
        $this->inBody($token);
        return;
    }

    if ($type === HTML5::COMMENT) {
        // Comments here become children of the first element on the stack.
        $this->stack[0]->appendChild(
            $this->dom->createComment($token['data'])
        );
        return;
    }

    if ($type === HTML5::ENDTAG && $token['name'] === 'html') {
        $this->phase = self::END_PHASE;
        return;
    }

    // Anything else reopens the body.
    $this->mode = self::IN_BODY;
    return $this->inBody($token);
}
|
4242: |
|
/**
 * Processes one token with the frameset rules.
 *
 * Whitespace and comments are inserted, <frameset> nests, </frameset> pops
 * the current frameset and switches to AFTR_FRAME, <frame> is inserted and
 * immediately popped, <noframes> is handled by inBody(), and all other
 * tokens are ignored.
 *
 * The token type is tested before 'name' is read, as the other mode
 * handlers do, so non-whitespace character tokens reaching the tag branches
 * never hit a 'name' key they may not carry.
 *
 * @param array $token Token with a 'type' key, plus 'name'/'attr' for tags
 *                     or 'data' for text and comments.
 * @return void
 */
private function inFrameset($token)
{
    if ($token['type'] === HTML5::CHARACTR &&
        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
    ) {
        $this->insertText($token['data']);

    } elseif ($token['type'] === HTML5::COMMENT) {
        $this->insertComment($token['data']);

    } elseif ($token['type'] === HTML5::STARTTAG &&
        $token['name'] === 'frameset'
    ) {
        $this->insertElement($token);

    } elseif ($token['type'] === HTML5::ENDTAG &&
        $token['name'] === 'frameset'
    ) {
        // With the root element as current node there is no frameset to
        // close and the tag is ignored.
        if (end($this->stack)->nodeName !== 'html') {
            array_pop($this->stack);
            $this->mode = self::AFTR_FRAME;
        }

    } elseif ($token['type'] === HTML5::STARTTAG &&
        $token['name'] === 'frame'
    ) {
        // <frame> has no content, so it leaves the stack right away.
        $this->insertElement($token);
        array_pop($this->stack);

    } elseif ($token['type'] === HTML5::STARTTAG &&
        $token['name'] === 'noframes'
    ) {
        $this->inBody($token);

    } else {
        // Anything else: ignored.
    }
}
|
4311: |
|
/**
 * Processes one token seen after the outermost frameset has been closed.
 *
 * Whitespace and comments are inserted, </html> switches to the trailing
 * end phase, <noframes> is handled by inBody(), and all other tokens are
 * ignored.
 *
 * The token type is tested before 'name' is read, as the other mode
 * handlers do, so non-whitespace character tokens reaching the tag branches
 * never hit a 'name' key they may not carry.
 *
 * @param array $token Token with a 'type' key, plus 'name'/'attr' for tags
 *                     or 'data' for text and comments.
 * @return void
 */
private function afterFrameset($token)
{
    if ($token['type'] === HTML5::CHARACTR &&
        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
    ) {
        $this->insertText($token['data']);

    } elseif ($token['type'] === HTML5::COMMENT) {
        $this->insertComment($token['data']);

    } elseif ($token['type'] === HTML5::ENDTAG &&
        $token['name'] === 'html'
    ) {
        $this->phase = self::END_PHASE;

    } elseif ($token['type'] === HTML5::STARTTAG &&
        $token['name'] === 'noframes'
    ) {
        $this->inBody($token);

    } else {
        // Anything else: ignored.
    }
}
|
4350: |
|
/**
 * Processes one token in the trailing end phase (entered via END_PHASE).
 *
 * DOCTYPE tokens are ignored, comments are appended to the document itself,
 * and whitespace is processed by mainPhase() without leaving this phase.
 * Any other character data, start tag or end tag switches back to
 * MAIN_PHASE and is reprocessed there. EOF needs no action.
 *
 * @param array $token Token with a 'type' key, plus 'name'/'attr' for tags
 *                     or 'data' for text and comments.
 * @return mixed Result of mainPhase() when the phase is switched back,
 *               null otherwise.
 */
private function trailingEndPhase($token)
{
    if ($token['type'] === HTML5::DOCTYPE) {
        // Ignored.

    } elseif ($token['type'] === HTML5::COMMENT) {
        $comment = $this->dom->createComment($token['data']);
        $this->dom->appendChild($comment);

    } elseif ($token['type'] === HTML5::CHARACTR &&
        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
    ) {
        // Whitespace-only text is processed as in the main phase, without
        // switching phases.
        $this->mainPhase($token);

    } elseif ($token['type'] === HTML5::CHARACTR ||
        $token['type'] === HTML5::STARTTAG ||
        $token['type'] === HTML5::ENDTAG
    ) {
        // Whitespace-only text was caught by the previous branch, so a
        // character token arriving here contains real content. It must be
        // reprocessed in the main phase rather than silently dropped.
        $this->phase = self::MAIN_PHASE;
        return $this->mainPhase($token);

    } elseif ($token['type'] === HTML5::EOF) {
        // Nothing left to do.
    }
}
|
4393: |
|
/**
 * Creates an element for a tag token, copies its attributes, attaches it
 * through appendToRealParent() and pushes it onto the stack.
 *
 * @param array $token  Token read via 'name' and 'attr' (a list of
 *                      arrays with 'name' and 'value').
 * @param bool  $append Part of the signature; not consulted by this body.
 * @param bool  $check  When true, the tag name is sanitised first: every
 *                      character other than ASCII letters, digits and
 *                      hyphens is removed, leading hyphens/digits are
 *                      trimmed, and an empty result becomes 'span'.
 * @return DOMElement The newly created element.
 */
private function insertElement($token, $append = true, $check = false)
{
    $tagName = $token['name'];

    if ($check) {
        // Keep only characters that are safe in an element name.
        $tagName = preg_replace('/[^a-z0-9-]/i', '', $tagName);

        // A name may not start with a hyphen or a digit.
        $tagName = ltrim($tagName, '-0..9');

        // Fall back to a generic element when nothing is left.
        if ($tagName === '') {
            $tagName = 'span';
        }
    }

    $element = $this->dom->createElement($tagName);

    foreach ($token['attr'] as $attribute) {
        // The first occurrence of an attribute name wins.
        if ($element->hasAttribute($attribute['name'])) {
            continue;
        }

        $element->setAttribute($attribute['name'], (string)$attribute['value']);
    }

    $this->appendToRealParent($element);
    $this->stack[] = $element;

    return $element;
}
|
4422: |
|
/**
 * Creates a text node holding $data and attaches it through
 * appendToRealParent().
 *
 * @param string $data Text content of the new node.
 * @return void
 */
private function insertText($data)
{
    $this->appendToRealParent($this->dom->createTextNode($data));
}
|
4428: |
|
/**
 * Creates a comment node holding $data and attaches it through
 * appendToRealParent().
 *
 * @param string $data Comment content of the new node.
 * @return void
 */
private function insertComment($data)
{
    $this->appendToRealParent($this->dom->createComment($data));
}
|
4434: |
|
/**
 * Attaches a node to the tree, honouring a pending foster parent.
 *
 * With no foster parent set, the node becomes the last child of the
 * element on top of the stack. Otherwise the stack is searched from the
 * top for the nearest "table" element that has a parent node: if the
 * foster parent is that table's parent, the node is inserted immediately
 * before the table; in every other case it is appended to the foster
 * parent. The foster parent is cleared afterwards.
 *
 * @param DOMNode $node Node to attach.
 * @return void
 */
private function appendToRealParent($node)
{
    if ($this->foster_parent === null) {
        end($this->stack)->appendChild($node);
        return;
    }

    // Nearest open table that is actually attached to a parent.
    $table = null;
    for ($i = count($this->stack) - 1; $i >= 0; $i--) {
        $candidate = $this->stack[$i];

        if ($candidate->nodeName === 'table' && $candidate->parentNode !== null) {
            $table = $candidate;
            break;
        }
    }

    if ($table !== null && $this->foster_parent->isSameNode($table->parentNode)) {
        $this->foster_parent->insertBefore($node, $table);
    } else {
        $this->foster_parent->appendChild($node);
    }

    // Foster parenting applies to a single insertion only.
    $this->foster_parent = null;
}
|
4465: |
|
/**
 * Tells whether an element with the given tag name is "in scope" on the
 * stack.
 *
 * The stack is walked from the top. The walk succeeds at the first node
 * whose tag name equals $el, and fails at the first "table" node, at a
 * caption/td/th/button/marquee/object node when $table is true, or at
 * the document's root element.
 *
 * NOTE(review): the boundary list is applied when $table === true;
 * confirm against the HTML5 "has an element in scope" / "in table scope"
 * definitions that this is the intended variant.
 *
 * @param string|array $el    Tag name, or a list of tag names of which
 *                            any one being in scope is sufficient.
 * @param bool         $table Enables the extra boundary list above.
 * @return bool|null True or false as described. When the whole stack is
 *                   walked without reaching any of the conditions the
 *                   method ends without a return statement (null).
 */
private function elementInScope($el, $table = false)
{
    if (is_array($el)) {
        // Any single match is enough.
        foreach ($el as $candidate) {
            if ($this->elementInScope($candidate, $table)) {
                return true;
            }
        }

        return false;
    }

    $boundaries = array(
        'caption',
        'td',
        'th',
        'button',
        'marquee',
        'object'
    );

    for ($i = count($this->stack) - 1; $i >= 0; $i--) {
        $current = $this->stack[$i];
        $tag = $current->tagName;

        if ($tag === $el) {
            // Found the target element.
            return true;
        }

        if ($tag === 'table') {
            // A table always ends the search.
            return false;
        }

        if ($table === true && in_array($tag, $boundaries)) {
            return false;
        }

        if ($current === $current->ownerDocument->documentElement) {
            // Reached the root element without a match.
            return false;
        }
    }
}
|
4525: |
|
/**
 * Re-opens formatting elements that are listed in $this->a_formatting
 * but are no longer on the stack.
 *
 * Steps:
 *  1. Nothing to do when the list is empty, or when its last entry is a
 *     marker or is already on the stack.
 *  2. Rewind from the last entry towards the start until an entry is
 *     found that is a marker or is on the stack, or until the first
 *     entry is reached.
 *  3. Starting with the entry AFTER the one the rewind stopped on (or
 *     with the first entry when the rewind ran off the start), clone each
 *     entry, append the clone to the element on top of the stack, push
 *     it, and replace the list entry with the clone, up to and including
 *     the last list entry.
 *
 * Previously the "advance to the next entry" flag was left unset when
 * the rewind stopped on a marker / open element, so the first pass of
 * step 3 cloned that very entry: an already-open element was duplicated,
 * or cloneNode() was called on the marker value. The flag is now set
 * explicitly on that path.
 *
 * Assumes $this->a_formatting is a 0-based list, as the index arithmetic
 * requires.
 *
 * @return false|null False when step 1 short-circuits, otherwise null.
 */
private function reconstructActiveFormattingElements()
{
    $total = count($this->a_formatting);

    if ($total === 0) {
        return false;
    }

    $entry = end($this->a_formatting);

    if ($entry === self::MARKER || in_array($entry, $this->stack, true)) {
        return false;
    }

    // Rewind. $advance records whether we stopped ON an entry that must
    // be skipped (marker / open element) rather than on the first entry.
    $a = $total - 1;
    $advance = false;

    while ($a > 0) {
        $a--;
        $entry = $this->a_formatting[$a];

        if ($entry === self::MARKER || in_array($entry, $this->stack, true)) {
            $advance = true;
            break;
        }
    }

    while (true) {
        if ($advance) {
            // Move on to the entry one later in the list.
            $a++;
            $entry = $this->a_formatting[$a];
        }

        // Shallow clone: same element and attributes, no children.
        $clone = $entry->cloneNode();

        end($this->stack)->appendChild($clone);
        $this->stack[] = $clone;

        // The list now refers to the re-opened element.
        $this->a_formatting[$a] = $clone;

        if (end($this->a_formatting) === $clone) {
            // The last entry has been re-opened; done.
            break;
        }

        $advance = true;
    }
}
|
4597: |
|
/**
 * Removes entries from the end of $this->a_formatting up to and
 * including the most recent marker.
 *
 * The loop also stops once the list is empty. Previously it could only
 * exit on a marker, so a list without one would keep popping from an
 * empty array forever.
 *
 * @return void
 */
private function clearTheActiveFormattingElementsUpToTheLastMarker()
{
    while (count($this->a_formatting) > 0) {
        // Take the last entry off the list.
        $entry = array_pop($this->a_formatting);

        if ($entry === self::MARKER) {
            // The marker itself is removed too, then we stop.
            break;
        }
    }
}
|
4619: |
|
/**
 * Pops elements off the stack for as long as the top element is one of
 * dd, dt, li, p, td, th or tr, skipping any names listed in $exclude.
 *
 * Compared with the earlier body: the unused local holding the top of
 * the stack is gone, the membership test is strict (all values compared
 * are strings), and an empty stack ends the loop cleanly instead of
 * reading a property of the boolean false that end() returns.
 *
 * @param array $exclude Tag names that must not be closed implicitly.
 * @return void
 */
private function generateImpliedEndTags($exclude = array())
{
    $implied = array_diff(array('dd', 'dt', 'li', 'p', 'td', 'th', 'tr'), $exclude);

    while (($current = end($this->stack)) !== false &&
        in_array($current->nodeName, $implied, true)
    ) {
        array_pop($this->stack);
    }
}
|
4634: |
|
/**
 * Classifies an element by tag name.
 *
 * The name is looked up in $this->special, $this->scoping and
 * $this->formatting, in that order; the first list containing it decides
 * the category. A name found in none of them is PHRASING.
 *
 * @param DOMElement $node Element to classify (read via tagName).
 * @return mixed One of self::SPECIAL, self::SCOPING, self::FORMATTING or
 *               self::PHRASING.
 */
private function getElementCategory($node)
{
    $tag = $node->tagName;

    if (in_array($tag, $this->special)) {
        return self::SPECIAL;
    }

    if (in_array($tag, $this->scoping)) {
        return self::SCOPING;
    }

    if (in_array($tag, $this->formatting)) {
        return self::FORMATTING;
    }

    return self::PHRASING;
}
|
4648: |
|
/**
 * Pops elements off the stack until the top element's node name is one
 * of $elements. The matching element itself stays on the stack.
 *
 * The loop also ends when the stack runs empty. Previously a stack that
 * contained none of the requested names would be emptied and the loop
 * would then keep spinning on the empty array.
 *
 * @param array $elements Node names at which clearing stops.
 * @return void
 */
private function clearStackToTableContext($elements)
{
    while (($current = end($this->stack)) !== false) {
        if (in_array($current->nodeName, $elements)) {
            // Reached the requested context.
            break;
        }

        array_pop($this->stack);
    }
}
|
4666: |
|
/**
 * Re-derives $this->mode from the elements currently on the stack.
 *
 * The stack is scanned from the top. The first node whose name appears
 * in the table below fixes the mode. An "html" node selects BEFOR_HEAD
 * when no head pointer is set and AFTER_HEAD otherwise. If the bottom
 * stack entry is reached without any of those names matching, the mode
 * becomes IN_BODY. With an empty stack the mode is left untouched.
 *
 * Note that "head" maps to IN_BODY, as in the original branch chain.
 *
 * @return void
 */
private function resetInsertionMode()
{
    // Node name => insertion mode.
    $modeByName = array(
        'select'   => self::IN_SELECT,
        'td'       => self::IN_CELL,
        'th'       => self::IN_CELL,
        'tr'       => self::IN_ROW,
        'tbody'    => self::IN_TBODY,
        'thead'    => self::IN_TBODY,
        'tfoot'    => self::IN_TBODY,
        'caption'  => self::IN_CAPTION,
        'colgroup' => self::IN_CGROUP,
        'table'    => self::IN_TABLE,
        'head'     => self::IN_BODY,
        'body'     => self::IN_BODY,
        'frameset' => self::IN_FRAME
    );

    $reachedBottom = false;

    for ($i = count($this->stack) - 1; $i >= 0; $i--) {
        $current = $this->stack[$i];

        // Remember when the scan has arrived at the first stack entry.
        if ($this->stack[0]->isSameNode($current)) {
            $reachedBottom = true;
        }

        $name = $current->nodeName;

        if (array_key_exists($name, $modeByName)) {
            $this->mode = $modeByName[$name];
            return;
        }

        if ($name === 'html') {
            if ($this->head_pointer === null) {
                $this->mode = self::BEFOR_HEAD;
            } else {
                $this->mode = self::AFTER_HEAD;
            }
            return;
        }

        if ($reachedBottom) {
            // Bottom of the stack and nothing recognised.
            $this->mode = self::IN_BODY;
            return;
        }
    }
}
|
4765: |
|
/**
 * Closes the currently open table cell, if any.
 *
 * "td" is tried first, then "th". For the first of the two that
 * elementInScope() reports (with its second argument set to true), a
 * synthetic end-tag token is handed to inCell() and the search stops.
 *
 * @return void
 */
private function closeCell()
{
    $cellNames = array('td', 'th');

    foreach ($cellNames as $cellName) {
        if (!$this->elementInScope($cellName, true)) {
            continue;
        }

        // Act as if the matching end tag had been seen.
        $endTag = array(
            'name' => $cellName,
            'type' => HTML5::ENDTAG
        );
        $this->inCell($endTag);

        return;
    }
}
|
4783: |
|
/**
 * Returns the document held in $this->dom.
 *
 * @return mixed The value of $this->dom; the other methods of this class
 *               use it as a DOM document (createElement(),
 *               createTextNode(), createComment(), appendChild()).
 */
public function save()
{
    return $this->dom;
}
|
4788: | }
|
4789: | |