| 1: | <?php
|
| 2: |
|
| 3: | |
| 4: | |
| 5: | |
| 6: | |
| 7: | |
| 8: | |
| 9: | |
| 10: | |
| 11: |
|
| 12: |
|
/**
 * Lexer that builds a DOM with the HTML5 parser defined in this file and
 * then hands the resulting <body> element to the inherited tokenizeDOM().
 */
class HTMLPurifier_Lexer_PH5P extends HTMLPurifier_Lexer_DOMLex
{
    /**
     * Tokenizes an HTML string.
     *
     * The input is passed through the inherited normalize() and wrapHTML()
     * (with false as wrapHTML()'s fourth argument) before being parsed by
     * HTML5. If parsing throws a DOMException, the exception is registered
     * in $context under 'PH5PError' and the ORIGINAL $html is tokenized
     * with HTMLPurifier_Lexer_DirectLex instead.
     *
     * @param string $html    markup to tokenize
     * @param mixed  $config  forwarded unchanged to the helpers called here
     * @param mixed  $context forwarded to the helpers; must offer register()
     * @return array tokens collected by tokenizeDOM(), or the fallback
     *               lexer's result when parsing failed
     */
    public function tokenizeHTML($html, $config, $context)
    {
        $prepared = $this->wrapHTML(
            $this->normalize($html, $config, $context),
            $config,
            $context,
            false
        );

        try {
            $builder = new HTML5($prepared);
            $document = $builder->save();
        } catch (DOMException $failure) {
            // Parsing failed: record why, then fall back to the direct lexer.
            $fallback = new HTMLPurifier_Lexer_DirectLex();
            $context->register('PH5PError', $failure);
            return $fallback->tokenizeHTML($html, $config, $context);
        }

        // Only the contents of the first <body> inside the first <html> are tokenized.
        $root = $document->getElementsByTagName('html')->item(0);
        $body = $root->getElementsByTagName('body')->item(0);

        $tokens = array();
        $this->tokenizeDOM($body, $tokens, $config);
        return $tokens;
    }
}
|
| 44: |
|
| 45: | |
| 46: | |
| 47: | |
| 48: | |
| 49: | |
| 50: | |
| 51: | |
| 52: | |
| 53: | |
| 54: | |
| 55: | |
| 56: | |
| 57: | |
| 58: | |
| 59: | |
| 60: | |
| 61: | |
| 62: | |
| 63: | |
| 64: | |
| 65: | |
| 66: | |
| 67: | |
| 68: |
|
| 69: |
|
| 70: | class HTML5
|
| 71: | {
|
// Input text being tokenized; assigned from the constructor argument.
private $data;
// Offset into $data of the character currently being examined; the
// constructor starts it at -1 and each state advances it before reading.
private $char;
// strlen($data); an offset equal to this value means end of input.
private $EOF;
// Name of the current state. The constructor repeatedly calls the method
// named $state . 'State' until this becomes null.
private $state;
// HTML5TreeConstructer instance created in the constructor; save() delegates to it.
private $tree;
// Token (an array) currently being assembled by the tag, comment and doctype states.
private $token;
// Current content model: one of the PCDATA / RCDATA / CDATA / PLAINTEXT constants.
private $content_model;
// Escape flag read and written by dataState() while in RCDATA or CDATA content.
private $escape = false;
|
| 80: | private $entities = array(
|
| 81: | 'AElig;',
|
| 82: | 'AElig',
|
| 83: | 'AMP;',
|
| 84: | 'AMP',
|
| 85: | 'Aacute;',
|
| 86: | 'Aacute',
|
| 87: | 'Acirc;',
|
| 88: | 'Acirc',
|
| 89: | 'Agrave;',
|
| 90: | 'Agrave',
|
| 91: | 'Alpha;',
|
| 92: | 'Aring;',
|
| 93: | 'Aring',
|
| 94: | 'Atilde;',
|
| 95: | 'Atilde',
|
| 96: | 'Auml;',
|
| 97: | 'Auml',
|
| 98: | 'Beta;',
|
| 99: | 'COPY;',
|
| 100: | 'COPY',
|
| 101: | 'Ccedil;',
|
| 102: | 'Ccedil',
|
| 103: | 'Chi;',
|
| 104: | 'Dagger;',
|
| 105: | 'Delta;',
|
| 106: | 'ETH;',
|
| 107: | 'ETH',
|
| 108: | 'Eacute;',
|
| 109: | 'Eacute',
|
| 110: | 'Ecirc;',
|
| 111: | 'Ecirc',
|
| 112: | 'Egrave;',
|
| 113: | 'Egrave',
|
| 114: | 'Epsilon;',
|
| 115: | 'Eta;',
|
| 116: | 'Euml;',
|
| 117: | 'Euml',
|
| 118: | 'GT;',
|
| 119: | 'GT',
|
| 120: | 'Gamma;',
|
| 121: | 'Iacute;',
|
| 122: | 'Iacute',
|
| 123: | 'Icirc;',
|
| 124: | 'Icirc',
|
| 125: | 'Igrave;',
|
| 126: | 'Igrave',
|
| 127: | 'Iota;',
|
| 128: | 'Iuml;',
|
| 129: | 'Iuml',
|
| 130: | 'Kappa;',
|
| 131: | 'LT;',
|
| 132: | 'LT',
|
| 133: | 'Lambda;',
|
| 134: | 'Mu;',
|
| 135: | 'Ntilde;',
|
| 136: | 'Ntilde',
|
| 137: | 'Nu;',
|
| 138: | 'OElig;',
|
| 139: | 'Oacute;',
|
| 140: | 'Oacute',
|
| 141: | 'Ocirc;',
|
| 142: | 'Ocirc',
|
| 143: | 'Ograve;',
|
| 144: | 'Ograve',
|
| 145: | 'Omega;',
|
| 146: | 'Omicron;',
|
| 147: | 'Oslash;',
|
| 148: | 'Oslash',
|
| 149: | 'Otilde;',
|
| 150: | 'Otilde',
|
| 151: | 'Ouml;',
|
| 152: | 'Ouml',
|
| 153: | 'Phi;',
|
| 154: | 'Pi;',
|
| 155: | 'Prime;',
|
| 156: | 'Psi;',
|
| 157: | 'QUOT;',
|
| 158: | 'QUOT',
|
| 159: | 'REG;',
|
| 160: | 'REG',
|
| 161: | 'Rho;',
|
| 162: | 'Scaron;',
|
| 163: | 'Sigma;',
|
| 164: | 'THORN;',
|
| 165: | 'THORN',
|
| 166: | 'TRADE;',
|
| 167: | 'Tau;',
|
| 168: | 'Theta;',
|
| 169: | 'Uacute;',
|
| 170: | 'Uacute',
|
| 171: | 'Ucirc;',
|
| 172: | 'Ucirc',
|
| 173: | 'Ugrave;',
|
| 174: | 'Ugrave',
|
| 175: | 'Upsilon;',
|
| 176: | 'Uuml;',
|
| 177: | 'Uuml',
|
| 178: | 'Xi;',
|
| 179: | 'Yacute;',
|
| 180: | 'Yacute',
|
| 181: | 'Yuml;',
|
| 182: | 'Zeta;',
|
| 183: | 'aacute;',
|
| 184: | 'aacute',
|
| 185: | 'acirc;',
|
| 186: | 'acirc',
|
| 187: | 'acute;',
|
| 188: | 'acute',
|
| 189: | 'aelig;',
|
| 190: | 'aelig',
|
| 191: | 'agrave;',
|
| 192: | 'agrave',
|
| 193: | 'alefsym;',
|
| 194: | 'alpha;',
|
| 195: | 'amp;',
|
| 196: | 'amp',
|
| 197: | 'and;',
|
| 198: | 'ang;',
|
| 199: | 'apos;',
|
| 200: | 'aring;',
|
| 201: | 'aring',
|
| 202: | 'asymp;',
|
| 203: | 'atilde;',
|
| 204: | 'atilde',
|
| 205: | 'auml;',
|
| 206: | 'auml',
|
| 207: | 'bdquo;',
|
| 208: | 'beta;',
|
| 209: | 'brvbar;',
|
| 210: | 'brvbar',
|
| 211: | 'bull;',
|
| 212: | 'cap;',
|
| 213: | 'ccedil;',
|
| 214: | 'ccedil',
|
| 215: | 'cedil;',
|
| 216: | 'cedil',
|
| 217: | 'cent;',
|
| 218: | 'cent',
|
| 219: | 'chi;',
|
| 220: | 'circ;',
|
| 221: | 'clubs;',
|
| 222: | 'cong;',
|
| 223: | 'copy;',
|
| 224: | 'copy',
|
| 225: | 'crarr;',
|
| 226: | 'cup;',
|
| 227: | 'curren;',
|
| 228: | 'curren',
|
| 229: | 'dArr;',
|
| 230: | 'dagger;',
|
| 231: | 'darr;',
|
| 232: | 'deg;',
|
| 233: | 'deg',
|
| 234: | 'delta;',
|
| 235: | 'diams;',
|
| 236: | 'divide;',
|
| 237: | 'divide',
|
| 238: | 'eacute;',
|
| 239: | 'eacute',
|
| 240: | 'ecirc;',
|
| 241: | 'ecirc',
|
| 242: | 'egrave;',
|
| 243: | 'egrave',
|
| 244: | 'empty;',
|
| 245: | 'emsp;',
|
| 246: | 'ensp;',
|
| 247: | 'epsilon;',
|
| 248: | 'equiv;',
|
| 249: | 'eta;',
|
| 250: | 'eth;',
|
| 251: | 'eth',
|
| 252: | 'euml;',
|
| 253: | 'euml',
|
| 254: | 'euro;',
|
| 255: | 'exist;',
|
| 256: | 'fnof;',
|
| 257: | 'forall;',
|
| 258: | 'frac12;',
|
| 259: | 'frac12',
|
| 260: | 'frac14;',
|
| 261: | 'frac14',
|
| 262: | 'frac34;',
|
| 263: | 'frac34',
|
| 264: | 'frasl;',
|
| 265: | 'gamma;',
|
| 266: | 'ge;',
|
| 267: | 'gt;',
|
| 268: | 'gt',
|
| 269: | 'hArr;',
|
| 270: | 'harr;',
|
| 271: | 'hearts;',
|
| 272: | 'hellip;',
|
| 273: | 'iacute;',
|
| 274: | 'iacute',
|
| 275: | 'icirc;',
|
| 276: | 'icirc',
|
| 277: | 'iexcl;',
|
| 278: | 'iexcl',
|
| 279: | 'igrave;',
|
| 280: | 'igrave',
|
| 281: | 'image;',
|
| 282: | 'infin;',
|
| 283: | 'int;',
|
| 284: | 'iota;',
|
| 285: | 'iquest;',
|
| 286: | 'iquest',
|
| 287: | 'isin;',
|
| 288: | 'iuml;',
|
| 289: | 'iuml',
|
| 290: | 'kappa;',
|
| 291: | 'lArr;',
|
| 292: | 'lambda;',
|
| 293: | 'lang;',
|
| 294: | 'laquo;',
|
| 295: | 'laquo',
|
| 296: | 'larr;',
|
| 297: | 'lceil;',
|
| 298: | 'ldquo;',
|
| 299: | 'le;',
|
| 300: | 'lfloor;',
|
| 301: | 'lowast;',
|
| 302: | 'loz;',
|
| 303: | 'lrm;',
|
| 304: | 'lsaquo;',
|
| 305: | 'lsquo;',
|
| 306: | 'lt;',
|
| 307: | 'lt',
|
| 308: | 'macr;',
|
| 309: | 'macr',
|
| 310: | 'mdash;',
|
| 311: | 'micro;',
|
| 312: | 'micro',
|
| 313: | 'middot;',
|
| 314: | 'middot',
|
| 315: | 'minus;',
|
| 316: | 'mu;',
|
| 317: | 'nabla;',
|
| 318: | 'nbsp;',
|
| 319: | 'nbsp',
|
| 320: | 'ndash;',
|
| 321: | 'ne;',
|
| 322: | 'ni;',
|
| 323: | 'not;',
|
| 324: | 'not',
|
| 325: | 'notin;',
|
| 326: | 'nsub;',
|
| 327: | 'ntilde;',
|
| 328: | 'ntilde',
|
| 329: | 'nu;',
|
| 330: | 'oacute;',
|
| 331: | 'oacute',
|
| 332: | 'ocirc;',
|
| 333: | 'ocirc',
|
| 334: | 'oelig;',
|
| 335: | 'ograve;',
|
| 336: | 'ograve',
|
| 337: | 'oline;',
|
| 338: | 'omega;',
|
| 339: | 'omicron;',
|
| 340: | 'oplus;',
|
| 341: | 'or;',
|
| 342: | 'ordf;',
|
| 343: | 'ordf',
|
| 344: | 'ordm;',
|
| 345: | 'ordm',
|
| 346: | 'oslash;',
|
| 347: | 'oslash',
|
| 348: | 'otilde;',
|
| 349: | 'otilde',
|
| 350: | 'otimes;',
|
| 351: | 'ouml;',
|
| 352: | 'ouml',
|
| 353: | 'para;',
|
| 354: | 'para',
|
| 355: | 'part;',
|
| 356: | 'permil;',
|
| 357: | 'perp;',
|
| 358: | 'phi;',
|
| 359: | 'pi;',
|
| 360: | 'piv;',
|
| 361: | 'plusmn;',
|
| 362: | 'plusmn',
|
| 363: | 'pound;',
|
| 364: | 'pound',
|
| 365: | 'prime;',
|
| 366: | 'prod;',
|
| 367: | 'prop;',
|
| 368: | 'psi;',
|
| 369: | 'quot;',
|
| 370: | 'quot',
|
| 371: | 'rArr;',
|
| 372: | 'radic;',
|
| 373: | 'rang;',
|
| 374: | 'raquo;',
|
| 375: | 'raquo',
|
| 376: | 'rarr;',
|
| 377: | 'rceil;',
|
| 378: | 'rdquo;',
|
| 379: | 'real;',
|
| 380: | 'reg;',
|
| 381: | 'reg',
|
| 382: | 'rfloor;',
|
| 383: | 'rho;',
|
| 384: | 'rlm;',
|
| 385: | 'rsaquo;',
|
| 386: | 'rsquo;',
|
| 387: | 'sbquo;',
|
| 388: | 'scaron;',
|
| 389: | 'sdot;',
|
| 390: | 'sect;',
|
| 391: | 'sect',
|
| 392: | 'shy;',
|
| 393: | 'shy',
|
| 394: | 'sigma;',
|
| 395: | 'sigmaf;',
|
| 396: | 'sim;',
|
| 397: | 'spades;',
|
| 398: | 'sub;',
|
| 399: | 'sube;',
|
| 400: | 'sum;',
|
| 401: | 'sup1;',
|
| 402: | 'sup1',
|
| 403: | 'sup2;',
|
| 404: | 'sup2',
|
| 405: | 'sup3;',
|
| 406: | 'sup3',
|
| 407: | 'sup;',
|
| 408: | 'supe;',
|
| 409: | 'szlig;',
|
| 410: | 'szlig',
|
| 411: | 'tau;',
|
| 412: | 'there4;',
|
| 413: | 'theta;',
|
| 414: | 'thetasym;',
|
| 415: | 'thinsp;',
|
| 416: | 'thorn;',
|
| 417: | 'thorn',
|
| 418: | 'tilde;',
|
| 419: | 'times;',
|
| 420: | 'times',
|
| 421: | 'trade;',
|
| 422: | 'uArr;',
|
| 423: | 'uacute;',
|
| 424: | 'uacute',
|
| 425: | 'uarr;',
|
| 426: | 'ucirc;',
|
| 427: | 'ucirc',
|
| 428: | 'ugrave;',
|
| 429: | 'ugrave',
|
| 430: | 'uml;',
|
| 431: | 'uml',
|
| 432: | 'upsih;',
|
| 433: | 'upsilon;',
|
| 434: | 'uuml;',
|
| 435: | 'uuml',
|
| 436: | 'weierp;',
|
| 437: | 'xi;',
|
| 438: | 'yacute;',
|
| 439: | 'yacute',
|
| 440: | 'yen;',
|
| 441: | 'yen',
|
| 442: | 'yuml;',
|
| 443: | 'yuml',
|
| 444: | 'zeta;',
|
| 445: | 'zwj;',
|
| 446: | 'zwnj;'
|
| 447: | );
|
| 448: |
|
// Content models; the active one is held in $content_model.
const PCDATA = 0;
const RCDATA = 1;
const CDATA = 2;
const PLAINTEXT = 3;

// Token types; stored under the 'type' key of the token arrays built here.
const DOCTYPE = 0;
const STARTTAG = 1;
const ENDTAG = 2;
const COMMENT = 3;
const CHARACTR = 4;
const EOF = 5;
|
| 460: |
|
/**
 * Stores the input and runs the whole tokenizer state machine.
 *
 * Tokenization happens inside the constructor: starting in the 'data'
 * state, the method named "<state>State" is invoked repeatedly until some
 * state handler sets $this->state to null.
 *
 * @param string $data markup to tokenize
 */
public function __construct($data)
{
    $this->tree = new HTML5TreeConstructer;
    $this->data = $data;
    $this->EOF = strlen($data);
    $this->char = -1;
    $this->content_model = self::PCDATA;
    $this->state = 'data';

    // Dispatch on the state name until a handler clears it.
    while ($this->state !== null) {
        $handler = $this->state . 'State';
        $this->$handler();
    }
}
|
| 475: |
|
/**
 * Returns whatever the tree constructer's save() returns.
 *
 * @return mixed result of HTML5TreeConstructer::save()
 */
public function save()
{
    $result = $this->tree->save();
    return $result;
}
|
| 480: |
|
/**
 * Returns the character at the current offset.
 *
 * @return string|false the character at $this->char, or false once the
 *                      offset has reached the end of the input
 */
private function char()
{
    if ($this->char < $this->EOF) {
        return $this->data[$this->char];
    }

    return false;
}
|
| 487: |
|
/**
 * Reads from the input at an arbitrary offset without moving the cursor.
 *
 * @param int $s start offset into the input
 * @param int $l number of characters to read; 0 means a single character
 * @return string|null the character (when $l is 0) or substring, or null
 *                     when $s + $l is not strictly before the end of input
 */
private function character($s, $l = 0)
{
    if ($s + $l >= $this->EOF) {
        return null;
    }

    return ($l === 0)
        ? $this->data[$s]
        : substr($this->data, $s, $l);
}
|
| 498: |
|
/**
 * Returns the longest run of characters, starting at $start, that all
 * belong to the given character class.
 *
 * The previous preg_replace() based implementation returned the ENTIRE
 * remaining input when the first character was not in the class (the
 * pattern did not match, so the subject came back unchanged). An empty
 * string is now returned in that case.
 *
 * @param string $char_class body of a PCRE character class, e.g. 'A-Za-z' or '^>'
 * @param int    $start      offset into the input at which the run must begin
 * @return string the matching run, or '' when there is none
 */
private function characters($char_class, $start)
{
    $rest = substr($this->data, $start);
    if (!is_string($rest) || $rest === '') {
        return '';
    }

    if (preg_match('#^[' . $char_class . ']+#', $rest, $match) === 1) {
        return $match[0];
    }

    return '';
}
|
| 503: |
|
/**
 * Data state: consumes the next input character and either switches state
 * or emits character tokens.
 *
 * - '&' in PCDATA/RCDATA content switches to the entityData state.
 * - '-' and '>' may toggle the escape flag (RCDATA/CDATA only) and are
 *   emitted as character tokens.
 * - '<' switches to the tagOpen state in PCDATA content, or in
 *   RCDATA/CDATA content while the escape flag is false.
 * - At end of input $this->EOF() is called.
 * - In PLAINTEXT content the rest of the input is emitted at once,
 *   followed by $this->EOF().
 * - Anything else is emitted as one run of text up to the next '<' or '&'.
 *
 * Fixes relative to the previous implementation:
 * - The run optimisation always consumes at least one character. Before,
 *   a literal '&' or '<' reaching the final branch (e.g. '&' in CDATA
 *   content) produced a zero-length run, moved the cursor backwards and
 *   repeated forever.
 * - The escape checks look at the characters ENDING at the current one
 *   ("<!--" and "-->"), instead of a window that excluded the current '-'
 *   or looked forward from '>'.
 */
private function dataState()
{
    $this->char++;
    $char = $this->char();

    $rcdata_or_cdata = $this->content_model === self::RCDATA
        || $this->content_model === self::CDATA;

    if ($char === '&' && ($this->content_model === self::PCDATA || $this->content_model === self::RCDATA)) {
        $this->state = 'entityData';

    } elseif ($char === '-') {
        // Set the escape flag when the last four characters, including
        // this one, are "<!--".
        // NOTE(review): the run optimisation in the final branch consumes
        // '-' and '>' that follow ordinary text without reaching these
        // checks, so the flag is rarely examined - confirm whether the run
        // should also stop at those characters in RCDATA/CDATA content.
        if ($rcdata_or_cdata && $this->escape === false &&
            $this->char >= 3 && substr($this->data, $this->char - 3, 4) === '<!--'
        ) {
            $this->escape = true;
        }

        $this->emitToken(
            array(
                'type' => self::CHARACTR,
                'data' => $char
            )
        );

    } elseif ($char === '<' && ($this->content_model === self::PCDATA ||
            ($rcdata_or_cdata && $this->escape === false))
    ) {
        $this->state = 'tagOpen';

    } elseif ($char === '>') {
        // Clear the escape flag when the last three characters, including
        // this one, are "-->".
        if ($rcdata_or_cdata && $this->escape === true &&
            $this->char >= 2 && substr($this->data, $this->char - 2, 3) === '-->'
        ) {
            $this->escape = false;
        }

        $this->emitToken(
            array(
                'type' => self::CHARACTR,
                'data' => $char
            )
        );

    } elseif ($this->char === $this->EOF) {
        $this->EOF();

    } elseif ($this->content_model === self::PLAINTEXT) {
        // Everything that remains is plain text.
        $this->emitToken(
            array(
                'type' => self::CHARACTR,
                'data' => substr($this->data, $this->char)
            )
        );

        $this->EOF();

    } else {
        // Optimisation: emit the whole run up to the next '<' or '&' as a
        // single token. The current character is always part of the run,
        // even when it is itself a literal '<' or '&', so the cursor is
        // guaranteed to advance.
        $len = max(1, strcspn($this->data, '<&', $this->char));
        $char = substr($this->data, $this->char, $len);
        $this->char += $len - 1;

        $this->emitToken(
            array(
                'type' => self::CHARACTR,
                'data' => $char
            )
        );

        $this->state = 'data';
    }
}
|
| 615: |
|
/**
 * Entity data state: asks entity() to consume an entity and emits the
 * result as a character token, or a literal '&' when entity() returns a
 * falsy value. Always returns to the data state.
 */
private function entityDataState()
{
    $decoded = $this->entity();

    $token = array(
        'type' => self::CHARACTR,
        'data' => $decoded ? $decoded : '&'
    );
    $this->emitToken($token);

    $this->state = 'data';
}
|
| 634: |
|
/**
 * Tag open state: decides what a '<' starts, depending on the content model.
 *
 * RCDATA/CDATA: only "</" is significant (switches to closeTagOpen);
 * otherwise '<' is emitted as text and the data state resumes.
 *
 * PCDATA: the next character selects the follow-up state:
 * '!' -> markupDeclarationOpen, '/' -> closeTagOpen, '?' -> bogusComment,
 * a letter starts a new start tag token (tagName state), '>' emits the
 * text "<>", and anything else emits '<' and re-reads the character in
 * the data state.
 */
private function tagOpenState()
{
    switch ($this->content_model) {
        case self::RCDATA:
        case self::CDATA:
            if ($this->character($this->char + 1) !== '/') {
                // Not a close tag: the '<' is ordinary text.
                $this->emitToken(
                    array(
                        'type' => self::CHARACTR,
                        'data' => '<'
                    )
                );
                $this->state = 'data';
                break;
            }

            $this->char++;
            $this->state = 'closeTagOpen';
            break;

        case self::PCDATA:
            $this->char++;
            $char = $this->char();

            // Characters that do nothing but select the next state.
            $transitions = array(
                '!' => 'markupDeclarationOpen',
                '/' => 'closeTagOpen',
                '?' => 'bogusComment',
            );

            if (is_string($char) && isset($transitions[$char])) {
                $this->state = $transitions[$char];

            } elseif (preg_match('/^[A-Za-z]$/', $char)) {
                // Start of a tag name: begin a start tag token, lower-cased.
                $this->token = array(
                    'name' => strtolower($char),
                    'type' => self::STARTTAG,
                    'attr' => array()
                );
                $this->state = 'tagName';

            } elseif ($char === '>') {
                $this->emitToken(
                    array(
                        'type' => self::CHARACTR,
                        'data' => '<>'
                    )
                );
                $this->state = 'data';

            } else {
                $this->emitToken(
                    array(
                        'type' => self::CHARACTR,
                        'data' => '<'
                    )
                );

                // Step back so the data state re-reads this character.
                $this->char--;
                $this->state = 'data';
            }
            break;
    }
}
|
| 726: |
|
/**
 * Close tag open state: handles the characters following "</".
 *
 * In RCDATA/CDATA content the "</" is emitted as text unless the letters
 * that follow equal the nodeName of the last element on the tree's stack
 * and are themselves followed by whitespace, '>' or '/'.
 *
 * Otherwise the next character decides: a letter starts an end tag token,
 * '>' drops back to the data state, end of input emits "</" as text, and
 * anything else switches to the bogusComment state.
 */
private function closeTagOpenState()
{
    $next_node = strtolower($this->characters('A-Za-z', $this->char + 1));
    $the_same = count($this->tree->stack) > 0 && $next_node === end($this->tree->stack)->nodeName;

    $emit_as_text = false;
    if ($this->content_model === self::RCDATA || $this->content_model === self::CDATA) {
        if (!$the_same) {
            $emit_as_text = true;
        } else {
            // The name matches; it must also be properly terminated.
            $after_name = $this->character($this->char + 1 + strlen($next_node));
            $emit_as_text = !preg_match('/[\t\n\x0b\x0c >\/]/', $after_name)
                || $this->EOF === $this->char;
        }
    }

    if ($emit_as_text) {
        $this->emitToken(
            array(
                'type' => self::CHARACTR,
                'data' => '</'
            )
        );
        $this->state = 'data';
        return;
    }

    $this->char++;
    $char = $this->char();

    if (preg_match('/^[A-Za-z]$/', $char)) {
        // Start of an end tag name, stored lower-cased.
        $this->token = array(
            'name' => strtolower($char),
            'type' => self::ENDTAG
        );
        $this->state = 'tagName';
        return;
    }

    if ($char === '>') {
        // "</>" produces nothing.
        $this->state = 'data';
        return;
    }

    if ($this->char === $this->EOF) {
        $this->emitToken(
            array(
                'type' => self::CHARACTR,
                'data' => '</'
            )
        );

        // Step back so the data state sees the end of input.
        $this->char--;
        $this->state = 'data';
        return;
    }

    $this->state = 'bogusComment';
}
|
| 807: |
|
/**
 * Tag name state: extends the current token's name one character at a time.
 *
 * Whitespace or '/' ends the name (beforeAttributeName state), '>' emits
 * the token, end of input emits the token and steps back so the data
 * state sees the end, and any other character is appended lower-cased.
 */
private function tagNameState()
{
    $this->char++;
    $char = $this->character($this->char);

    if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
        $this->state = 'beforeAttributeName';
        return;
    }

    if ($char === '>') {
        $this->emitToken($this->token);
        $this->state = 'data';
        return;
    }

    if ($this->char === $this->EOF) {
        $this->emitToken($this->token);
        $this->char--;
        $this->state = 'data';
        return;
    }

    if ($char === '/') {
        $this->state = 'beforeAttributeName';
        return;
    }

    $this->token['name'] .= strtolower($char);
    $this->state = 'tagName';
}
|
| 852: |
|
/**
 * Before attribute name state: skips whitespace and '/' between
 * attributes, emits the token on '>' or end of input, and otherwise
 * starts a new attribute whose name begins with the (lower-cased)
 * character and whose value is null.
 */
private function beforeAttributeNameState()
{
    $this->char++;
    $char = $this->character($this->char);

    if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
        $this->state = 'beforeAttributeName';
        return;
    }

    if ($char === '>') {
        $this->emitToken($this->token);
        $this->state = 'data';
        return;
    }

    if ($char === '/') {
        $this->state = 'beforeAttributeName';
        return;
    }

    if ($this->char === $this->EOF) {
        $this->emitToken($this->token);
        // Step back so the data state sees the end of input.
        $this->char--;
        $this->state = 'data';
        return;
    }

    $attribute = array(
        'name' => strtolower($char),
        'value' => null
    );
    $this->token['attr'][] = $attribute;
    $this->state = 'attributeName';
}
|
| 902: |
|
/**
 * Attribute name state: appends characters (lower-cased) to the name of
 * the most recently added attribute until whitespace, '=', '>', a '/'
 * that is not immediately followed by '>', or the end of input.
 */
private function attributeNameState()
{
    $this->char++;
    $char = $this->character($this->char);

    if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
        $this->state = 'afterAttributeName';
        return;
    }

    if ($char === '=') {
        $this->state = 'beforeAttributeValue';
        return;
    }

    if ($char === '>') {
        $this->emitToken($this->token);
        $this->state = 'data';
        return;
    }

    if ($char === '/' && $this->character($this->char + 1) !== '>') {
        $this->state = 'beforeAttributeName';
        return;
    }

    if ($this->char === $this->EOF) {
        $this->emitToken($this->token);
        // Step back so the data state sees the end of input.
        $this->char--;
        $this->state = 'data';
        return;
    }

    // Still inside the name: extend the last attribute.
    $index = count($this->token['attr']) - 1;
    $this->token['attr'][$index]['name'] .= strtolower($char);
    $this->state = 'attributeName';
}
|
| 954: |
|
/**
 * After attribute name state: skips whitespace after an attribute name.
 * '=' leads to the value, '>' or end of input emits the token, a '/' not
 * immediately followed by '>' returns to beforeAttributeName, and any
 * other character starts a new attribute (lower-cased name, null value).
 */
private function afterAttributeNameState()
{
    $this->char++;
    $char = $this->character($this->char);

    if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
        $this->state = 'afterAttributeName';
        return;
    }

    if ($char === '=') {
        $this->state = 'beforeAttributeValue';
        return;
    }

    if ($char === '>') {
        $this->emitToken($this->token);
        $this->state = 'data';
        return;
    }

    if ($char === '/' && $this->character($this->char + 1) !== '>') {
        $this->state = 'beforeAttributeName';
        return;
    }

    if ($this->char === $this->EOF) {
        $this->emitToken($this->token);
        // Step back so the data state sees the end of input.
        $this->char--;
        $this->state = 'data';
        return;
    }

    $attribute = array(
        'name' => strtolower($char),
        'value' => null
    );
    $this->token['attr'][] = $attribute;
    $this->state = 'attributeName';
}
|
| 1009: |
|
/**
 * Before attribute value state: skips whitespace after '=' and selects the
 * quoting style of the value.
 *
 * '"' and "'" switch to the matching quoted state, '&' is re-read by the
 * unquoted state, '>' emits the token, and any other character becomes
 * the first character of an unquoted value.
 *
 * Fix: end of input is now handled here like in the sibling attribute
 * states (emit the token, step back, return to the data state). Before,
 * it fell through to the unquoted-value state with the cursor already at
 * the end of input, which that state never detected.
 */
private function beforeAttributeValueState()
{
    $this->char++;
    $char = $this->character($this->char);

    if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
        $this->state = 'beforeAttributeValue';

    } elseif ($char === '"') {
        $this->state = 'attributeValueDoubleQuoted';

    } elseif ($char === '&') {
        // Step back so the unquoted state processes the '&' itself.
        $this->char--;
        $this->state = 'attributeValueUnquoted';

    } elseif ($char === '\'') {
        $this->state = 'attributeValueSingleQuoted';

    } elseif ($char === '>') {
        $this->emitToken($this->token);
        $this->state = 'data';

    } elseif ($this->char >= $this->EOF) {
        // Input ended right after '=': emit what we have and let the data
        // state observe the end of input.
        $this->emitToken($this->token);

        $this->char = $this->EOF - 1;
        $this->state = 'data';

    } else {
        $last = count($this->token['attr']) - 1;
        $this->token['attr'][$last]['value'] .= $char;

        $this->state = 'attributeValueUnquoted';
    }
}
|
| 1058: |
|
/**
 * Double-quoted attribute value state: appends characters to the last
 * attribute's value until the closing '"'. '&' is delegated to
 * entityInAttributeValueState(); end of input emits the token and steps
 * back so the data state sees the end.
 */
private function attributeValueDoubleQuotedState()
{
    $this->char++;
    $char = $this->character($this->char);

    if ($char === '"') {
        $this->state = 'beforeAttributeName';
        return;
    }

    if ($char === '&') {
        $this->entityInAttributeValueState('double');
        return;
    }

    if ($this->char === $this->EOF) {
        $this->emitToken($this->token);
        $this->char--;
        $this->state = 'data';
        return;
    }

    $index = count($this->token['attr']) - 1;
    $this->token['attr'][$index]['value'] .= $char;
    $this->state = 'attributeValueDoubleQuoted';
}
|
| 1094: |
|
/**
 * Single-quoted attribute value state: appends characters to the last
 * attribute's value until the closing "'". '&' is delegated to
 * entityInAttributeValueState(); end of input emits the token and steps
 * back so the data state sees the end.
 */
private function attributeValueSingleQuotedState()
{
    $this->char++;
    $char = $this->character($this->char);

    if ($char === '\'') {
        $this->state = 'beforeAttributeName';
        return;
    }

    if ($char === '&') {
        $this->entityInAttributeValueState('single');
        return;
    }

    if ($this->char === $this->EOF) {
        $this->emitToken($this->token);
        $this->char--;
        $this->state = 'data';
        return;
    }

    $index = count($this->token['attr']) - 1;
    $this->token['attr'][$index]['value'] .= $char;
    $this->state = 'attributeValueSingleQuoted';
}
|
| 1130: |
|
/**
 * Unquoted attribute value state: appends characters to the last
 * attribute's value until whitespace (back to beforeAttributeName) or '>'
 * (token emitted). '&' is delegated to entityInAttributeValueState().
 *
 * Fix: end of input is now detected. Before, input that ended inside an
 * unquoted value (for example "<a b=c") kept appending null to the value
 * and never left this state. The token is emitted and the cursor is
 * placed so that the data state observes the end of input, matching the
 * quoted-value states.
 */
private function attributeValueUnquotedState()
{
    $this->char++;
    $char = $this->character($this->char);

    if ($this->char >= $this->EOF) {
        // ">=" also covers a cursor that was already at the end on entry.
        $this->emitToken($this->token);

        $this->char = $this->EOF - 1;
        $this->state = 'data';

    } elseif (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
        $this->state = 'beforeAttributeName';

    } elseif ($char === '&') {
        $this->entityInAttributeValueState();

    } elseif ($char === '>') {
        $this->emitToken($this->token);
        $this->state = 'data';

    } else {
        $last = count($this->token['attr']) - 1;
        $this->token['attr'][$last]['value'] .= $char;

        $this->state = 'attributeValueUnquoted';
    }
}
|
| 1167: |
|
/**
 * Appends an entity found inside an attribute value to the last
 * attribute's value: the result of entity(), or a literal '&' when
 * entity() returns a falsy value. The current state is left unchanged.
 *
 * Callers in this class sometimes pass an argument; none is declared or used.
 */
private function entityInAttributeValueState()
{
    $decoded = $this->entity();
    $text = $decoded ? $decoded : '&';

    $index = count($this->token['attr']) - 1;
    $this->token['attr'][$index]['value'] .= $text;
}
|
| 1183: |
|
/**
 * Bogus comment state: emits everything from the current offset up to
 * (not including) the next '>' as a comment token, moves the cursor past
 * that text and returns to the data state. If that lands exactly on the
 * end of input, the cursor is pulled back by one so the data state's own
 * increment arrives at the end again.
 */
private function bogusCommentState()
{
    $comment_text = $this->characters('^>', $this->char);

    $comment = array(
        'data' => $comment_text,
        'type' => self::COMMENT
    );
    $this->emitToken($comment);

    $this->char += strlen($comment_text);
    $this->state = 'data';

    if ($this->char === $this->EOF) {
        $this->char = $this->EOF - 1;
    }
}
|
| 1212: |
|
/**
 * Markup declaration open state: handles the characters following "<!".
 *
 * "--" starts a comment token (comment state), a case-insensitive
 * "doctype" switches to the doctype state, and anything else is treated
 * as a bogus comment.
 */
private function markupDeclarationOpenState()
{
    if ($this->character($this->char + 1, 2) === '--') {
        $this->token = array(
            'data' => null,
            'type' => self::COMMENT
        );
        $this->char += 2;
        $this->state = 'comment';
        return;
    }

    if (strtolower($this->character($this->char + 1, 7)) === 'doctype') {
        $this->char += 7;
        $this->state = 'doctype';
        return;
    }

    $this->char++;
    $this->state = 'bogusComment';
}
|
| 1241: |
|
/**
 * Comment state: appends characters to the comment token's data. A '-'
 * switches to the commentDash state; end of input emits the comment and
 * steps back so the data state sees the end.
 */
private function commentState()
{
    $this->char++;
    $char = $this->char();

    if ($char === '-') {
        $this->state = 'commentDash';
        return;
    }

    if ($this->char === $this->EOF) {
        $this->emitToken($this->token);
        $this->char--;
        $this->state = 'data';
        return;
    }

    $this->token['data'] .= $char;
}
|
| 1268: |
|
/**
 * Comment dash state: entered after a single '-' inside a comment. A
 * second '-' switches to the commentEnd state; end of input emits the
 * comment; any other character is appended together with the pending
 * '-' and the comment state resumes.
 */
private function commentDashState()
{
    $this->char++;
    $char = $this->char();

    if ($char === '-') {
        $this->state = 'commentEnd';
        return;
    }

    if ($this->char === $this->EOF) {
        $this->emitToken($this->token);
        $this->char--;
        $this->state = 'data';
        return;
    }

    $this->token['data'] .= '-' . $char;
    $this->state = 'comment';
}
|
| 1296: |
|
/**
 * Comment end state: entered after "--" inside a comment. '>' emits the
 * comment; a further '-' is appended to the data while staying in this
 * state; end of input emits the comment; any other character is appended
 * together with the pending "--" and the comment state resumes.
 */
private function commentEndState()
{
    $this->char++;
    $char = $this->char();

    if ($char === '>') {
        $this->emitToken($this->token);
        $this->state = 'data';
        return;
    }

    if ($char === '-') {
        $this->token['data'] .= '-';
        return;
    }

    if ($this->char === $this->EOF) {
        $this->emitToken($this->token);
        $this->char--;
        $this->state = 'data';
        return;
    }

    $this->token['data'] .= '--' . $char;
    $this->state = 'comment';
}
|
| 1320: |
|
/**
 * Doctype state: consumes one whitespace character after "doctype" if
 * there is one (otherwise the character is left to be re-read), then
 * switches to the beforeDoctypeName state.
 */
private function doctypeState()
{
    $this->char++;

    if (!preg_match('/^[\t\n\x0b\x0c ]$/', $this->char())) {
        // Not whitespace: step back so the next state reads it again.
        $this->char--;
    }

    $this->state = 'beforeDoctypeName';
}
|
| 1335: |
|
/**
 * Before doctype name state: skips whitespace, then starts the doctype
 * token with its first character (lower-case ASCII letters are
 * upper-cased) and the error flag set. '>' or end of input emits a
 * nameless doctype token flagged as an error.
 */
private function beforeDoctypeNameState()
{
    $this->char++;
    $char = $this->char();

    if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
        // Leading whitespace: stay in this state.
        return;
    }

    $at_end = $this->char === $this->EOF;
    if ($char === '>' || $at_end) {
        $this->emitToken(
            array(
                'name' => null,
                'type' => self::DOCTYPE,
                'error' => true
            )
        );

        if ($char !== '>') {
            // End of input: step back so the data state sees it too.
            $this->char--;
        }
        $this->state = 'data';
        return;
    }

    $first = preg_match('/^[a-z]$/', $char) ? strtoupper($char) : $char;
    $this->token = array(
        'name' => $first,
        'type' => self::DOCTYPE,
        'error' => true
    );
    $this->state = 'doctypeName';
}
|
| 1387: |
|
/**
 * Doctype name state: extends the doctype token's name (lower-case ASCII
 * letters are upper-cased) until whitespace, '>' or end of input. After
 * every step the token's error flag is recomputed: it is false only while
 * the name is exactly 'HTML'.
 *
 * Fix: the follow-up state is now spelled 'afterDoctypeName', matching the
 * method afterDoctypeNameState() and the lower-camel-case spelling of
 * every other state name. The old 'AfterDoctypeName' only dispatched
 * because PHP method names are case-insensitive.
 */
private function doctypeNameState()
{
    $this->char++;
    $char = $this->char();

    if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
        $this->state = 'afterDoctypeName';

    } elseif ($char === '>') {
        $this->emitToken($this->token);
        $this->state = 'data';

    } elseif (preg_match('/^[a-z]$/', $char)) {
        $this->token['name'] .= strtoupper($char);

    } elseif ($this->char === $this->EOF) {
        $this->emitToken($this->token);
        // Step back so the data state sees the end of input.
        $this->char--;
        $this->state = 'data';

    } else {
        $this->token['name'] .= $char;
    }

    // Only a name of exactly 'HTML' is considered error-free.
    $this->token['error'] = ($this->token['name'] !== 'HTML');
}
|
| 1417: |
|
/**
 * Tokenizer state reached once the DOCTYPE name has been terminated by
 * whitespace.
 *
 * Further whitespace is skipped, ">" or end of input emits the pending
 * token, and anything else marks the token as erroneous and switches to
 * the "bogusDoctype" state.
 */
private function afterDoctypeNameState()
{
    $this->char++;
    $current = $this->char();

    // Trailing whitespace: stay in this state.
    if (preg_match('/^[\t\n\x0b\x0c ]$/', $current)) {
        return;
    }

    $isClose = ($current === '>');

    if ($isClose || $this->char === $this->EOF) {
        $this->emitToken($this->token);

        if (!$isClose) {
            // End of input: step back so the data state sees EOF again.
            $this->char--;
        }

        $this->state = 'data';
        return;
    }

    // Unexpected content after the name.
    $this->token['error'] = true;
    $this->state = 'bogusDoctype';
}
|
| 1441: |
|
/**
 * Tokenizer state that discards the remainder of a malformed DOCTYPE.
 *
 * Characters are consumed and ignored until ">" or end of input, at which
 * point the pending token is emitted and the tokenizer returns to the
 * "data" state.
 */
private function bogusDoctypeState()
{
    $this->char++;
    $isClose = ($this->char() === '>');

    // Anything other than ">" or end of input is silently skipped.
    if (!$isClose && $this->char !== $this->EOF) {
        return;
    }

    $this->emitToken($this->token);

    if (!$isClose) {
        // End of input: step back so the data state sees EOF again.
        $this->char--;
    }

    $this->state = 'data';
}
|
| 1461: |
|
/**
 * Tries to consume a character reference that begins at the cursor, which
 * is expected to be positioned on the "&".
 *
 * On success the cursor is left on the last character consumed (including
 * a terminating ";" when present) and the decoded text is returned. On
 * failure the cursor is left on the "&" and false is returned.
 *
 * NOTE(review): assumes character($pos) yields the single character at
 * $pos (or a non-string past the end of input) and characters($class, $pos)
 * yields the leading run of $class characters found at $pos; confirm
 * against their definitions.
 *
 * @return string|false Decoded character(s), or false if no valid
 *                      reference starts here.
 */
private function entity()
{
    $start = $this->char;

    if ($this->character($start + 1) === '#') {
        // Numeric reference. The hex marker, if any, is the character AFTER
        // the "#"; the previous code re-tested the "#" itself, so "x"/"X"
        // could never match and hex references were never recognised.
        $marker = $this->character($start + 2);
        $is_hex = ($marker === 'x' || $marker === 'X');
        $char_class = $is_hex ? '0-9A-Fa-f' : '0-9';
        $prefix_len = $is_hex ? 3 : 2; // strlen('&#x') or strlen('&#')

        $digits = (string)$this->characters($char_class, $start + $prefix_len);

        // Validate the run explicitly rather than trusting its length, so an
        // unexpected helper result cannot be mistaken for digits.
        if (!preg_match('/^[' . $char_class . ']+$/D', $digits)) {
            $this->char = $start;
            return false;
        }

        $reference = '&#' . ($is_hex ? 'x' : '') . $digits . ';';
        $decoded = html_entity_decode($reference, ENT_QUOTES, 'UTF-8');

        // html_entity_decode() returns its input untouched for code points
        // it refuses to decode; treat those as "not a reference".
        if ($decoded === $reference) {
            $this->char = $start;
            return false;
        }

        // Leave the cursor on the last digit, or on the ";" if one follows.
        $last = $start + $prefix_len + strlen($digits) - 1;
        if ($this->character($last + 1) === ';') {
            $last++;
        }
        $this->char = $last;

        return $decoded;
    }

    // Named reference: find the shortest prefix of the following
    // alphanumeric run that is a known entity name.
    $e_name = (string)$this->characters('0-9A-Za-z;', $start + 1);
    $name_len = strlen($e_name);

    // No candidate longer than the longest known name can match, so cap the
    // scan instead of testing every prefix of an arbitrarily long run.
    $scan_len = min($name_len, max(array_map('strlen', $this->entities)));

    for ($c = 1; $c <= $scan_len; $c++) {
        $id = substr($e_name, 0, $c);

        if (!in_array($id, $this->entities, true)) {
            continue;
        }

        $this->char = $start + $c;

        // A name matched without its ";" still swallows one that
        // immediately follows.
        if ($e_name[$c - 1] !== ';' && $c < $name_len && $e_name[$c] === ';') {
            $this->char++;
        }

        return html_entity_decode('&' . rtrim($id, ';') . ';', ENT_QUOTES, 'UTF-8');
    }

    // Not a recognised reference; the caller treats the "&" literally.
    $this->char = $start;
    return false;
}
|
| 1553: |
|
/**
 * Hands a token to the tree constructor and updates the tokenizer's
 * content model from the outcome.
 *
 * An integer returned by the tree constructor becomes the new content
 * model; otherwise an end tag resets the content model to PCDATA.
 *
 * @param array $token Token to emit; its 'type' key is inspected here.
 */
private function emitToken($token)
{
    $requested_model = $this->tree->emitToken($token);

    if (is_int($requested_model)) {
        $this->content_model = $requested_model;
        return;
    }

    if ($token['type'] === self::ENDTAG) {
        $this->content_model = self::PCDATA;
    }
}
|
| 1565: |
|
/**
 * Finishes tokenization: clears the current state and sends an EOF token
 * to the tree constructor.
 */
private function EOF()
{
    $this->state = null;

    $eof_token = array(
        'type' => self::EOF
    );
    $this->tree->emitToken($eof_token);
}
|
| 1575: | }
|
| 1576: |
|
| 1577: | class HTML5TreeConstructer
|
| 1578: | {
|
// Stack of open elements; the root <html> element is pushed first.
public $stack = array();

// Current phase (one of the *_PHASE constants below).
private $phase;
// Current insertion mode inside the main phase (one of the mode constants below).
private $mode;
// DOMDocument being populated.
private $dom;
private $foster_parent = null;
// Active formatting elements, interleaved with self::MARKER entries.
private $a_formatting = array();

// The <head> element, once one has been inserted.
private $head_pointer = null;
// The most recently opened <form> element, if any.
private $form_pointer = null;

// Tag-name lists per element category.
// NOTE(review): presumably consulted by getElementCategory() - confirm.
private $scoping = array(
    'button',
    'caption',
    'html',
    'marquee',
    'object',
    'table',
    'td',
    'th'
);
private $formatting = array(
    'a', 'b', 'big', 'em', 'font', 'i', 'nobr',
    's', 'small', 'strike', 'strong', 'tt', 'u'
);
private $special = array(
    'address', 'area', 'base', 'basefont', 'bgsound', 'blockquote', 'body', 'br',
    'center', 'col', 'colgroup', 'dd', 'dir', 'div', 'dl', 'dt',
    'embed', 'fieldset', 'form', 'frame', 'frameset',
    'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'head', 'hr',
    'iframe', 'image', 'img', 'input', 'isindex', 'li', 'link', 'listing',
    'menu', 'meta', 'noembed', 'noframes', 'noscript', 'ol', 'optgroup', 'option',
    'p', 'param', 'plaintext', 'pre', 'script', 'select', 'spacer', 'style',
    'tbody', 'textarea', 'tfoot', 'thead', 'title', 'tr', 'ul', 'wbr'
);

// Tree construction phases.
const INIT_PHASE = 0;
const ROOT_PHASE = 1;
const MAIN_PHASE = 2;
const END_PHASE = 3;

// Insertion modes of the main phase. Note that IN_BODY shares the value 3
// with END_PHASE above.
const BEFOR_HEAD = 0;
const IN_HEAD = 1;
const AFTER_HEAD = 2;
const IN_BODY = 3;
const IN_TABLE = 4;
const IN_CAPTION = 5;
const IN_CGROUP = 6;
const IN_TBODY = 7;
const IN_ROW = 8;
const IN_CELL = 9;
const IN_SELECT = 10;
const AFTER_BODY = 11;
const IN_FRAME = 12;
const AFTR_FRAME = 13;

// Element categories.
const SPECIAL = 0;
const SCOPING = 1;
const FORMATTING = 2;
const PHRASING = 3;

// Sentinel stored in $a_formatting.
const MARKER = 0;
|
| 1699: |
|
/**
 * Starts tree construction in the initial phase, in "before head" mode,
 * with an empty UTF-8 DOM document.
 */
public function __construct()
{
    $document = new DOMDocument;

    $document->encoding = 'UTF-8';
    $document->preserveWhiteSpace = true;
    $document->substituteEntities = true;
    $document->strictErrorChecking = false;

    $this->phase = self::INIT_PHASE;
    $this->mode = self::BEFOR_HEAD;
    $this->dom = $document;
}
|
| 1711: |
|
| 1712: |
|
/**
 * Routes a token to the handler for the current phase.
 *
 * @param array $token Token produced by the tokenizer.
 * @return mixed Whatever the phase handler returns; null when the current
 *               phase has no handler.
 */
public function emitToken($token)
{
    $handlers = array(
        self::INIT_PHASE => 'initPhase',
        self::ROOT_PHASE => 'rootElementPhase',
        self::MAIN_PHASE => 'mainPhase',
        self::END_PHASE => 'trailingEndPhase',
    );

    if (!isset($handlers[$this->phase])) {
        return null;
    }

    $handler = $handlers[$this->phase];

    return $this->$handler($token);
}
|
| 1730: |
|
/**
 * Handles tokens seen before any root element exists.
 *
 * - A token flagged as an error, a comment, a start/end tag, EOF, or
 *   non-whitespace character data switches to the root element phase and is
 *   reprocessed there.
 * - A token whose error flag is present and false only advances the phase.
 * - Whitespace-only data is appended to the document as a text node.
 *
 * @param array $token Token produced by the tokenizer.
 * @return mixed Result of rootElementPhase() when the token is forwarded,
 *               otherwise null.
 */
private function initPhase($token)
{
    $type = $token['type'];
    $has_error_flag = isset($token['error']);
    $is_blank = isset($token['data'])
        && preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data']);

    if (($has_error_flag && $token['error']) ||
        $type === HTML5::COMMENT ||
        $type === HTML5::STARTTAG ||
        $type === HTML5::ENDTAG ||
        $type === HTML5::EOF ||
        ($type === HTML5::CHARACTR && isset($token['data']) && !$is_blank)
    ) {
        $this->phase = self::ROOT_PHASE;
        return $this->rootElementPhase($token);
    }

    if ($has_error_flag && !$token['error']) {
        // Only the phase changes here. The DOMDocumentType object that used
        // to be instantiated at this point was never attached to the
        // document or read again, so it has been removed.
        $this->phase = self::ROOT_PHASE;
        return;
    }

    if ($is_blank) {
        $text = $this->dom->createTextNode($token['data']);
        $this->dom->appendChild($text);
    }
}
|
| 1785: |
|
/**
 * Handles tokens between the initial phase and creation of the root
 * element.
 *
 * DOCTYPE tokens are dropped, comments and whitespace-only text are
 * appended to the document, and anything else creates the <html> element,
 * pushes it on the stack, switches to the main phase and reprocesses the
 * token there.
 *
 * @param array $token Token produced by the tokenizer.
 * @return mixed Result of mainPhase() when the token is forwarded,
 *               otherwise null.
 */
private function rootElementPhase($token)
{
    $type = $token['type'];

    if ($type === HTML5::DOCTYPE) {
        // Ignored.
        return;
    }

    if ($type === HTML5::COMMENT) {
        $this->dom->appendChild($this->dom->createComment($token['data']));
        return;
    }

    if ($type === HTML5::CHARACTR) {
        if (preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
            $this->dom->appendChild($this->dom->createTextNode($token['data']));
            return;
        }
        // Non-whitespace text falls through to root element creation.
    } elseif ($type !== HTML5::STARTTAG &&
        $type !== HTML5::ENDTAG &&
        $type !== HTML5::EOF
    ) {
        return;
    }

    $root = $this->dom->createElement('html');
    $this->dom->appendChild($root);
    $this->stack[] = $root;

    $this->phase = self::MAIN_PHASE;
    return $this->mainPhase($token);
}
|
| 1835: |
|
/**
 * Handles tokens once the root element exists.
 *
 * DOCTYPE tokens are ignored, a repeated <html> start tag only contributes
 * attributes the root element does not have yet, EOF generates implied end
 * tags, and every other token is routed to the handler for the current
 * insertion mode.
 *
 * @param array $token Token produced by the tokenizer.
 * @return mixed Result of the insertion-mode handler, otherwise null.
 */
private function mainPhase($token)
{
    if ($token['type'] === HTML5::DOCTYPE) {
        // Ignored.

    } elseif ($token['type'] === HTML5::STARTTAG && $token['name'] === 'html') {
        // Merge attributes onto the existing root without overwriting.
        foreach ($token['attr'] as $attr) {
            if (!$this->stack[0]->hasAttribute($attr['name'])) {
                $this->stack[0]->setAttribute($attr['name'], $attr['value']);
            }
        }

    } elseif ($token['type'] === HTML5::EOF) {
        $this->generateImpliedEndTags();

    } else {
        // The former "case self::END_PHASE" was removed: END_PHASE and
        // IN_BODY are both 3, and a switch takes the first matching case,
        // so that arm could never run.
        switch ($this->mode) {
            case self::BEFOR_HEAD:
                return $this->beforeHead($token);
            case self::IN_HEAD:
                return $this->inHead($token);
            case self::AFTER_HEAD:
                return $this->afterHead($token);
            case self::IN_BODY:
                return $this->inBody($token);
            case self::IN_TABLE:
                return $this->inTable($token);
            case self::IN_CAPTION:
                return $this->inCaption($token);
            case self::IN_CGROUP:
                return $this->inColumnGroup($token);
            case self::IN_TBODY:
                return $this->inTableBody($token);
            case self::IN_ROW:
                return $this->inRow($token);
            case self::IN_CELL:
                return $this->inCell($token);
            case self::IN_SELECT:
                return $this->inSelect($token);
            case self::AFTER_BODY:
                return $this->afterBody($token);
            case self::IN_FRAME:
                return $this->inFrameset($token);
            case self::AFTR_FRAME:
                return $this->afterFrameset($token);
        }
    }
}
|
| 1916: |
|
/**
 * Insertion mode used before a <head> element exists.
 *
 * Whitespace-only text and comments are inserted as-is. A <head> start tag
 * creates the head element, records it as the head pointer and switches to
 * "in head". Any other start tag, an </html> end tag, or other text first
 * synthesises a <head> start tag and is then reprocessed by inHead(). All
 * remaining end tags are ignored.
 *
 * @param array $token Token produced by the tokenizer.
 * @return mixed Result of inHead() when the token is forwarded, otherwise
 *               null.
 */
private function beforeHead($token)
{
    $type = $token['type'];

    if ($type === HTML5::CHARACTR &&
        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
    ) {
        $this->insertText($token['data']);

    } elseif ($type === HTML5::COMMENT) {
        $this->insertComment($token['data']);

    } elseif ($type === HTML5::STARTTAG && $token['name'] === 'head') {
        $element = $this->insertElement($token);
        $this->head_pointer = $element;
        $this->mode = self::IN_HEAD;

    } elseif ($type === HTML5::STARTTAG ||
        ($type === HTML5::ENDTAG && $token['name'] === 'html') ||
        // Same whitespace test as the first branch; the pattern here
        // previously lacked the "+" quantifier.
        ($type === HTML5::CHARACTR &&
            !preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data']))
    ) {
        // Act as if a <head> start tag had been seen, then reprocess.
        $this->beforeHead(
            array(
                'name' => 'head',
                'type' => HTML5::STARTTAG,
                'attr' => array()
            )
        );

        return $this->inHead($token);
    }

    // Any other end tag is ignored.
}
|
| 1977: |
|
/**
 * Insertion mode used while inside <head>.
 *
 * Handles text, comments, <title>/<style>/<script> and their end tags,
 * <base>/<link>/<meta>, and </head>. Stray <head> start tags and end tags
 * other than </html> are ignored. Any other token leaves the head and is
 * reprocessed by afterHead().
 *
 * @param array $token Token produced by the tokenizer.
 * @return mixed A content-model constant (HTML5::PCDATA, HTML5::RCDATA or
 *               HTML5::CDATA) when the tokenizer must switch models, the
 *               result of afterHead() when the token is forwarded,
 *               otherwise null.
 */
private function inHead($token)
{
    $type = $token['type'];
    $text_hosts = array('title', 'style', 'script');

    if ($type === HTML5::CHARACTR && (
        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data']) ||
        in_array(end($this->stack)->nodeName, $text_hosts, true)
    )) {
        // Whitespace, or text belonging to an open title/style/script.
        $this->insertText($token['data']);

    } elseif ($type === HTML5::COMMENT) {
        $this->insertComment($token['data']);

    } elseif ($type === HTML5::ENDTAG &&
        in_array($token['name'], $text_hosts, true)
    ) {
        array_pop($this->stack);
        return HTML5::PCDATA;

    } elseif ($type === HTML5::STARTTAG && $token['name'] === 'title') {
        if ($this->head_pointer !== null) {
            $element = $this->insertElement($token, false);
            $this->head_pointer->appendChild($element);
        } else {
            $element = $this->insertElement($token);
        }

        return HTML5::RCDATA;

    } elseif ($type === HTML5::STARTTAG && $token['name'] === 'style') {
        if ($this->head_pointer !== null) {
            $element = $this->insertElement($token, false);
            $this->head_pointer->appendChild($element);
        } else {
            $this->insertElement($token);
        }

        return HTML5::CDATA;

    } elseif ($type === HTML5::STARTTAG && $token['name'] === 'script') {
        // Guarded like the title/style branches above; this branch used to
        // dereference head_pointer unconditionally.
        if ($this->head_pointer !== null) {
            $element = $this->insertElement($token, false);
            $this->head_pointer->appendChild($element);
        } else {
            $this->insertElement($token);
        }

        return HTML5::CDATA;

    } elseif ($type === HTML5::STARTTAG &&
        in_array($token['name'], array('base', 'link', 'meta'), true)
    ) {
        if ($this->head_pointer !== null) {
            $element = $this->insertElement($token, false);
            $this->head_pointer->appendChild($element);
            array_pop($this->stack);
        } else {
            $this->insertElement($token);
        }

    } elseif ($type === HTML5::ENDTAG && $token['name'] === 'head') {
        // Pop the head element only if it is the current node.
        if ($this->head_pointer->isSameNode(end($this->stack))) {
            array_pop($this->stack);
        }

        $this->mode = self::AFTER_HEAD;

    } elseif (($type === HTML5::STARTTAG && $token['name'] === 'head') ||
        ($type === HTML5::ENDTAG && $token['name'] !== 'html')
    ) {
        // Ignored.

    } else {
        // Leave the head: close it if it is the current node, otherwise
        // just switch mode, then reprocess the token.
        if ($this->head_pointer->isSameNode(end($this->stack))) {
            $this->inHead(
                array(
                    'name' => 'head',
                    'type' => HTML5::ENDTAG
                )
            );
        } else {
            $this->mode = self::AFTER_HEAD;
        }

        return $this->afterHead($token);
    }
}
|
| 2112: |
|
/**
 * Insertion mode used between </head> and the start of the body or
 * frameset.
 *
 * Whitespace-only text and comments are inserted, <body> and <frameset>
 * open their element and switch mode, head-only elements are sent back to
 * inHead(), and anything else synthesises a <body> start tag before being
 * reprocessed by inBody().
 *
 * @param array $token Token produced by the tokenizer.
 * @return mixed Result of inHead() or inBody() when the token is
 *               forwarded, otherwise null.
 */
private function afterHead($token)
{
    $type = $token['type'];

    if ($type === HTML5::CHARACTR &&
        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
    ) {
        $this->insertText($token['data']);
        return;
    }

    if ($type === HTML5::COMMENT) {
        $this->insertComment($token['data']);
        return;
    }

    if ($type === HTML5::STARTTAG) {
        $tag = $token['name'];

        if ($tag === 'body') {
            $this->insertElement($token);
            $this->mode = self::IN_BODY;
            return;
        }

        if ($tag === 'frameset') {
            $this->insertElement($token);
            $this->mode = self::IN_FRAME;
            return;
        }

        if (in_array($tag, array('base', 'link', 'meta', 'script', 'style', 'title'))) {
            // Head-only content: hand it back to the "in head" mode.
            $this->mode = self::IN_HEAD;
            return $this->inHead($token);
        }
    }

    // Anything else: act as if a <body> start tag had been seen, then
    // reprocess the token in the body.
    $implied_body = array(
        'name' => 'body',
        'type' => HTML5::STARTTAG,
        'attr' => array()
    );
    $this->afterHead($implied_body);

    return $this->inBody($token);
}
|
| 2175: |
|
| 2176: | private function inBody($token)
|
| 2177: | {
|
| 2178: |
|
| 2179: |
|
| 2180: | switch ($token['type']) {
|
| 2181: |
|
| 2182: | case HTML5::CHARACTR:
|
| 2183: |
|
| 2184: | $this->reconstructActiveFormattingElements();
|
| 2185: |
|
| 2186: |
|
| 2187: | $this->insertText($token['data']);
|
| 2188: | break;
|
| 2189: |
|
| 2190: |
|
| 2191: | case HTML5::COMMENT:
|
| 2192: | |
| 2193: |
|
| 2194: | $this->insertComment($token['data']);
|
| 2195: | break;
|
| 2196: |
|
| 2197: | case HTML5::STARTTAG:
|
| 2198: | switch ($token['name']) {
|
| 2199: | |
| 2200: |
|
| 2201: | case 'script':
|
| 2202: | case 'style':
|
| 2203: | |
| 2204: |
|
| 2205: | return $this->inHead($token);
|
| 2206: | break;
|
| 2207: |
|
| 2208: | |
| 2209: |
|
| 2210: | case 'base':
|
| 2211: | case 'link':
|
| 2212: | case 'meta':
|
| 2213: | case 'title':
|
| 2214: | |
| 2215: |
|
| 2216: | return $this->inHead($token);
|
| 2217: | break;
|
| 2218: |
|
| 2219: |
|
| 2220: | case 'body':
|
| 2221: | |
| 2222: | |
| 2223: | |
| 2224: |
|
| 2225: | if (count($this->stack) === 1 || $this->stack[1]->nodeName !== 'body') {
|
| 2226: |
|
| 2227: |
|
| 2228: | |
| 2229: | |
| 2230: | |
| 2231: | |
| 2232: |
|
| 2233: | } else {
|
| 2234: | foreach ($token['attr'] as $attr) {
|
| 2235: | if (!$this->stack[1]->hasAttribute($attr['name'])) {
|
| 2236: | $this->stack[1]->setAttribute($attr['name'], $attr['value']);
|
| 2237: | }
|
| 2238: | }
|
| 2239: | }
|
| 2240: | break;
|
| 2241: |
|
| 2242: | |
| 2243: | |
| 2244: |
|
| 2245: | case 'address':
|
| 2246: | case 'blockquote':
|
| 2247: | case 'center':
|
| 2248: | case 'dir':
|
| 2249: | case 'div':
|
| 2250: | case 'dl':
|
| 2251: | case 'fieldset':
|
| 2252: | case 'listing':
|
| 2253: | case 'menu':
|
| 2254: | case 'ol':
|
| 2255: | case 'p':
|
| 2256: | case 'ul':
|
| 2257: | |
| 2258: | |
| 2259: |
|
| 2260: | if ($this->elementInScope('p')) {
|
| 2261: | $this->emitToken(
|
| 2262: | array(
|
| 2263: | 'name' => 'p',
|
| 2264: | 'type' => HTML5::ENDTAG
|
| 2265: | )
|
| 2266: | );
|
| 2267: | }
|
| 2268: |
|
| 2269: |
|
| 2270: | $this->insertElement($token);
|
| 2271: | break;
|
| 2272: |
|
| 2273: |
|
| 2274: | case 'form':
|
| 2275: | |
| 2276: |
|
| 2277: | if ($this->form_pointer !== null) {
|
| 2278: |
|
| 2279: |
|
| 2280: |
|
| 2281: | } else {
|
| 2282: | |
| 2283: | |
| 2284: |
|
| 2285: | if ($this->elementInScope('p')) {
|
| 2286: | $this->emitToken(
|
| 2287: | array(
|
| 2288: | 'name' => 'p',
|
| 2289: | 'type' => HTML5::ENDTAG
|
| 2290: | )
|
| 2291: | );
|
| 2292: | }
|
| 2293: |
|
| 2294: | |
| 2295: |
|
| 2296: | $element = $this->insertElement($token);
|
| 2297: | $this->form_pointer = $element;
|
| 2298: | }
|
| 2299: | break;
|
| 2300: |
|
| 2301: |
|
| 2302: | case 'li':
|
| 2303: | case 'dd':
|
| 2304: | case 'dt':
|
| 2305: | |
| 2306: | |
| 2307: |
|
| 2308: | if ($this->elementInScope('p')) {
|
| 2309: | $this->emitToken(
|
| 2310: | array(
|
| 2311: | 'name' => 'p',
|
| 2312: | 'type' => HTML5::ENDTAG
|
| 2313: | )
|
| 2314: | );
|
| 2315: | }
|
| 2316: |
|
| 2317: | $stack_length = count($this->stack) - 1;
|
| 2318: |
|
| 2319: | for ($n = $stack_length; 0 <= $n; $n--) {
|
| 2320: | |
| 2321: |
|
| 2322: | $stop = false;
|
| 2323: | $node = $this->stack[$n];
|
| 2324: | $cat = $this->getElementCategory($node->tagName);
|
| 2325: |
|
| 2326: | |
| 2327: | |
| 2328: |
|
| 2329: | if ($token['name'] === $node->tagName || ($token['name'] !== 'li'
|
| 2330: | && ($node->tagName === 'dd' || $node->tagName === 'dt'))
|
| 2331: | ) {
|
| 2332: | for ($x = $stack_length; $x >= $n; $x--) {
|
| 2333: | array_pop($this->stack);
|
| 2334: | }
|
| 2335: |
|
| 2336: | break;
|
| 2337: | }
|
| 2338: |
|
| 2339: | |
| 2340: | |
| 2341: |
|
| 2342: | if ($cat !== self::FORMATTING && $cat !== self::PHRASING &&
|
| 2343: | $node->tagName !== 'address' && $node->tagName !== 'div'
|
| 2344: | ) {
|
| 2345: | break;
|
| 2346: | }
|
| 2347: | }
|
| 2348: |
|
| 2349: | |
| 2350: |
|
| 2351: | $this->insertElement($token);
|
| 2352: | break;
|
| 2353: |
|
| 2354: |
|
| 2355: | case 'plaintext':
|
| 2356: | |
| 2357: | |
| 2358: |
|
| 2359: | if ($this->elementInScope('p')) {
|
| 2360: | $this->emitToken(
|
| 2361: | array(
|
| 2362: | 'name' => 'p',
|
| 2363: | 'type' => HTML5::ENDTAG
|
| 2364: | )
|
| 2365: | );
|
| 2366: | }
|
| 2367: |
|
| 2368: |
|
| 2369: | $this->insertElement($token);
|
| 2370: |
|
| 2371: | return HTML5::PLAINTEXT;
|
| 2372: | break;
|
| 2373: |
|
| 2374: | |
| 2375: |
|
| 2376: | case 'h1':
|
| 2377: | case 'h2':
|
| 2378: | case 'h3':
|
| 2379: | case 'h4':
|
| 2380: | case 'h5':
|
| 2381: | case 'h6':
|
| 2382: | |
| 2383: |
|
| 2384: | if ($this->elementInScope('p')) {
|
| 2385: | $this->emitToken(
|
| 2386: | array(
|
| 2387: | 'name' => 'p',
|
| 2388: | 'type' => HTML5::ENDTAG
|
| 2389: | )
|
| 2390: | );
|
| 2391: | }
|
| 2392: |
|
| 2393: | |
| 2394: | |
| 2395: | |
| 2396: | |
| 2397: |
|
| 2398: | while ($this->elementInScope(array('h1', 'h2', 'h3', 'h4', 'h5', 'h6'))) {
|
| 2399: | array_pop($this->stack);
|
| 2400: | }
|
| 2401: |
|
| 2402: |
|
| 2403: | $this->insertElement($token);
|
| 2404: | break;
|
| 2405: |
|
| 2406: |
|
| 2407: | case 'a':
|
| 2408: | |
| 2409: | |
| 2410: | |
| 2411: | |
| 2412: | |
| 2413: | |
| 2414: | |
| 2415: | |
| 2416: |
|
| 2417: | $leng = count($this->a_formatting);
|
| 2418: |
|
| 2419: | for ($n = $leng - 1; $n >= 0; $n--) {
|
| 2420: | if ($this->a_formatting[$n] === self::MARKER) {
|
| 2421: | break;
|
| 2422: |
|
| 2423: | } elseif ($this->a_formatting[$n]->nodeName === 'a') {
|
| 2424: | $this->emitToken(
|
| 2425: | array(
|
| 2426: | 'name' => 'a',
|
| 2427: | 'type' => HTML5::ENDTAG
|
| 2428: | )
|
| 2429: | );
|
| 2430: | break;
|
| 2431: | }
|
| 2432: | }
|
| 2433: |
|
| 2434: |
|
| 2435: | $this->reconstructActiveFormattingElements();
|
| 2436: |
|
| 2437: |
|
| 2438: | $el = $this->insertElement($token);
|
| 2439: |
|
| 2440: | |
| 2441: |
|
| 2442: | $this->a_formatting[] = $el;
|
| 2443: | break;
|
| 2444: |
|
| 2445: | |
| 2446: |
|
| 2447: | case 'b':
|
| 2448: | case 'big':
|
| 2449: | case 'em':
|
| 2450: | case 'font':
|
| 2451: | case 'i':
|
| 2452: | case 'nobr':
|
| 2453: | case 's':
|
| 2454: | case 'small':
|
| 2455: | case 'strike':
|
| 2456: | case 'strong':
|
| 2457: | case 'tt':
|
| 2458: | case 'u':
|
| 2459: |
|
| 2460: | $this->reconstructActiveFormattingElements();
|
| 2461: |
|
| 2462: |
|
| 2463: | $el = $this->insertElement($token);
|
| 2464: |
|
| 2465: | |
| 2466: |
|
| 2467: | $this->a_formatting[] = $el;
|
| 2468: | break;
|
| 2469: |
|
| 2470: |
|
| 2471: | case 'button':
|
| 2472: | |
| 2473: | |
| 2474: | |
| 2475: |
|
| 2476: | if ($this->elementInScope('button')) {
|
| 2477: | $this->inBody(
|
| 2478: | array(
|
| 2479: | 'name' => 'button',
|
| 2480: | 'type' => HTML5::ENDTAG
|
| 2481: | )
|
| 2482: | );
|
| 2483: | }
|
| 2484: |
|
| 2485: |
|
| 2486: | $this->reconstructActiveFormattingElements();
|
| 2487: |
|
| 2488: |
|
| 2489: | $this->insertElement($token);
|
| 2490: |
|
| 2491: | |
| 2492: |
|
| 2493: | $this->a_formatting[] = self::MARKER;
|
| 2494: | break;
|
| 2495: |
|
| 2496: |
|
| 2497: | case 'marquee':
|
| 2498: | case 'object':
|
| 2499: |
|
| 2500: | $this->reconstructActiveFormattingElements();
|
| 2501: |
|
| 2502: |
|
| 2503: | $this->insertElement($token);
|
| 2504: |
|
| 2505: | |
| 2506: |
|
| 2507: | $this->a_formatting[] = self::MARKER;
|
| 2508: | break;
|
| 2509: |
|
| 2510: |
|
| 2511: | case 'xmp':
|
| 2512: |
|
| 2513: | $this->reconstructActiveFormattingElements();
|
| 2514: |
|
| 2515: |
|
| 2516: | $this->insertElement($token);
|
| 2517: |
|
| 2518: |
|
| 2519: | return HTML5::CDATA;
|
| 2520: | break;
|
| 2521: |
|
| 2522: |
|
| 2523: | case 'table':
|
| 2524: | |
| 2525: |
|
| 2526: | if ($this->elementInScope('p')) {
|
| 2527: | $this->emitToken(
|
| 2528: | array(
|
| 2529: | 'name' => 'p',
|
| 2530: | 'type' => HTML5::ENDTAG
|
| 2531: | )
|
| 2532: | );
|
| 2533: | }
|
| 2534: |
|
| 2535: |
|
| 2536: | $this->insertElement($token);
|
| 2537: |
|
| 2538: |
|
| 2539: | $this->mode = self::IN_TABLE;
|
| 2540: | break;
|
| 2541: |
|
| 2542: | |
| 2543: |
|
| 2544: | case 'area':
|
| 2545: | case 'basefont':
|
| 2546: | case 'bgsound':
|
| 2547: | case 'br':
|
| 2548: | case 'embed':
|
| 2549: | case 'img':
|
| 2550: | case 'param':
|
| 2551: | case 'spacer':
|
| 2552: | case 'wbr':
|
| 2553: |
|
| 2554: | $this->reconstructActiveFormattingElements();
|
| 2555: |
|
| 2556: |
|
| 2557: | $this->insertElement($token);
|
| 2558: |
|
| 2559: |
|
| 2560: | array_pop($this->stack);
|
| 2561: | break;
|
| 2562: |
|
| 2563: |
|
| 2564: | case 'hr':
|
| 2565: | |
| 2566: |
|
| 2567: | if ($this->elementInScope('p')) {
|
| 2568: | $this->emitToken(
|
| 2569: | array(
|
| 2570: | 'name' => 'p',
|
| 2571: | 'type' => HTML5::ENDTAG
|
| 2572: | )
|
| 2573: | );
|
| 2574: | }
|
| 2575: |
|
| 2576: |
|
| 2577: | $this->insertElement($token);
|
| 2578: |
|
| 2579: |
|
| 2580: | array_pop($this->stack);
|
| 2581: | break;
|
| 2582: |
|
| 2583: |
|
| 2584: | case 'image':
|
| 2585: | |
| 2586: |
|
| 2587: | $token['name'] = 'img';
|
| 2588: | return $this->inBody($token);
|
| 2589: | break;
|
| 2590: |
|
| 2591: |
|
| 2592: | case 'input':
|
| 2593: |
|
| 2594: | $this->reconstructActiveFormattingElements();
|
| 2595: |
|
| 2596: |
|
| 2597: | $element = $this->insertElement($token, false);
|
| 2598: |
|
| 2599: | |
| 2600: | |
| 2601: |
|
| 2602: | $this->form_pointer !== null
|
| 2603: | ? $this->form_pointer->appendChild($element)
|
| 2604: | : end($this->stack)->appendChild($element);
|
| 2605: |
|
| 2606: |
|
| 2607: | array_pop($this->stack);
|
| 2608: | break;
|
| 2609: |
|
| 2610: |
|
| 2611: | case 'isindex':
|
| 2612: |
|
| 2613: |
|
| 2614: |
|
| 2615: | |
| 2616: |
|
| 2617: | if ($this->form_pointer === null) {
|
| 2618: | |
| 2619: |
|
| 2620: | $this->inBody(
|
| 2621: | array(
|
| 2622: | 'name' => 'body',
|
| 2623: | 'type' => HTML5::STARTTAG,
|
| 2624: | 'attr' => array()
|
| 2625: | )
|
| 2626: | );
|
| 2627: |
|
| 2628: | |
| 2629: |
|
| 2630: | $this->inBody(
|
| 2631: | array(
|
| 2632: | 'name' => 'hr',
|
| 2633: | 'type' => HTML5::STARTTAG,
|
| 2634: | 'attr' => array()
|
| 2635: | )
|
| 2636: | );
|
| 2637: |
|
| 2638: | |
| 2639: |
|
| 2640: | $this->inBody(
|
| 2641: | array(
|
| 2642: | 'name' => 'p',
|
| 2643: | 'type' => HTML5::STARTTAG,
|
| 2644: | 'attr' => array()
|
| 2645: | )
|
| 2646: | );
|
| 2647: |
|
| 2648: | |
| 2649: |
|
| 2650: | $this->inBody(
|
| 2651: | array(
|
| 2652: | 'name' => 'label',
|
| 2653: | 'type' => HTML5::STARTTAG,
|
| 2654: | 'attr' => array()
|
| 2655: | )
|
| 2656: | );
|
| 2657: |
|
| 2658: |
|
| 2659: | $this->insertText(
|
| 2660: | 'This is a searchable index. ' .
|
| 2661: | 'Insert your search keywords here: '
|
| 2662: | );
|
| 2663: |
|
| 2664: | |
| 2665: | |
| 2666: | |
| 2667: |
|
| 2668: | $attr = $token['attr'];
|
| 2669: | $attr[] = array('name' => 'name', 'value' => 'isindex');
|
| 2670: |
|
| 2671: | $this->inBody(
|
| 2672: | array(
|
| 2673: | 'name' => 'input',
|
| 2674: | 'type' => HTML5::STARTTAG,
|
| 2675: | 'attr' => $attr
|
| 2676: | )
|
| 2677: | );
|
| 2678: |
|
| 2679: | |
| 2680: |
|
| 2681: | $this->insertText(
|
| 2682: | 'This is a searchable index. ' .
|
| 2683: | 'Insert your search keywords here: '
|
| 2684: | );
|
| 2685: |
|
| 2686: | |
| 2687: |
|
| 2688: | $this->inBody(
|
| 2689: | array(
|
| 2690: | 'name' => 'label',
|
| 2691: | 'type' => HTML5::ENDTAG
|
| 2692: | )
|
| 2693: | );
|
| 2694: |
|
| 2695: | |
| 2696: |
|
| 2697: | $this->inBody(
|
| 2698: | array(
|
| 2699: | 'name' => 'p',
|
| 2700: | 'type' => HTML5::ENDTAG
|
| 2701: | )
|
| 2702: | );
|
| 2703: |
|
| 2704: | |
| 2705: |
|
| 2706: | $this->inBody(
|
| 2707: | array(
|
| 2708: | 'name' => 'hr',
|
| 2709: | 'type' => HTML5::ENDTAG
|
| 2710: | )
|
| 2711: | );
|
| 2712: |
|
| 2713: | |
| 2714: |
|
| 2715: | $this->inBody(
|
| 2716: | array(
|
| 2717: | 'name' => 'form',
|
| 2718: | 'type' => HTML5::ENDTAG
|
| 2719: | )
|
| 2720: | );
|
| 2721: | }
|
| 2722: | break;
|
| 2723: |
|
| 2724: |
|
| 2725: | case 'textarea':
|
| 2726: | $this->insertElement($token);
|
| 2727: |
|
| 2728: | |
| 2729: |
|
| 2730: | return HTML5::RCDATA;
|
| 2731: | break;
|
| 2732: |
|
| 2733: | |
| 2734: |
|
| 2735: | case 'iframe':
|
| 2736: | case 'noembed':
|
| 2737: | case 'noframes':
|
| 2738: | $this->insertElement($token);
|
| 2739: |
|
| 2740: |
|
| 2741: | return HTML5::CDATA;
|
| 2742: | break;
|
| 2743: |
|
| 2744: |
|
| 2745: | case 'select':
|
| 2746: |
|
| 2747: | $this->reconstructActiveFormattingElements();
|
| 2748: |
|
| 2749: |
|
| 2750: | $this->insertElement($token);
|
| 2751: |
|
| 2752: |
|
| 2753: | $this->mode = self::IN_SELECT;
|
| 2754: | break;
|
| 2755: |
|
| 2756: | |
| 2757: | |
| 2758: |
|
| 2759: | case 'caption':
|
| 2760: | case 'col':
|
| 2761: | case 'colgroup':
|
| 2762: | case 'frame':
|
| 2763: | case 'frameset':
|
| 2764: | case 'head':
|
| 2765: | case 'option':
|
| 2766: | case 'optgroup':
|
| 2767: | case 'tbody':
|
| 2768: | case 'td':
|
| 2769: | case 'tfoot':
|
| 2770: | case 'th':
|
| 2771: | case 'thead':
|
| 2772: | case 'tr':
|
| 2773: |
|
| 2774: | break;
|
| 2775: |
|
| 2776: | |
| 2777: | |
| 2778: |
|
| 2779: | case 'event-source':
|
| 2780: | case 'section':
|
| 2781: | case 'nav':
|
| 2782: | case 'article':
|
| 2783: | case 'aside':
|
| 2784: | case 'header':
|
| 2785: | case 'footer':
|
| 2786: | case 'datagrid':
|
| 2787: | case 'command':
|
| 2788: |
|
| 2789: | break;
|
| 2790: |
|
| 2791: |
|
| 2792: | default:
|
| 2793: |
|
| 2794: | $this->reconstructActiveFormattingElements();
|
| 2795: |
|
| 2796: | $this->insertElement($token, true, true);
|
| 2797: | break;
|
| 2798: | }
|
| 2799: | break;
|
| 2800: |
|
| 2801: | case HTML5::ENDTAG:
|
| 2802: | switch ($token['name']) {
|
| 2803: |
|
| 2804: | case 'body':
|
| 2805: | |
| 2806: | |
| 2807: |
|
| 2808: | if (count($this->stack) < 2 || $this->stack[1]->nodeName !== 'body') {
|
| 2809: |
|
| 2810: |
|
| 2811: | |
| 2812: |
|
| 2813: | } elseif (end($this->stack)->nodeName !== 'body') {
|
| 2814: |
|
| 2815: | }
|
| 2816: |
|
| 2817: |
|
| 2818: | $this->mode = self::AFTER_BODY;
|
| 2819: | break;
|
| 2820: |
|
| 2821: |
|
| 2822: | case 'html':
|
| 2823: | |
| 2824: | |
| 2825: |
|
| 2826: | $this->inBody(
|
| 2827: | array(
|
| 2828: | 'name' => 'body',
|
| 2829: | 'type' => HTML5::ENDTAG
|
| 2830: | )
|
| 2831: | );
|
| 2832: |
|
| 2833: | return $this->afterBody($token);
|
| 2834: | break;
|
| 2835: |
|
| 2836: | |
| 2837: | |
| 2838: |
|
| 2839: | case 'address':
|
| 2840: | case 'blockquote':
|
| 2841: | case 'center':
|
| 2842: | case 'dir':
|
| 2843: | case 'div':
|
| 2844: | case 'dl':
|
| 2845: | case 'fieldset':
|
| 2846: | case 'listing':
|
| 2847: | case 'menu':
|
| 2848: | case 'ol':
|
| 2849: | case 'pre':
|
| 2850: | case 'ul':
|
| 2851: | |
| 2852: | |
| 2853: |
|
| 2854: | if ($this->elementInScope($token['name'])) {
|
| 2855: | $this->generateImpliedEndTags();
|
| 2856: |
|
| 2857: | |
| 2858: | |
| 2859: |
|
| 2860: |
|
| 2861: |
|
| 2862: | |
| 2863: | |
| 2864: | |
| 2865: |
|
| 2866: | for ($n = count($this->stack) - 1; $n >= 0; $n--) {
|
| 2867: | if ($this->stack[$n]->nodeName === $token['name']) {
|
| 2868: | $n = -1;
|
| 2869: | }
|
| 2870: |
|
| 2871: | array_pop($this->stack);
|
| 2872: | }
|
| 2873: | }
|
| 2874: | break;
|
| 2875: |
|
| 2876: |
|
| 2877: | case 'form':
|
| 2878: | |
| 2879: | |
| 2880: |
|
| 2881: | if ($this->elementInScope($token['name'])) {
|
| 2882: | $this->generateImpliedEndTags();
|
| 2883: |
|
| 2884: | }
|
| 2885: |
|
| 2886: | if (end($this->stack)->nodeName !== $token['name']) {
|
| 2887: | |
| 2888: | |
| 2889: |
|
| 2890: |
|
| 2891: |
|
| 2892: | } else {
|
| 2893: | |
| 2894: | |
| 2895: |
|
| 2896: | array_pop($this->stack);
|
| 2897: | }
|
| 2898: |
|
| 2899: |
|
| 2900: | $this->form_pointer = null;
|
| 2901: | break;
|
| 2902: |
|
| 2903: |
|
| 2904: | case 'p':
|
| 2905: | |
| 2906: |
|
| 2907: | if ($this->elementInScope('p')) {
|
| 2908: | $this->generateImpliedEndTags(array('p'));
|
| 2909: |
|
| 2910: | |
| 2911: |
|
| 2912: |
|
| 2913: |
|
| 2914: | |
| 2915: | |
| 2916: |
|
| 2917: | for ($n = count($this->stack) - 1; $n >= 0; $n--) {
|
| 2918: | if ($this->elementInScope('p')) {
|
| 2919: | array_pop($this->stack);
|
| 2920: |
|
| 2921: | } else {
|
| 2922: | break;
|
| 2923: | }
|
| 2924: | }
|
| 2925: | }
|
| 2926: | break;
|
| 2927: |
|
| 2928: |
|
| 2929: | case 'dd':
|
| 2930: | case 'dt':
|
| 2931: | case 'li':
|
| 2932: | |
| 2933: | |
| 2934: | |
| 2935: |
|
| 2936: | if ($this->elementInScope($token['name'])) {
|
| 2937: | $this->generateImpliedEndTags(array($token['name']));
|
| 2938: |
|
| 2939: | |
| 2940: |
|
| 2941: |
|
| 2942: |
|
| 2943: | |
| 2944: | |
| 2945: | |
| 2946: |
|
| 2947: | for ($n = count($this->stack) - 1; $n >= 0; $n--) {
|
| 2948: | if ($this->stack[$n]->nodeName === $token['name']) {
|
| 2949: | $n = -1;
|
| 2950: | }
|
| 2951: |
|
| 2952: | array_pop($this->stack);
|
| 2953: | }
|
| 2954: | }
|
| 2955: | break;
|
| 2956: |
|
| 2957: | |
| 2958: |
|
| 2959: | case 'h1':
|
| 2960: | case 'h2':
|
| 2961: | case 'h3':
|
| 2962: | case 'h4':
|
| 2963: | case 'h5':
|
| 2964: | case 'h6':
|
| 2965: | $elements = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6');
|
| 2966: |
|
| 2967: | |
| 2968: | |
| 2969: |
|
| 2970: | if ($this->elementInScope($elements)) {
|
| 2971: | $this->generateImpliedEndTags();
|
| 2972: |
|
| 2973: | |
| 2974: |
|
| 2975: |
|
| 2976: |
|
| 2977: | |
| 2978: | |
| 2979: | |
| 2980: |
|
| 2981: | while ($this->elementInScope($elements)) {
|
| 2982: | array_pop($this->stack);
|
| 2983: | }
|
| 2984: | }
|
| 2985: | break;
|
| 2986: |
|
| 2987: | |
| 2988: |
|
| 2989: | case 'a':
|
| 2990: | case 'b':
|
| 2991: | case 'big':
|
| 2992: | case 'em':
|
| 2993: | case 'font':
|
| 2994: | case 'i':
|
| 2995: | case 'nobr':
|
| 2996: | case 's':
|
| 2997: | case 'small':
|
| 2998: | case 'strike':
|
| 2999: | case 'strong':
|
| 3000: | case 'tt':
|
| 3001: | case 'u':
|
| 3002: | |
| 3003: | |
| 3004: | |
| 3005: | |
| 3006: | |
| 3007: | |
| 3008: |
|
| 3009: | while (true) {
|
| 3010: | for ($a = count($this->a_formatting) - 1; $a >= 0; $a--) {
|
| 3011: | if ($this->a_formatting[$a] === self::MARKER) {
|
| 3012: | break;
|
| 3013: |
|
| 3014: | } elseif ($this->a_formatting[$a]->tagName === $token['name']) {
|
| 3015: | $formatting_element = $this->a_formatting[$a];
|
| 3016: | $in_stack = in_array($formatting_element, $this->stack, true);
|
| 3017: | $fe_af_pos = $a;
|
| 3018: | break;
|
| 3019: | }
|
| 3020: | }
|
| 3021: |
|
| 3022: | |
| 3023: | |
| 3024: | |
| 3025: |
|
| 3026: | if (!isset($formatting_element) || ($in_stack &&
|
| 3027: | !$this->elementInScope($token['name']))
|
| 3028: | ) {
|
| 3029: | break;
|
| 3030: |
|
| 3031: | |
| 3032: | |
| 3033: | |
| 3034: |
|
| 3035: | } elseif (isset($formatting_element) && !$in_stack) {
|
| 3036: | unset($this->a_formatting[$fe_af_pos]);
|
| 3037: | $this->a_formatting = array_merge($this->a_formatting);
|
| 3038: | break;
|
| 3039: | }
|
| 3040: |
|
| 3041: | |
| 3042: | |
| 3043: | |
| 3044: | |
| 3045: |
|
| 3046: | $fe_s_pos = array_search($formatting_element, $this->stack, true);
|
| 3047: | $length = count($this->stack);
|
| 3048: |
|
| 3049: | for ($s = $fe_s_pos + 1; $s < $length; $s++) {
|
| 3050: | $category = $this->getElementCategory($this->stack[$s]->nodeName);
|
| 3051: |
|
| 3052: | if ($category !== self::PHRASING && $category !== self::FORMATTING) {
|
| 3053: | $furthest_block = $this->stack[$s];
|
| 3054: | }
|
| 3055: | }
|
| 3056: |
|
| 3057: | |
| 3058: | |
| 3059: | |
| 3060: | |
| 3061: | |
| 3062: |
|
| 3063: | if (!isset($furthest_block)) {
|
| 3064: | for ($n = $length - 1; $n >= $fe_s_pos; $n--) {
|
| 3065: | array_pop($this->stack);
|
| 3066: | }
|
| 3067: |
|
| 3068: | unset($this->a_formatting[$fe_af_pos]);
|
| 3069: | $this->a_formatting = array_merge($this->a_formatting);
|
| 3070: | break;
|
| 3071: | }
|
| 3072: |
|
| 3073: | |
| 3074: | |
| 3075: |
|
| 3076: | $common_ancestor = $this->stack[$fe_s_pos - 1];
|
| 3077: |
|
| 3078: | |
| 3079: |
|
| 3080: | if ($furthest_block->parentNode !== null) {
|
| 3081: | $furthest_block->parentNode->removeChild($furthest_block);
|
| 3082: | }
|
| 3083: |
|
| 3084: | |
| 3085: | |
| 3086: | |
| 3087: |
|
| 3088: | $bookmark = $fe_af_pos;
|
| 3089: |
|
| 3090: | |
| 3091: |
|
| 3092: | $node = $furthest_block;
|
| 3093: | $last_node = $furthest_block;
|
| 3094: |
|
| 3095: | while (true) {
|
| 3096: | for ($n = array_search($node, $this->stack, true) - 1; $n >= 0; $n--) {
|
| 3097: | |
| 3098: |
|
| 3099: | $node = $this->stack[$n];
|
| 3100: |
|
| 3101: | |
| 3102: | |
| 3103: | |
| 3104: |
|
| 3105: | if (!in_array($node, $this->a_formatting, true)) {
|
| 3106: | unset($this->stack[$n]);
|
| 3107: | $this->stack = array_merge($this->stack);
|
| 3108: |
|
| 3109: | } else {
|
| 3110: | break;
|
| 3111: | }
|
| 3112: | }
|
| 3113: |
|
| 3114: | |
| 3115: | |
| 3116: |
|
| 3117: | if ($node === $formatting_element) {
|
| 3118: | break;
|
| 3119: |
|
| 3120: | |
| 3121: | |
| 3122: | |
| 3123: |
|
| 3124: | } elseif ($last_node === $furthest_block) {
|
| 3125: | $bookmark = array_search($node, $this->a_formatting, true) + 1;
|
| 3126: | }
|
| 3127: |
|
| 3128: | |
| 3129: | |
| 3130: | |
| 3131: | |
| 3132: | |
| 3133: |
|
| 3134: | if ($node->hasChildNodes()) {
|
| 3135: | $clone = $node->cloneNode();
|
| 3136: | $s_pos = array_search($node, $this->stack, true);
|
| 3137: | $a_pos = array_search($node, $this->a_formatting, true);
|
| 3138: |
|
| 3139: | $this->stack[$s_pos] = $clone;
|
| 3140: | $this->a_formatting[$a_pos] = $clone;
|
| 3141: | $node = $clone;
|
| 3142: | }
|
| 3143: |
|
| 3144: | |
| 3145: |
|
| 3146: | if ($last_node->parentNode !== null) {
|
| 3147: | $last_node->parentNode->removeChild($last_node);
|
| 3148: | }
|
| 3149: |
|
| 3150: | $node->appendChild($last_node);
|
| 3151: |
|
| 3152: |
|
| 3153: | $last_node = $node;
|
| 3154: | }
|
| 3155: |
|
| 3156: | |
| 3157: | |
| 3158: | |
| 3159: |
|
| 3160: | if ($last_node->parentNode !== null) {
|
| 3161: | $last_node->parentNode->removeChild($last_node);
|
| 3162: | }
|
| 3163: |
|
| 3164: | $common_ancestor->appendChild($last_node);
|
| 3165: |
|
| 3166: | |
| 3167: |
|
| 3168: | $clone = $formatting_element->cloneNode();
|
| 3169: |
|
| 3170: | |
| 3171: | |
| 3172: |
|
| 3173: | while ($furthest_block->hasChildNodes()) {
|
| 3174: | $child = $furthest_block->firstChild;
|
| 3175: | $furthest_block->removeChild($child);
|
| 3176: | $clone->appendChild($child);
|
| 3177: | }
|
| 3178: |
|
| 3179: |
|
| 3180: | $furthest_block->appendChild($clone);
|
| 3181: |
|
| 3182: | |
| 3183: | |
| 3184: | |
| 3185: |
|
| 3186: | $fe_af_pos = array_search($formatting_element, $this->a_formatting, true);
|
| 3187: | unset($this->a_formatting[$fe_af_pos]);
|
| 3188: | $this->a_formatting = array_merge($this->a_formatting);
|
| 3189: |
|
| 3190: | $af_part1 = array_slice($this->a_formatting, 0, $bookmark - 1);
|
| 3191: | $af_part2 = array_slice($this->a_formatting, $bookmark, count($this->a_formatting));
|
| 3192: | $this->a_formatting = array_merge($af_part1, array($clone), $af_part2);
|
| 3193: |
|
| 3194: | |
| 3195: | |
| 3196: | |
| 3197: | |
| 3198: |
|
| 3199: | $fe_s_pos = array_search($formatting_element, $this->stack, true);
|
| 3200: | $fb_s_pos = array_search($furthest_block, $this->stack, true);
|
| 3201: | unset($this->stack[$fe_s_pos]);
|
| 3202: |
|
| 3203: | $s_part1 = array_slice($this->stack, 0, $fb_s_pos);
|
| 3204: | $s_part2 = array_slice($this->stack, $fb_s_pos + 1, count($this->stack));
|
| 3205: | $this->stack = array_merge($s_part1, array($clone), $s_part2);
|
| 3206: |
|
| 3207: |
|
| 3208: | unset($formatting_element, $fe_af_pos, $fe_s_pos, $furthest_block);
|
| 3209: | }
|
| 3210: | break;
|
| 3211: |
|
| 3212: | |
| 3213: |
|
| 3214: | case 'button':
|
| 3215: | case 'marquee':
|
| 3216: | case 'object':
|
| 3217: | |
| 3218: | |
| 3219: |
|
| 3220: | if ($this->elementInScope($token['name'])) {
|
| 3221: | $this->generateImpliedEndTags();
|
| 3222: |
|
| 3223: | |
| 3224: |
|
| 3225: |
|
| 3226: |
|
| 3227: | |
| 3228: | |
| 3229: | |
| 3230: | |
| 3231: |
|
| 3232: | for ($n = count($this->stack) - 1; $n >= 0; $n--) {
|
| 3233: | if ($this->stack[$n]->nodeName === $token['name']) {
|
| 3234: | $n = -1;
|
| 3235: | }
|
| 3236: |
|
| 3237: | array_pop($this->stack);
|
| 3238: | }
|
| 3239: |
|
| 3240: | $marker = end(array_keys($this->a_formatting, self::MARKER, true));
|
| 3241: |
|
| 3242: | for ($n = count($this->a_formatting) - 1; $n > $marker; $n--) {
|
| 3243: | array_pop($this->a_formatting);
|
| 3244: | }
|
| 3245: | }
|
| 3246: | break;
|
| 3247: |
|
| 3248: | |
| 3249: | |
| 3250: | |
| 3251: |
|
| 3252: | case 'area':
|
| 3253: | case 'basefont':
|
| 3254: | case 'bgsound':
|
| 3255: | case 'br':
|
| 3256: | case 'embed':
|
| 3257: | case 'hr':
|
| 3258: | case 'iframe':
|
| 3259: | case 'image':
|
| 3260: | case 'img':
|
| 3261: | case 'input':
|
| 3262: | case 'isindex':
|
| 3263: | case 'noembed':
|
| 3264: | case 'noframes':
|
| 3265: | case 'param':
|
| 3266: | case 'select':
|
| 3267: | case 'spacer':
|
| 3268: | case 'table':
|
| 3269: | case 'textarea':
|
| 3270: | case 'wbr':
|
| 3271: |
|
| 3272: | break;
|
| 3273: |
|
| 3274: |
|
| 3275: | default:
|
| 3276: | for ($n = count($this->stack) - 1; $n >= 0; $n--) {
|
| 3277: | |
| 3278: |
|
| 3279: | $node = end($this->stack);
|
| 3280: |
|
| 3281: | |
| 3282: |
|
| 3283: | if ($token['name'] === $node->nodeName) {
|
| 3284: |
|
| 3285: | $this->generateImpliedEndTags();
|
| 3286: |
|
| 3287: | |
| 3288: | |
| 3289: |
|
| 3290: |
|
| 3291: |
|
| 3292: | |
| 3293: |
|
| 3294: | for ($x = count($this->stack) - $n; $x >= $n; $x--) {
|
| 3295: | array_pop($this->stack);
|
| 3296: | }
|
| 3297: |
|
| 3298: | } else {
|
| 3299: | $category = $this->getElementCategory($node);
|
| 3300: |
|
| 3301: | if ($category !== self::SPECIAL && $category !== self::SCOPING) {
|
| 3302: | |
| 3303: | |
| 3304: | |
| 3305: |
|
| 3306: | return false;
|
| 3307: | }
|
| 3308: | }
|
| 3309: | }
|
| 3310: | break;
|
| 3311: | }
|
| 3312: | break;
|
| 3313: | }
|
| 3314: | }
|
| 3315: |
|
/**
 * Processes one token under the "in table" insertion mode.
 *
 * Whitespace-only text and comments are appended to the current node.
 * Table-structure start tags open the matching section, synthesising a
 * missing "colgroup"/"tbody" start tag where needed. An end tag for
 * "table" pops the stack down to and including the table element. Any
 * other token is handed to inBody(), after a foster parent has been
 * recorded when the current node is a table-structure element.
 *
 * @param array $token Token with a 'type' key and, depending on the type,
 *                     'name'/'attr' (tags) or 'data' (characters, comments).
 * @return mixed The result of the handler the token was delegated to,
 *               false when an end tag for "table" is ignored, null otherwise.
 */
private function inTable($token)
{
    $clear = array('html', 'table');
    $type = $token['type'];
    $is_start = ($type === HTML5::STARTTAG);
    $is_end = ($type === HTML5::ENDTAG);

    if ($type === HTML5::CHARACTR &&
        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
    ) {
        /* Whitespace-only character data: append it to the current node. */
        $text = $this->dom->createTextNode($token['data']);
        end($this->stack)->appendChild($text);

    } elseif ($type === HTML5::COMMENT) {
        /* Comment: append a comment node to the current node. */
        $comment = $this->dom->createComment($token['data']);
        end($this->stack)->appendChild($comment);

    } elseif ($is_start && $token['name'] === 'caption') {
        $this->clearStackToTableContext($clear);

        /* Push a marker onto the list of active formatting elements
        before the caption element is inserted. */
        $this->a_formatting[] = self::MARKER;

        $this->insertElement($token);
        $this->mode = self::IN_CAPTION;

    } elseif ($is_start && $token['name'] === 'colgroup') {
        $this->clearStackToTableContext($clear);

        $this->insertElement($token);
        $this->mode = self::IN_CGROUP;

    } elseif ($is_start && $token['name'] === 'col') {
        /* Act as if a "colgroup" start tag had been seen, then let the
        column-group handler process the "col" token itself. */
        $this->inTable(
            array(
                'name' => 'colgroup',
                'type' => HTML5::STARTTAG,
                'attr' => array()
            )
        );

        $this->inColumnGroup($token);

    } elseif ($is_start &&
        in_array($token['name'], array('tbody', 'tfoot', 'thead'), true)
    ) {
        $this->clearStackToTableContext($clear);

        $this->insertElement($token);
        $this->mode = self::IN_TBODY;

    } elseif ($is_start &&
        in_array($token['name'], array('td', 'th', 'tr'), true)
    ) {
        /* Act as if a "tbody" start tag had been seen, then reprocess
        the current token with the table-body rules. */
        $this->inTable(
            array(
                'name' => 'tbody',
                'type' => HTML5::STARTTAG,
                'attr' => array()
            )
        );

        return $this->inTableBody($token);

    } elseif ($is_start && $token['name'] === 'table') {
        /* A nested "table" start tag: act as if an end tag for the open
        table had been seen, then hand the token to mainPhase(). */
        $this->inTable(
            array(
                'name' => 'table',
                'type' => HTML5::ENDTAG
            )
        );

        return $this->mainPhase($token);

    } elseif ($is_end && $token['name'] === 'table') {
        /* Nothing to close if elementInScope() reports no table. */
        if (!$this->elementInScope($token['name'], true)) {
            return false;
        }

        $this->generateImpliedEndTags();

        /* Pop elements until the table element itself has been popped. */
        while (true) {
            $current = end($this->stack)->nodeName;
            array_pop($this->stack);

            if ($current === 'table') {
                break;
            }
        }

        $this->resetInsertionMode();

    } elseif ($is_end && in_array(
            $token['name'],
            array(
                'body',
                'caption',
                'col',
                'colgroup',
                'html',
                'tbody',
                'td',
                'tfoot',
                'th',
                'thead',
                'tr'
            ),
            true
        )
    ) {
        /* These end tags are ignored in this mode. */

    } else {
        /* Anything else is processed with the "in body" rules. When the
        current node is a table-structure element, a foster parent is
        recorded first. */
        if (in_array(
            end($this->stack)->nodeName,
            array('table', 'tbody', 'tfoot', 'thead', 'tr'),
            true
        )
        ) {
            /* Find the last (most recently opened) table on the stack. */
            $table_pos = null;

            for ($n = count($this->stack) - 1; $n >= 0; $n--) {
                if ($this->stack[$n]->nodeName === 'table') {
                    $table_pos = $n;
                    break;
                }
            }

            if ($table_pos === null) {
                /* No table on the stack: use the first stack entry. */
                $this->foster_parent = $this->stack[0];

            } elseif ($this->stack[$table_pos]->parentNode !== null) {
                /* NOTE(review): a non-element parent is also accepted
                here, exactly as in the previous implementation. */
                $this->foster_parent = $this->stack[$table_pos]->parentNode;

            } else {
                /* Detached table: use the stack entry just before it. */
                $this->foster_parent = $this->stack[$table_pos - 1];
            }
        }

        /* inBody() returns a content-model constant for some start tags
        (e.g. HTML5::RCDATA for "textarea"); pass it on instead of
        discarding it, as the other delegating branches already do.
        NOTE(review): confirm the dispatcher consumes this value. */
        return $this->inBody($token);
    }
}
|
| 3527: |
|
/**
 * Processes one token under the "in caption" insertion mode.
 *
 * An end tag for "caption" closes the caption and switches back to the
 * table mode. Table-structure start tags and an end tag for "table"
 * first close the caption implicitly and are then handled by inTable().
 * A fixed set of stray end tags is ignored. Every other token is
 * processed with the "in body" rules.
 *
 * @param array $token Token with a 'type' key and, for tags, a 'name' key.
 * @return mixed The result of the handler the token was delegated to,
 *               null when the token is handled or ignored here.
 */
private function inCaption($token)
{
    $type = $token['type'];

    if ($type === HTML5::ENDTAG && $token['name'] === 'caption') {
        /* The end tag is ignored when elementInScope() finds no caption. */
        if ($this->elementInScope($token['name'], true)) {
            $this->generateImpliedEndTags();

            /* Pop elements until the caption itself has been popped. */
            while (true) {
                $node = end($this->stack)->nodeName;
                array_pop($this->stack);

                if ($node === 'caption') {
                    break;
                }
            }

            $this->clearTheActiveFormattingElementsUpToTheLastMarker();

            $this->mode = self::IN_TABLE;
        }

    } elseif (($type === HTML5::STARTTAG && in_array(
                $token['name'],
                array(
                    'caption',
                    'col',
                    'colgroup',
                    'tbody',
                    'td',
                    'tfoot',
                    'th',
                    'thead',
                    'tr'
                ),
                true
            )) || ($type === HTML5::ENDTAG &&
            $token['name'] === 'table')
    ) {
        /* Act as if an end tag for "caption" had been seen, then let
        the table handler process the current token. */
        $this->inCaption(
            array(
                'name' => 'caption',
                'type' => HTML5::ENDTAG
            )
        );

        return $this->inTable($token);

    } elseif ($type === HTML5::ENDTAG && in_array(
            $token['name'],
            array(
                'body',
                'col',
                'colgroup',
                'html',
                'tbody',
                'td',
                'tfoot',
                'th',
                'thead',
                'tr'
            ),
            true
        )
    ) {
        /* Stray table-structure end tags are ignored. "td" was missing
        from this list, so a stray </td> used to fall through to the
        "in body" rules instead of being dropped like </th>. */

    } else {
        /* Anything else uses the "in body" rules. inBody() returns a
        content-model constant for some start tags (e.g. HTML5::RCDATA
        for "textarea"), so its result is passed on rather than dropped.
        NOTE(review): confirm the dispatcher consumes this value. */
        return $this->inBody($token);
    }
}
|
| 3622: |
|
/**
 * Processes one token under the "in column group" insertion mode.
 *
 * Whitespace-only text and comments are appended to the current node, a
 * "col" start tag inserts an element and pops it again straight away, an
 * end tag for "colgroup" leaves the mode unless the current node is
 * "html", and an end tag for "col" is ignored. Any other token closes the
 * column group implicitly and is then handled by inTable().
 *
 * @param array $token Token with a 'type' key and, depending on the type,
 *                     'name' (tags) or 'data' (characters, comments).
 * @return mixed The result of inTable() for delegated tokens, null otherwise.
 */
private function inColumnGroup($token)
{
    $kind = $token['type'];

    /* Text consisting solely of whitespace goes onto the current node. */
    if ($kind === HTML5::CHARACTR &&
        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
    ) {
        end($this->stack)->appendChild(
            $this->dom->createTextNode($token['data'])
        );
        return;
    }

    /* Comments go onto the current node as well. */
    if ($kind === HTML5::COMMENT) {
        end($this->stack)->appendChild(
            $this->dom->createComment($token['data'])
        );
        return;
    }

    /* "col" is inserted and immediately popped off the stack again. */
    if ($kind === HTML5::STARTTAG && $token['name'] === 'col') {
        $this->insertElement($token);
        array_pop($this->stack);
        return;
    }

    if ($kind === HTML5::ENDTAG) {
        if ($token['name'] === 'colgroup') {
            /* The end tag is ignored while the current node is "html";
            otherwise the colgroup is popped and the table mode resumes. */
            if (end($this->stack)->nodeName !== 'html') {
                array_pop($this->stack);
                $this->mode = self::IN_TABLE;
            }
            return;
        }

        /* An end tag for "col" is ignored. */
        if ($token['name'] === 'col') {
            return;
        }
    }

    /* Anything else: act as if an end tag for "colgroup" had been seen,
    then let the table handler process the current token. */
    $this->inColumnGroup(
        array(
            'name' => 'colgroup',
            'type' => HTML5::ENDTAG
        )
    );

    return $this->inTable($token);
}
|
| 3684: |
|
/**
 * Processes one token under the "in table body" insertion mode.
 *
 * A "tr" start tag opens a row. A "th"/"td" start tag implies a "tr" and
 * is then handled by inRow(). An end tag for "tbody"/"tfoot"/"thead"
 * closes the section. A start tag for another table section, or an end
 * tag for "table", closes the current section implicitly and hands the
 * token to mainPhase(). A fixed set of stray end tags is ignored.
 * Everything else is handled by inTable().
 *
 * @param array $token Token with a 'type' key and, for tags, a 'name' key.
 * @return mixed The result of the handler the token was delegated to,
 *               null when the token is handled or ignored here.
 */
private function inTableBody($token)
{
    $clear = array('tbody', 'tfoot', 'thead', 'html');
    $type = $token['type'];

    if ($type === HTML5::STARTTAG && $token['name'] === 'tr') {
        $this->clearStackToTableContext($clear);

        $this->insertElement($token);
        $this->mode = self::IN_ROW;

    } elseif ($type === HTML5::STARTTAG &&
        ($token['name'] === 'th' || $token['name'] === 'td')
    ) {
        /* Act as if a "tr" start tag had been seen, then reprocess the
        cell token with the row rules. */
        $this->inTableBody(
            array(
                'name' => 'tr',
                'type' => HTML5::STARTTAG,
                'attr' => array()
            )
        );

        return $this->inRow($token);

    } elseif ($type === HTML5::ENDTAG &&
        in_array($token['name'], array('tbody', 'tfoot', 'thead'), true)
    ) {
        /* Ignored when elementInScope() finds no matching section. */
        if ($this->elementInScope($token['name'], true)) {
            $this->clearStackToTableContext($clear);

            /* Pop the section element and return to the table mode. */
            array_pop($this->stack);
            $this->mode = self::IN_TABLE;
        }

    } elseif (($type === HTML5::STARTTAG && in_array(
                $token['name'],
                /* 'tfoot' was misspelled 'tfoor', so <tfoot> never
                matched this branch. */
                array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead'),
                true
            )) ||
        /* The table token that belongs here is the END tag; a nested
        "table" start tag is handled by inTable() via the final branch. */
        ($type === HTML5::ENDTAG && $token['name'] === 'table')
    ) {
        /* Ignored when no table section is open. */
        if ($this->elementInScope(array('tbody', 'thead', 'tfoot'), true)) {
            $this->clearStackToTableContext($clear);

            /* Act as if an end tag named after the current node had been
            seen, then hand the token to mainPhase(). */
            $this->inTableBody(
                array(
                    'name' => end($this->stack)->nodeName,
                    'type' => HTML5::ENDTAG
                )
            );

            return $this->mainPhase($token);
        }

    } elseif ($type === HTML5::ENDTAG && in_array(
            $token['name'],
            array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr'),
            true
        )
    ) {
        /* These end tags are ignored in this mode. */

    } else {
        /* Anything else is handled by the table rules. The result is
        passed on so that a value returned further down the chain is not
        lost. NOTE(review): confirm the dispatcher consumes this value. */
        return $this->inTable($token);
    }
}
|
| 3783: |
|
/**
 * Processes one token under the "in row" insertion mode.
 *
 * A "th"/"td" start tag opens a cell. An end tag for "tr" closes the row
 * and returns to the table-body mode. Table-structure start tags and end
 * tags for "tbody"/"tfoot"/"thead" close the row implicitly and are then
 * reprocessed by inTableBody(). A fixed set of stray end tags is ignored.
 * Everything else is handled by inTable().
 *
 * @param array $token Token with a 'type' key and, for tags, a 'name' key.
 * @return mixed The result of the handler the token was delegated to,
 *               null when the token is handled or ignored here.
 */
private function inRow($token)
{
    $clear = array('tr', 'html');
    $type = $token['type'];

    if ($type === HTML5::STARTTAG &&
        ($token['name'] === 'th' || $token['name'] === 'td')
    ) {
        $this->clearStackToTableContext($clear);

        $this->insertElement($token);
        $this->mode = self::IN_CELL;

        /* Push a marker onto the list of active formatting elements. */
        $this->a_formatting[] = self::MARKER;

    } elseif ($type === HTML5::ENDTAG && $token['name'] === 'tr') {
        /* Ignored when elementInScope() finds no row. */
        if ($this->elementInScope($token['name'], true)) {
            $this->clearStackToTableContext($clear);

            /* Pop the row element and return to the table-body mode. */
            array_pop($this->stack);
            $this->mode = self::IN_TBODY;
        }

    } elseif ($type === HTML5::STARTTAG && in_array(
            $token['name'],
            array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead', 'tr'),
            true
        )
    ) {
        /* Act as if an end tag for "tr" had been seen and, if it was not
        ignored, reprocess the token. The implied end tag switches the
        mode to IN_TBODY, so the token belongs to inTableBody(). It used
        to be passed to inCell(), which ignores these start tags when no
        cell is in scope, so the token was lost. */
        if ($this->elementInScope('tr', true)) {
            $this->inRow(
                array(
                    'name' => 'tr',
                    'type' => HTML5::ENDTAG
                )
            );

            return $this->inTableBody($token);
        }

    } elseif ($type === HTML5::ENDTAG &&
        in_array($token['name'], array('tbody', 'tfoot', 'thead'), true)
    ) {
        /* Ignored unless both the named section and a row are in scope.
        Otherwise the row is closed and the end tag is reprocessed by
        inTableBody() (previously, and wrongly, by inCell()). */
        if ($this->elementInScope($token['name'], true) &&
            $this->elementInScope('tr', true)
        ) {
            $this->inRow(
                array(
                    'name' => 'tr',
                    'type' => HTML5::ENDTAG
                )
            );

            return $this->inTableBody($token);
        }

    } elseif ($type === HTML5::ENDTAG && in_array(
            $token['name'],
            array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr'),
            true
        )
    ) {
        /* These end tags are ignored in this mode. Note that "tr" can
        never reach this branch: it is consumed by the second branch. */

    } else {
        /* Anything else is handled by the table rules. The result is
        passed on so that a value returned further down the chain is not
        lost. NOTE(review): confirm the dispatcher consumes this value. */
        return $this->inTable($token);
    }
}
|
| 3881: |
|
/**
 * Processes one token under the "in cell" insertion mode.
 *
 * An end tag for "td"/"th" closes the cell and returns to the row mode.
 * Table-structure start tags, and end tags for "table", "tbody", "tfoot",
 * "thead" or "tr", close the open cell via closeCell() and are then
 * reprocessed by inRow(). A fixed set of stray end tags is ignored.
 * Everything else is processed with the "in body" rules.
 *
 * @param array $token Token with a 'type' key and, for tags, a 'name' key.
 * @return mixed The result of the handler the token was delegated to,
 *               null when the token is handled or ignored here.
 */
private function inCell($token)
{
    $type = $token['type'];

    if ($type === HTML5::ENDTAG &&
        ($token['name'] === 'td' || $token['name'] === 'th')
    ) {
        /* Ignored when elementInScope() finds no matching cell. */
        if ($this->elementInScope($token['name'], true)) {
            /* Generate implied end tags, except for the cell's own name. */
            $this->generateImpliedEndTags(array($token['name']));

            /* Pop elements until the cell itself has been popped. */
            while (true) {
                $node = end($this->stack)->nodeName;
                array_pop($this->stack);

                if ($node === $token['name']) {
                    break;
                }
            }

            $this->clearTheActiveFormattingElementsUpToTheLastMarker();

            $this->mode = self::IN_ROW;
        }

    } elseif ($type === HTML5::STARTTAG && in_array(
            $token['name'],
            array(
                'caption',
                'col',
                'colgroup',
                'tbody',
                'td',
                'tfoot',
                'th',
                'thead',
                'tr'
            ),
            true
        )
    ) {
        /* Ignored when no cell is open; otherwise the cell is closed and
        the token is reprocessed with the row rules. (An identical second
        copy of this branch followed it and could never be reached; it
        has been removed.) */
        if ($this->elementInScope(array('td', 'th'), true)) {
            $this->closeCell();
            return $this->inRow($token);
        }

    } elseif ($type === HTML5::ENDTAG && in_array(
            $token['name'],
            array('body', 'caption', 'col', 'colgroup', 'html'),
            true
        )
    ) {
        /* These end tags are ignored in this mode. */

    } elseif ($type === HTML5::ENDTAG && in_array(
            $token['name'],
            array('table', 'tbody', 'tfoot', 'thead', 'tr'),
            true
        )
    ) {
        /* Ignored when elementInScope() finds no element with that name;
        otherwise the cell is closed and the token is reprocessed with
        the row rules. */
        if ($this->elementInScope($token['name'], true)) {
            $this->closeCell();
            return $this->inRow($token);
        }

    } else {
        /* Anything else uses the "in body" rules. inBody() returns a
        content-model constant for some start tags (e.g. HTML5::RCDATA
        for "textarea"), so its result is passed on rather than dropped.
        NOTE(review): confirm the dispatcher consumes this value. */
        return $this->inBody($token);
    }
}
|
| 4020: |
|
/**
 * Processes one token while the tree builder is in the "in select" mode.
 *
 * Character and comment tokens are inserted into the current node; option,
 * optgroup and select tags manipulate the stack of open elements; table
 * related end tags close the select first and are then reprocessed through
 * mainPhase(). Every other token is ignored.
 *
 * @param array $token Token array; 'type' is always read, 'data' is read for
 *                     character/comment tokens and 'name' for tag tokens.
 * @return void
 */
private function inSelect($token)
{
    // The token type is tested before 'name' is touched in every branch, so
    // tokens that carry no 'name' key never trigger an undefined-key warning.
    $type = $token['type'];

    if ($type === HTML5::CHARACTR) {
        // Character token: append the text to the current node.
        $this->insertText($token['data']);

    } elseif ($type === HTML5::COMMENT) {
        // Comment token: append a comment node to the current node.
        $this->insertComment($token['data']);

    } elseif ($type === HTML5::STARTTAG && $token['name'] === 'option') {
        // An open <option> is implicitly closed by the next <option>.
        if (end($this->stack)->nodeName === 'option') {
            $this->inSelect(
                array(
                    'name' => 'option',
                    'type' => HTML5::ENDTAG
                )
            );
        }

        $this->insertElement($token);

    } elseif ($type === HTML5::STARTTAG && $token['name'] === 'optgroup') {
        // An open <option> and then an open <optgroup> are implicitly closed
        // before the new <optgroup> is inserted.
        if (end($this->stack)->nodeName === 'option') {
            $this->inSelect(
                array(
                    'name' => 'option',
                    'type' => HTML5::ENDTAG
                )
            );
        }

        if (end($this->stack)->nodeName === 'optgroup') {
            $this->inSelect(
                array(
                    'name' => 'optgroup',
                    'type' => HTML5::ENDTAG
                )
            );
        }

        $this->insertElement($token);

    } elseif ($type === HTML5::ENDTAG && $token['name'] === 'optgroup') {
        $depth = count($this->stack);

        // If the current node is an <option> whose immediate predecessor on
        // the stack is an <optgroup>, close the <option> first.
        if ($depth >= 2 &&
            $this->stack[$depth - 1]->nodeName === 'option' &&
            $this->stack[$depth - 2]->nodeName === 'optgroup'
        ) {
            $this->inSelect(
                array(
                    'name' => 'option',
                    'type' => HTML5::ENDTAG
                )
            );
        }

        // Look at the current node again: the stack may just have shrunk, so
        // an index computed earlier would be stale. The comparison is made
        // on the node name, not on the node object. Anything other than an
        // <optgroup> means the end tag is ignored.
        if (end($this->stack)->nodeName === 'optgroup') {
            array_pop($this->stack);
        }

    } elseif ($type === HTML5::ENDTAG && $token['name'] === 'option') {
        // Pop the current node only if it is an <option>; otherwise ignore.
        if (end($this->stack)->nodeName === 'option') {
            array_pop($this->stack);
        }

    } elseif ($type === HTML5::ENDTAG && $token['name'] === 'select') {
        if ($this->elementInScope($token['name'], true)) {
            // Pop elements until the <select> itself has been popped.
            while (true) {
                $current = end($this->stack)->nodeName;
                array_pop($this->stack);

                if ($current === 'select') {
                    break;
                }
            }

            $this->resetInsertionMode();
        }
        // No <select> in table scope: the end tag is ignored.

    } elseif ($type === HTML5::STARTTAG && $token['name'] === 'select') {
        // A nested <select> start tag is handled as if it were </select>.
        $this->inSelect(
            array(
                'name' => 'select',
                'type' => HTML5::ENDTAG
            )
        );

    } elseif ($type === HTML5::ENDTAG && in_array(
            $token['name'],
            array(
                'caption',
                'table',
                'tbody',
                'tfoot',
                'thead',
                'tr',
                'td',
                'th'
            )
        )
    ) {
        // Table-related end tag: if a matching element is in table scope,
        // close the select and hand the same token back to mainPhase().
        if ($this->elementInScope($token['name'], true)) {
            $this->inSelect(
                array(
                    'name' => 'select',
                    'type' => HTML5::ENDTAG
                )
            );

            $this->mainPhase($token);
        }
    }
    // Any other token is ignored.
}
|
| 4201: |
|
/**
 * Processes one token while the tree builder is in the "after body" mode.
 *
 * @param array $token Token array ('type', plus 'data' or 'name').
 * @return mixed Result of inBody() when the token forces a switch back to
 *               the "in body" mode; null otherwise.
 */
private function afterBody($token)
{
    $kind = $token['type'];

    // Whitespace-only character data is processed by the "in body" rules.
    if ($kind === HTML5::CHARACTR &&
        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
    ) {
        $this->inBody($token);
        return;
    }

    // Comments are attached to the first element on the stack of open
    // elements rather than to the current node.
    if ($kind === HTML5::COMMENT) {
        $node = $this->dom->createComment($token['data']);
        $this->stack[0]->appendChild($node);
        return;
    }

    // </html> moves the parser into the trailing end phase.
    if ($kind === HTML5::ENDTAG && $token['name'] === 'html') {
        $this->phase = self::END_PHASE;
        return;
    }

    // Anything else: go back to "in body" and reprocess the token there.
    $this->mode = self::IN_BODY;

    return $this->inBody($token);
}
|
| 4242: |
|
/**
 * Processes one token while the tree builder is in the "in frameset" mode.
 *
 * @param array $token Token array; 'type' is always read, 'data' only for
 *                     character/comment tokens, 'name' only for tag tokens.
 * @return void
 */
private function inFrameset($token)
{
    // Dispatch on the token type first: 'name' is only read for tag tokens,
    // so character or EOF tokens never hit an undefined 'name' key.
    $type = $token['type'];

    if ($type === HTML5::CHARACTR) {
        // Only whitespace-only text is inserted; other text is ignored.
        if (preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
            $this->insertText($token['data']);
        }

    } elseif ($type === HTML5::COMMENT) {
        $this->insertComment($token['data']);

    } elseif ($type === HTML5::STARTTAG) {
        if ($token['name'] === 'frameset') {
            // Nested frameset: insert it and keep it open.
            $this->insertElement($token);

        } elseif ($token['name'] === 'frame') {
            // <frame> has no content: insert it, then pop it straight away.
            $this->insertElement($token);
            array_pop($this->stack);

        } elseif ($token['name'] === 'noframes') {
            // <noframes> is delegated to the "in body" rules.
            $this->inBody($token);
        }
        // Any other start tag is ignored.

    } elseif ($type === HTML5::ENDTAG && $token['name'] === 'frameset') {
        // When the current node is the root <html> element the end tag is
        // ignored; otherwise the frameset is popped and the mode advances.
        if (end($this->stack)->nodeName !== 'html') {
            array_pop($this->stack);
            $this->mode = self::AFTR_FRAME;
        }
    }
    // Any other token is ignored.
}
|
| 4311: |
|
/**
 * Processes one token while the tree builder is in the "after frameset"
 * mode.
 *
 * @param array $token Token array; 'type' is always read, 'data' only for
 *                     character/comment tokens, 'name' only for tag tokens.
 * @return void
 */
private function afterFrameset($token)
{
    // The type is checked before 'name' is read so that tokens without a
    // 'name' key (character data, EOF) do not raise an undefined-key warning.
    $type = $token['type'];

    if ($type === HTML5::CHARACTR) {
        // Only whitespace-only text is inserted; other text is ignored.
        if (preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
            $this->insertText($token['data']);
        }

    } elseif ($type === HTML5::COMMENT) {
        $this->insertComment($token['data']);

    } elseif ($type === HTML5::ENDTAG && $token['name'] === 'html') {
        // </html> moves the parser into the trailing end phase.
        $this->phase = self::END_PHASE;

    } elseif ($type === HTML5::STARTTAG && $token['name'] === 'noframes') {
        // <noframes> is delegated to the "in body" rules.
        $this->inBody($token);
    }
    // Any other token is ignored.
}
|
| 4350: |
|
/**
 * Processes one token after the main phase has ended (trailing end phase).
 *
 * @param array $token Token array ('type', plus 'data' for character and
 *                     comment tokens).
 * @return mixed Result of mainPhase() when the parser switches back to the
 *               main phase; null otherwise.
 */
private function trailingEndPhase($token)
{
    $type = $token['type'];

    if ($type === HTML5::DOCTYPE) {
        // A DOCTYPE at this point is ignored.

    } elseif ($type === HTML5::COMMENT) {
        // Comments are appended directly to the document object.
        $comment = $this->dom->createComment($token['data']);
        $this->dom->appendChild($comment);

    } elseif ($type === HTML5::CHARACTR &&
        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
    ) {
        // Whitespace-only text is processed by the main phase without
        // leaving the trailing end phase.
        $this->mainPhase($token);

    } elseif ($type === HTML5::CHARACTR ||
        $type === HTML5::STARTTAG ||
        $type === HTML5::ENDTAG
    ) {
        // Whitespace-only text was consumed by the branch above, so a
        // character token reaching this point contains non-whitespace.
        // Together with start and end tags it switches the parser back to
        // the main phase, where the token is reprocessed.
        $this->phase = self::MAIN_PHASE;

        return $this->mainPhase($token);

    } elseif ($type === HTML5::EOF) {
        // End of input: nothing left to do.
    }
}
|
| 4393: |
|
/**
 * Creates an element for a start tag token, attaches it to the tree and
 * pushes it onto the stack of open elements.
 *
 * @param array $token  Start tag token; 'name' and 'attr' (a list of arrays
 *                      with 'name' and 'value' keys) are read.
 * @param bool  $append Not used by this method.
 * @param bool  $check  When true, the tag name is sanitised before use.
 * @return DOMElement The newly created element.
 */
private function insertElement($token, $append = true, $check = false)
{
    $tagName = $token['name'];

    if ($check) {
        // Keep only ASCII letters, digits and hyphens ...
        $tagName = preg_replace('/[^a-z0-9-]/i', '', $tagName);
        // ... drop leading hyphens and digits ...
        $tagName = ltrim($tagName, '-0..9');
        // ... and fall back to a neutral element if nothing is left.
        if ($tagName === '') {
            $tagName = 'span';
        }
    }

    $element = $this->dom->createElement($tagName);

    // The first occurrence of an attribute wins; later duplicates are skipped.
    foreach ($token['attr'] as $attribute) {
        if ($element->hasAttribute($attribute['name'])) {
            continue;
        }
        $element->setAttribute($attribute['name'], (string)$attribute['value']);
    }

    $this->appendToRealParent($element);
    $this->stack[] = $element;

    return $element;
}
|
| 4422: |
|
/**
 * Wraps the given string in a text node and attaches it to the tree via
 * appendToRealParent().
 *
 * @param string $data Character data for the new text node.
 * @return void
 */
private function insertText($data)
{
    $this->appendToRealParent($this->dom->createTextNode($data));
}
|
| 4428: |
|
/**
 * Wraps the given string in a comment node and attaches it to the tree via
 * appendToRealParent().
 *
 * @param string $data Content of the new comment node.
 * @return void
 */
private function insertComment($data)
{
    $this->appendToRealParent($this->dom->createComment($data));
}
|
| 4434: |
|
/**
 * Attaches a node to the tree, honouring a pending foster parent.
 *
 * Without a foster parent the node becomes the last child of the current
 * node. With one, the node is placed in the foster parent instead, and the
 * foster parent is cleared afterwards.
 *
 * @param DOMNode $node Node to attach.
 * @return void
 */
private function appendToRealParent($node)
{
    // Common case: no foster parenting requested.
    if ($this->foster_parent === null) {
        end($this->stack)->appendChild($node);
        return;
    }

    // Find the most recently opened <table> that is attached to a parent.
    $table = null;
    foreach (array_reverse($this->stack) as $candidate) {
        if ($candidate->nodeName === 'table' && $candidate->parentNode !== null) {
            $table = $candidate;
            break;
        }
    }

    $fosterParent = $this->foster_parent;

    if ($table !== null && $fosterParent->isSameNode($table->parentNode)) {
        // The foster parent is that table's parent: insert just before the
        // table.
        $fosterParent->insertBefore($node, $table);
    } else {
        // Otherwise the node becomes the foster parent's last child.
        $fosterParent->appendChild($node);
    }

    // The foster parent applies to a single insertion only.
    $this->foster_parent = null;
}
|
| 4465: |
|
/**
 * Tells whether an element with the given tag name is "in scope" on the
 * stack of open elements.
 *
 * The stack is walked from the current node downwards. The walk succeeds on
 * the first node whose tag name equals $el and fails on a <table>, on one of
 * the barrier elements (only when $table is true) or on the document's root
 * element.
 *
 * NOTE(review): the barrier list is applied when $table is true; confirm
 * this is the intended scope variant before relying on it.
 *
 * @param string|array $el    Tag name, or a list of tag names of which any
 *                            one may match.
 * @param bool         $table Enables the extra barrier elements.
 * @return bool|null True when found, false when the walk is stopped; null if
 *                   the whole stack is walked without reaching a decision.
 */
private function elementInScope($el, $table = false)
{
    // A list of names succeeds as soon as any single name is in scope.
    if (is_array($el)) {
        foreach ($el as $name) {
            if ($this->elementInScope($name, $table)) {
                return true;
            }
        }

        return false;
    }

    $barriers = array(
        'caption',
        'td',
        'th',
        'button',
        'marquee',
        'object'
    );

    // Walk from the current node (top of the stack) towards the root.
    for ($index = count($this->stack) - 1; $index >= 0; $index--) {
        $node = $this->stack[$index];

        if ($node->tagName === $el) {
            // Target found.
            return true;
        }

        if ($node->tagName === 'table') {
            // A table always ends the search.
            return false;
        }

        if ($table === true && in_array($node->tagName, $barriers)) {
            // A barrier element ends the search when $table is set.
            return false;
        }

        if ($node === $node->ownerDocument->documentElement) {
            // Reached the document's root element without a match.
            return false;
        }
    }
}
|
| 4525: |
|
/**
 * Re-opens formatting elements that are still listed as active but are no
 * longer on the stack of open elements.
 *
 * Starting from the end of the active formatting list, the method rewinds
 * over every entry that is neither a marker nor on the stack. Each of those
 * entries is then shallow-cloned in list order, appended to the current
 * node, pushed onto the stack, and the list entry is replaced by its clone.
 *
 * The entry that stops the rewind (a marker, or an element still on the
 * stack) is never cloned itself; cloning resumes with the entry after it.
 *
 * @return false|null False when there is nothing to reconstruct, null after
 *                    entries have been reconstructed.
 */
private function reconstructActiveFormattingElements()
{
    $total = count($this->a_formatting);

    // Nothing listed: nothing to reconstruct.
    if ($total === 0) {
        return false;
    }

    // If the last entry is a marker or is still open, the list is already
    // in sync with the stack.
    $entry = end($this->a_formatting);

    if ($entry === self::MARKER || in_array($entry, $this->stack, true)) {
        return false;
    }

    // Rewind: move $first back while the entry before it also needs to be
    // reconstructed. It stops at the first list entry, or just after a
    // marker / an element that is still on the stack.
    $last = $total - 1;
    $first = $last;

    while ($first > 0) {
        $previous = $this->a_formatting[$first - 1];

        if ($previous === self::MARKER || in_array($previous, $this->stack, true)) {
            break;
        }

        $first--;
    }

    // Advance: clone every entry from $first up to the end of the list.
    for ($index = $first; $index <= $last; $index++) {
        // Shallow clone: the element itself without its children.
        $clone = $this->a_formatting[$index]->cloneNode();

        // The clone becomes a child of the current node and the new
        // current node.
        end($this->stack)->appendChild($clone);
        $this->stack[] = $clone;

        // The list now refers to the clone instead of the original.
        $this->a_formatting[$index] = $clone;
    }
}
|
| 4597: |
|
/**
 * Removes entries from the end of the active formatting list up to and
 * including the most recent marker.
 *
 * If the list contains no marker, it is simply emptied; the loop stops as
 * soon as no entries are left instead of spinning on an empty list.
 *
 * @return void
 */
private function clearTheActiveFormattingElementsUpToTheLastMarker()
{
    while (count($this->a_formatting) > 0) {
        // Take the last entry off the list ...
        $entry = array_pop($this->a_formatting);

        // ... and stop once that entry was a marker.
        if ($entry === self::MARKER) {
            break;
        }
    }
}
|
| 4619: |
|
/**
 * Pops elements whose end tag is implied (dd, dt, li, p, td, th, tr) off the
 * stack of open elements for as long as the current node is one of them.
 *
 * @param array $exclude Tag names that must not be closed implicitly.
 * @return void
 */
private function generateImpliedEndTags($exclude = array())
{
    $implied = array_diff(
        array('dd', 'dt', 'li', 'p', 'td', 'th', 'tr'),
        $exclude
    );

    // end() yields false on an empty stack; stop there instead of reading a
    // property of a non-object.
    while (($current = end($this->stack)) !== false &&
        in_array($current->nodeName, $implied, true)
    ) {
        array_pop($this->stack);
    }
}
|
| 4634: |
|
/**
 * Classifies a node by looking its tag name up in the special, scoping and
 * formatting name lists, in that order.
 *
 * @param DOMElement $node Node whose tagName is inspected.
 * @return mixed One of self::SPECIAL, self::SCOPING, self::FORMATTING, or
 *               self::PHRASING when the name is in none of the lists.
 */
private function getElementCategory($node)
{
    $tag = $node->tagName;

    if (in_array($tag, $this->special)) {
        return self::SPECIAL;
    }

    if (in_array($tag, $this->scoping)) {
        return self::SCOPING;
    }

    return in_array($tag, $this->formatting)
        ? self::FORMATTING
        : self::PHRASING;
}
|
| 4648: |
|
/**
 * Pops elements off the stack of open elements until the current node is
 * one of the given context elements.
 *
 * The loop also ends when the stack runs empty, so a stack that contains
 * none of the context elements cannot cause an endless loop.
 *
 * @param array $elements Tag names at which popping stops.
 * @return void
 */
private function clearStackToTableContext($elements)
{
    while (($current = end($this->stack)) !== false) {
        // Stop as soon as the current node is a context element.
        if (in_array($current->nodeName, $elements)) {
            break;
        }

        array_pop($this->stack);
    }
}
|
| 4666: |
|
/**
 * Chooses the insertion mode that matches the current stack of open
 * elements.
 *
 * The stack is walked from the current node downwards; the first node whose
 * name maps to a mode decides. If the first element on the stack is reached
 * without a match, the mode falls back to "in body". With an empty stack the
 * mode is left untouched.
 *
 * @return void
 */
private function resetInsertionMode()
{
    // Node name => insertion mode. Note that <head> maps to "in body".
    $modeByName = array(
        'select'   => self::IN_SELECT,
        'td'       => self::IN_CELL,
        'th'       => self::IN_CELL,
        'tr'       => self::IN_ROW,
        'tbody'    => self::IN_TBODY,
        'thead'    => self::IN_TBODY,
        'tfoot'    => self::IN_TBODY,
        'caption'  => self::IN_CAPTION,
        'colgroup' => self::IN_CGROUP,
        'table'    => self::IN_TABLE,
        'head'     => self::IN_BODY,
        'body'     => self::IN_BODY,
        'frameset' => self::IN_FRAME,
    );

    $reachedBottom = false;

    for ($index = count($this->stack) - 1; $index >= 0; $index--) {
        $current = $this->stack[$index];

        // Remember when the first element on the stack has been reached.
        if ($this->stack[0]->isSameNode($current)) {
            $reachedBottom = true;
        }

        $name = $current->nodeName;

        if (isset($modeByName[$name])) {
            $this->mode = $modeByName[$name];
            return;
        }

        if ($name === 'html') {
            // The root element: the mode depends on whether a head element
            // has been recorded yet.
            $this->mode = ($this->head_pointer === null)
                ? self::BEFOR_HEAD
                : self::AFTER_HEAD;
            return;
        }

        if ($reachedBottom) {
            // Bottom of the stack without any match: default to "in body".
            $this->mode = self::IN_BODY;
            return;
        }
    }
}
|
| 4765: |
|
/**
 * Closes the currently open table cell by feeding a synthetic end tag to
 * inCell(): </td> if a td is in table scope, otherwise </th> if a th is.
 * Does nothing when neither is in scope.
 *
 * @return void
 */
private function closeCell()
{
    if ($this->elementInScope('td', true)) {
        $cellName = 'td';
    } elseif ($this->elementInScope('th', true)) {
        $cellName = 'th';
    } else {
        // No cell is open.
        return;
    }

    $endTag = array(
        'name' => $cellName,
        'type' => HTML5::ENDTAG
    );

    $this->inCell($endTag);
}
|
| 4783: |
|
/**
 * Hands back the document object held in $this->dom.
 *
 * @return mixed The value of the dom property.
 */
public function save()
{
    $document = $this->dom;

    return $document;
}
|
| 4788: | }
|
| 4789: | |