| 1: | <?php
|
| 2: |
|
| 3: | |
| 4: | |
| 5: | |
| 6: | |
| 7: | |
| 8: | |
| 9: | |
| 10: | |
| 11: | |
| 12: | |
| 13: |
|
/**
 * Strategy that walks a token stream and rewrites it so that start and end
 * tags balance: unclosed elements are closed, stray end tags are removed or
 * turned into text, and elements not allowed in their parent trigger an
 * auto-close (or get wrapped / dropped). Configured injectors get a chance
 * to handle every text, element and end token along the way.
 */
class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
{
    /**
     * Token array passed to execute(), bound by reference for the duration
     * of the run.
     */
    protected $tokens;

    /**
     * Token currently being processed; bound by reference to the loop
     * variable inside execute().
     */
    protected $token;

    /**
     * HTMLPurifier_Zipper built from the input tokens; all insertions and
     * deletions on the stream go through it.
     */
    protected $zipper;

    /**
     * Stack of start tokens that have been opened but not yet closed.
     */
    protected $stack;

    /**
     * List of active injector objects. The numeric index of an injector is
     * what tokens record in their `skip` map and `rewind` field.
     */
    protected $injectors;

    /**
     * Configuration object passed to execute().
     */
    protected $config;

    /**
     * Context object passed to execute().
     */
    protected $context;

    /**
     * Runs the well-formedness pass over a token list.
     *
     * @param array $tokens  Input tokens.
     * @param object $config  Configuration; must provide getHTMLDefinition(),
     *                        get() and getBatch().
     * @param object $context Context; 'CurrentNesting', 'InputZipper' and
     *                        'CurrentToken' are registered on it during the
     *                        run and destroyed before returning.
     * @return array The rewritten token list (empty array if the zipper
     *               yields no first token).
     * @throws HTMLPurifier_Exception If an unexpected token type shows up
     *                                in the tag-handling code paths.
     */
    public function execute($tokens, $config, $context)
    {
        $definition = $config->getHTMLDefinition();

        // local variables
        $generator = new HTMLPurifier_Generator($config, $context);
        $escape_invalid_tags = $config->get('Core.EscapeInvalidTags');
        // elements allowed directly under the document's parent element;
        // consulted before walking the ancestor stack during auto-close
        $global_parent_allowed_elements = $definition->info_parent_def->child->getAllowedElements($config);
        // NOTE(review): the second argument presumably suppresses the
        // "missing variable" error so $e may be falsy - confirm against
        // HTMLPurifier_Context::get().
        $e = $context->get('ErrorCollector', true);
        $i = false; // index of the injector that handled the last token, or false
        list($zipper, $token) = HTMLPurifier_Zipper::fromArray($tokens);
        if ($token === NULL) {
            return array();
        }
        $reprocess = false; // when true, the loop re-examines $token instead of advancing
        $stack = array();

        // member variables (references, so helpers see the live loop state)
        $this->stack =& $stack;
        $this->tokens =& $tokens;
        $this->token =& $token;
        $this->zipper =& $zipper;
        $this->config = $config;
        $this->context = $context;

        // context variables
        $context->register('CurrentNesting', $stack);
        $context->register('InputZipper', $zipper);
        $context->register('CurrentToken', $token);

        // -- begin injector setup --

        $this->injectors = array();

        $injectors = $config->getBatch('AutoFormat');
        $def_injectors = $definition->info_injector;
        $custom_injectors = $injectors['Custom'];
        unset($injectors['Custom']); // handled separately below
        foreach ($injectors as $injector => $b) {
            // keys containing a dot are not treated as injector names
            if (strpos($injector, '.') !== false) {
                continue;
            }
            $injector = "HTMLPurifier_Injector_$injector";
            if (!$b) {
                continue;
            }
            $this->injectors[] = new $injector;
        }
        foreach ($def_injectors as $injector) {
            // injectors supplied by the HTML definition are used as-is
            $this->injectors[] = $injector;
        }
        foreach ($custom_injectors as $injector) {
            if (!$injector) {
                continue;
            }
            if (is_string($injector)) {
                $injector = "HTMLPurifier_Injector_$injector";
                $injector = new $injector;
            }
            $this->injectors[] = $injector;
        }

        // Let every injector prepare itself; drop those that report an
        // error. The survivors are collected into a fresh list rather than
        // spliced out of the list being iterated: splicing renumbers the
        // remaining entries, so the loop index of any later failing
        // injector would point at the wrong element (or past the end).
        $enabled_injectors = array();
        foreach ($this->injectors as $injector) {
            $error = $injector->prepare($config, $context);
            if (!$error) {
                $enabled_injectors[] = $injector;
                continue;
            }
            trigger_error("Cannot enable {$injector->name} injector because $error is not allowed", E_USER_WARNING);
        }
        $this->injectors = $enabled_injectors;

        // -- end injector setup --

        // Main loop. The increment expression either clears the reprocess
        // flag (re-examining the same $token) or advances the zipper.
        for (;;
            $reprocess ? $reprocess = false : $token = $zipper->next($token)) {

            // If an injector handled the previous token, honour its rewind
            // request by stepping the zipper backwards.
            if (is_int($i)) {
                $rewind_offset = $this->injectors[$i]->getRewindOffset();
                if (is_int($rewind_offset)) {
                    for ($j = 0; $j < $rewind_offset; $j++) {
                        if (empty($zipper->front)) {
                            break;
                        }
                        $token = $zipper->prev($token);
                        // allow injector $i (and only it) to see this token again
                        unset($token->skip[$i]);
                        $token->rewind = $i;
                        // undo the stack effect the token had when first passed
                        if ($token instanceof HTMLPurifier_Token_Start) {
                            array_pop($this->stack);
                        } elseif ($token instanceof HTMLPurifier_Token_End) {
                            $this->stack[] = $token->start;
                        }
                    }
                }
                $i = false;
            }

            // End of input: close whatever is still open, one tag per pass.
            if ($token === NULL) {
                // nothing left open, we are done
                if (empty($this->stack)) {
                    break;
                }

                // peek at the top of the stack
                $top_nesting = array_pop($this->stack);
                $this->stack[] = $top_nesting;

                // report, unless the tag is armored against this notice
                if ($e && !isset($top_nesting->armor['MakeWellFormed_TagClosedError'])) {
                    $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $top_nesting);
                }

                // synthesize the missing end tag and process it like any other
                $token = new HTMLPurifier_Token_End($top_nesting->name);

                $reprocess = true;
                continue;
            }

            // Non-tag tokens: only text is offered to the injectors.
            if (empty($token->is_tag)) {
                if ($token instanceof HTMLPurifier_Token_Text) {
                    foreach ($this->injectors as $i => $injector) {
                        if (isset($token->skip[$i])) {
                            // this injector already handled the token
                            continue;
                        }
                        if ($token->rewind !== null && $token->rewind !== $i) {
                            // token was rewound for a different injector
                            continue;
                        }
                        // hand the injector a separate variable to work on
                        $r = $token;
                        $injector->handleText($r);
                        $token = $this->processToken($r, $i);
                        $reprocess = true;
                        break;
                    }
                }
                // other non-tag tokens pass through untouched
                continue;
            }

            // content model type of the element, or false if it is unknown
            if (isset($definition->info[$token->name])) {
                $type = $definition->info[$token->name]->child->type;
            } else {
                $type = false;
            }

            // normalize start/empty tags against the content model
            $ok = false;
            if ($type === 'empty' && $token instanceof HTMLPurifier_Token_Start) {
                // start tag of an element that must be empty: convert it
                $token = new HTMLPurifier_Token_Empty(
                    $token->name,
                    $token->attr,
                    $token->line,
                    $token->col,
                    $token->armor
                );
                $ok = true;
            } elseif ($type && $type !== 'empty' && $token instanceof HTMLPurifier_Token_Empty) {
                // empty tag of an element that takes content: expand it into
                // a start/end pair and reprocess from the new start tag
                $old_token = $token;
                $token = new HTMLPurifier_Token_End($token->name);
                $token = $this->insertBefore(
                    new HTMLPurifier_Token_Start($old_token->name, $old_token->attr, $old_token->line, $old_token->col, $old_token->armor)
                );

                $reprocess = true;
                continue;
            } elseif ($token instanceof HTMLPurifier_Token_Empty) {
                // genuine empty tag
                $ok = true;
            } elseif ($token instanceof HTMLPurifier_Token_Start) {
                // start tag: check whether the current parent allows it
                if (!empty($this->stack)) {
                    // peek at the current parent
                    $parent = array_pop($this->stack);
                    $this->stack[] = $parent;

                    $parent_def = null;
                    $parent_elements = null;
                    $autoclose = false;
                    if (isset($definition->info[$parent->name])) {
                        $parent_def = $definition->info[$parent->name];
                        $parent_elements = $parent_def->child->getAllowedElements($config);
                        $autoclose = !isset($parent_elements[$token->name]);
                    }

                    if ($autoclose && $definition->info[$token->name]->wrap) {
                        // The element declares a wrapper element. If the
                        // wrapper is allowed in the parent and the element is
                        // allowed in the wrapper, insert the wrapper's start
                        // tag and reprocess from there.
                        $wrapname = $definition->info[$token->name]->wrap;
                        $wrapdef = $definition->info[$wrapname];
                        $elements = $wrapdef->child->getAllowedElements($config);
                        if (isset($elements[$token->name]) && isset($parent_elements[$wrapname])) {
                            $newtoken = new HTMLPurifier_Token_Start($wrapname);
                            $token = $this->insertBefore($newtoken);
                            $reprocess = true;
                            continue;
                        }
                    }

                    // formatting parents are re-opened after being auto-closed
                    $carryover = false;
                    if ($autoclose && $parent_def->formatting) {
                        $carryover = true;
                    }

                    if ($autoclose) {
                        // Closing the parent only helps if some enclosing
                        // scope accepts the element (directly or via its
                        // wrapper); otherwise the element is dropped.
                        $autoclose_ok = isset($global_parent_allowed_elements[$token->name]);
                        if (!$autoclose_ok) {
                            foreach ($this->stack as $ancestor) {
                                $elements = $definition->info[$ancestor->name]->child->getAllowedElements($config);
                                if (isset($elements[$token->name])) {
                                    $autoclose_ok = true;
                                    break;
                                }
                                if ($definition->info[$token->name]->wrap) {
                                    $wrapname = $definition->info[$token->name]->wrap;
                                    $wrapdef = $definition->info[$wrapname];
                                    $wrap_elements = $wrapdef->child->getAllowedElements($config);
                                    if (isset($wrap_elements[$token->name]) && isset($elements[$wrapname])) {
                                        $autoclose_ok = true;
                                        break;
                                    }
                                }
                            }
                        }
                        if ($autoclose_ok) {
                            // close the parent in front of the current token
                            $new_token = new HTMLPurifier_Token_End($parent->name);
                            $new_token->start = $parent;

                            if ($e && !isset($parent->armor['MakeWellFormed_TagClosedError'])) {
                                if (!$carryover) {
                                    $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent);
                                } else {
                                    $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag carryover', $parent);
                                }
                            }
                            if ($carryover) {
                                // re-open a copy of the parent after the token
                                $element = clone $parent;
                                // the copy must not produce a second notice
                                $element->armor['MakeWellFormed_TagClosedError'] = true;
                                $element->carryover = true;
                                $token = $this->processToken(array($new_token, $token, $element));
                            } else {
                                $token = $this->insertBefore($new_token);
                            }
                        } else {
                            $token = $this->remove();
                        }
                        $reprocess = true;
                        continue;
                    }

                }
                $ok = true;
            }

            if ($ok) {
                foreach ($this->injectors as $i => $injector) {
                    if (isset($token->skip[$i])) {
                        // this injector already handled the token
                        continue;
                    }
                    if ($token->rewind !== null && $token->rewind !== $i) {
                        // token was rewound for a different injector
                        continue;
                    }
                    $r = $token;
                    $injector->handleElement($r);
                    $token = $this->processToken($r, $i);
                    $reprocess = true;
                    break;
                }
                if (!$reprocess) {
                    // no injector touched the token: commit it
                    if ($token instanceof HTMLPurifier_Token_Start) {
                        $this->stack[] = $token;
                    } elseif ($token instanceof HTMLPurifier_Token_End) {
                        throw new HTMLPurifier_Exception(
                            'Improper handling of end tag in start code; possible error in MakeWellFormed'
                        );
                    }
                }
                continue;
            }

            // from here on only end tags are expected
            if (!$token instanceof HTMLPurifier_Token_End) {
                throw new HTMLPurifier_Exception('Unaccounted for tag token in input stream, bug in HTML Purifier');
            }

            // end tag with nothing open: escape it to text or drop it
            if (empty($this->stack)) {
                if ($escape_invalid_tags) {
                    if ($e) {
                        $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag to text');
                    }
                    $token = new HTMLPurifier_Token_Text($generator->generateFromToken($token));
                } else {
                    if ($e) {
                        $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag removed');
                    }
                    $token = $this->remove();
                }
                $reprocess = true;
                continue;
            }

            // end tag that matches the innermost open element
            $current_parent = array_pop($this->stack);
            if ($current_parent->name == $token->name) {
                $token->start = $current_parent;
                foreach ($this->injectors as $i => $injector) {
                    if (isset($token->skip[$i])) {
                        // this injector already handled the token
                        continue;
                    }
                    if ($token->rewind !== null && $token->rewind !== $i) {
                        // token was rewound for a different injector
                        continue;
                    }
                    $r = $token;
                    $injector->handleEnd($r);
                    $token = $this->processToken($r, $i);
                    // the token will be reprocessed, so the element is
                    // still considered open for now
                    $this->stack[] = $current_parent;
                    $reprocess = true;
                    break;
                }
                continue;
            }

            // no match with the innermost element: put it back
            $this->stack[] = $current_parent;

            // look further up the stack for an element this tag could close
            $size = count($this->stack);
            // the innermost element (index $size - 1) was checked above
            $skipped_tags = false;
            for ($j = $size - 2; $j >= 0; $j--) {
                if ($this->stack[$j]->name == $token->name) {
                    $skipped_tags = array_slice($this->stack, $j);
                    break;
                }
            }

            // nothing on the stack matches: escape the tag to text or drop it
            if ($skipped_tags === false) {
                if ($escape_invalid_tags) {
                    if ($e) {
                        $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag to text');
                    }
                    $token = new HTMLPurifier_Token_Text($generator->generateFromToken($token));
                } else {
                    if ($e) {
                        $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag removed');
                    }
                    $token = $this->remove();
                }
                $reprocess = true;
                continue;
            }

            // report every element that gets closed implicitly
            // ($skipped_tags[0] is the element the end tag really belongs to)
            $c = count($skipped_tags);
            if ($e) {
                for ($j = $c - 1; $j > 0; $j--) {
                    if (!isset($skipped_tags[$j]->armor['MakeWellFormed_TagClosedError'])) {
                        $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$j]);
                    }
                }
            }

            // Build the replacement: end tags for the skipped elements
            // (innermost first), then the original end tag, then re-opened
            // copies of any skipped formatting elements.
            $replace = array($token);
            for ($j = 1; $j < $c; $j++) {
                $new_token = new HTMLPurifier_Token_End($skipped_tags[$j]->name);
                $new_token->start = $skipped_tags[$j];
                array_unshift($replace, $new_token);
                if (isset($definition->info[$new_token->name]) && $definition->info[$new_token->name]->formatting) {
                    $element = clone $skipped_tags[$j];
                    $element->carryover = true;
                    $element->armor['MakeWellFormed_TagClosedError'] = true;
                    $replace[] = $element;
                }
            }
            $token = $this->processToken($replace);
            $reprocess = true;
            continue;
        }

        $context->destroy('CurrentToken');
        $context->destroy('CurrentNesting');
        $context->destroy('InputZipper');

        unset($this->injectors, $this->stack, $this->tokens);
        return $zipper->toArray($token);
    }

    /**
     * Applies a substitution to the token stream at the current position.
     *
     * Accepted forms of $token:
     *  - a token object: replaces the current token with it;
     *  - an integer: deletes that many tokens, inserting nothing;
     *  - false: deletes one token, inserting nothing;
     *  - an array of tokens, optionally led by an integer giving the number
     *    of tokens to delete (defaults to 1): deletes that many tokens and
     *    inserts the listed ones.
     *
     * When an injector index is given, every inserted token inherits the
     * `skip` map of the first removed token and is additionally marked as
     * skipped for that injector.
     *
     * @param mixed $token    Substitution in one of the forms above.
     * @param int   $injector Index of the injector that produced the
     *                        substitution, or -1 if none did.
     * @return mixed Second element of the zipper's splice() result.
     *               NOTE(review): presumably the token now at the current
     *               position - confirm against HTMLPurifier_Zipper::splice().
     * @throws HTMLPurifier_Exception If $token has an unsupported type or
     *                                asks for zero tokens to be deleted.
     */
    protected function processToken($token, $injector = -1)
    {
        // NOTE(review): the $tmp indirection below looks deliberate
        // (possibly an opcode-cache workaround) - confirm before
        // simplifying to array($token).

        // normalize every accepted form to array(int $delete, token...)
        if (is_object($token)) {
            $tmp = $token;
            $token = array(1, $tmp);
        }
        if (is_int($token)) {
            $tmp = $token;
            $token = array($tmp);
        }
        if ($token === false) {
            $token = array(1);
        }
        if (!is_array($token)) {
            throw new HTMLPurifier_Exception('Invalid token type from injector');
        }
        if (!is_int($token[0])) {
            array_unshift($token, 1);
        }
        if ($token[0] === 0) {
            throw new HTMLPurifier_Exception('Deleting zero tokens is not valid');
        }

        // $token is now array(int $delete, token...); after the shift it
        // holds only the replacement tokens
        $delete = array_shift($token);
        list($old, $r) = $this->zipper->splice($this->token, $delete, $token);

        if ($injector > -1) {
            // carry the skip marks of the first removed token over to the
            // replacements and mark them as handled by this injector
            $oldskip = isset($old[0]) ? $old[0]->skip : array();
            foreach ($token as $object) {
                $object->skip = $oldskip;
                $object->skip[$injector] = true;
            }
        }

        return $r;

    }

    /**
     * Inserts a token in front of the current one without deleting anything.
     *
     * @param object $token Token to insert.
     * @return mixed Second element of the zipper's splice() result.
     */
    private function insertBefore($token)
    {
        // a splice that deletes zero tokens is a pure insertion
        $splice = $this->zipper->splice($this->token, 0, array($token));

        return $splice[1];
    }

    /**
     * Removes the current token from the stream.
     *
     * @return mixed Whatever the zipper's delete() returns.
     *               NOTE(review): presumably the token that becomes current
     *               - confirm against HTMLPurifier_Zipper::delete().
     */
    private function remove()
    {
        return $this->zipper->delete();
    }
}
|
| 612: |
|
| 613: |
|
| 614: |
|
| 615: |
|
| 616: |
|
| 617: |
|
| 618: |
|
| 619: |
|
| 620: |
|
| 621: |
|
| 622: |
|
| 623: |
|
| 624: |
|
| 625: |
|
| 626: |
|
| 627: |
|
| 628: |
|
| 629: |
|
| 630: |
|
| 631: |
|
| 632: |
|
| 633: |
|
| 634: |
|
| 635: |
|
| 636: |
|
| 637: |
|
| 638: |
|
| 639: |
|
| 640: |
|
| 641: |
|
| 642: |
|
| 643: |
|
| 644: |
|
| 645: |
|
| 646: |
|
| 647: |
|
| 648: |
|
| 649: |
|
| 650: |
|
| 651: |
|
| 652: |
|
| 653: |
|
| 654: |
|
| 655: |
|
| 656: |
|
| 657: |
|
| 658: |
|
| 659: |
|
| 660: | |