1: <?php
2:
3: /**
4: * Removes all unrecognized tags from the list of tokens.
5: *
6: * This strategy iterates through all the tokens and removes unrecognized
7: * tokens. If a token is not recognized but a TagTransform is defined for
8: * that element, the element will be transformed accordingly.
9: */
10:
class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
{

    /**
     * Filters a token list down to what the HTML definition recognizes.
     *
     * Tag tokens that have a registered tag transform are transformed first.
     * Tags the definition still does not know are either rendered back into
     * a text token (Core.EscapeInvalidTags) or dropped. When a dropped start
     * tag is listed in Core.HiddenElements, the tokens that follow it are
     * dropped too, until a tag with the same name is encountered. Known
     * start/empty tags that lack a required attribute after validation are
     * dropped as well (for img only when Core.RemoveInvalidImg is on).
     * Comments are kept only when HTML.Trusted is set or they match
     * HTML.AllowedComments / HTML.AllowedCommentsRegexp; inside a recognized
     * hidden element they are converted to text tokens instead.
     *
     * While running, the token being examined is exposed to the context as
     * 'CurrentToken'; the entry is destroyed before returning.
     *
     * @param HTMLPurifier_Token[] $tokens
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array|HTMLPurifier_Token[]
     */
    public function execute($tokens, $config, $context)
    {
        $definition = $config->getHTMLDefinition();
        $generator = new HTMLPurifier_Generator($config, $context);

        $escape_foreign = $config->get('Core.EscapeInvalidTags');
        $drop_invalid_img = $config->get('Core.RemoveInvalidImg');

        // the following settings only decide whether comments are kept
        $trusted = $config->get('HTML.Trusted');
        $allowed_comments = $config->get('HTML.AllowedComments');
        $allowed_comments_regexp = $config->get('HTML.AllowedCommentsRegexp');
        $filter_comments = $allowed_comments !== array() || $allowed_comments_regexp !== null;

        $strip_script_body = $config->get('Core.RemoveScriptContents');
        $hidden = $config->get('Core.HiddenElements');

        // Core.RemoveScriptContents compatibility: an explicit true/false
        // forces 'script' into or out of the hidden element lookup
        if ($strip_script_body === true) {
            $hidden['script'] = true;
        } elseif ($strip_script_body === false && isset($hidden['script'])) {
            unset($hidden['script']);
        }

        $attr_validator = new HTMLPurifier_AttrValidator();

        // tag name we are discarding tokens up to, or false when not skipping
        $skip_until = false;

        // tag name inside which comments are turned into text, or false
        $textify_in = false;

        // $token is bound by reference, so the context always sees the
        // token the loop below is currently working on
        $token = false;
        $context->register('CurrentToken', $token);

        $errors = false;
        if ($config->get('Core.CollectErrors')) {
            $errors =& $context->get('ErrorCollector');
        }

        foreach ($tokens as $token) {
            // while skipping, only a tag carrying the awaited name gets through
            if ($skip_until && (empty($token->is_tag) || $token->name !== $skip_until)) {
                continue;
            }

            if (!empty($token->is_tag)) {
                // DEFINITION CALL
                // a registered tag transform runs before any other processing
                if (isset($definition->info_tag_transform[$token->name])) {
                    $name_before = $token->name;
                    $transform = $definition->info_tag_transform[$name_before];
                    $token = $transform->transform($token, $config, $context);
                    if ($errors) {
                        $errors->send(E_NOTICE, 'Strategy_RemoveForeignElements: Tag transform', $name_before);
                    }
                }

                if (isset($definition->info[$token->name])) {
                    // recognized element: still has to carry its required attributes
                    $opens_element = $token instanceof HTMLPurifier_Token_Start
                        || $token instanceof HTMLPurifier_Token_Empty;
                    if ($opens_element &&
                        $definition->info[$token->name]->required_attr &&
                        ($token->name != 'img' || $drop_invalid_img) // keeps Core.RemoveInvalidImg effective
                    ) {
                        $attr_validator->validateToken($token, $config, $context);
                        $lacking = false;
                        foreach ($definition->info[$token->name]->required_attr as $attr_name) {
                            if (!isset($token->attr[$attr_name])) {
                                $lacking = true;
                                break;
                            }
                        }
                        if ($lacking) {
                            if ($errors) {
                                $errors->send(
                                    E_ERROR,
                                    'Strategy_RemoveForeignElements: Missing required attribute',
                                    $attr_name
                                );
                            }
                            continue;
                        }
                        // attributes were validated just now; flag the token accordingly
                        $token->armor['ValidateAttributes'] = true;
                    }

                    // track entering/leaving a recognized hidden element
                    if (isset($hidden[$token->name]) && $token instanceof HTMLPurifier_Token_Start) {
                        $textify_in = $token->name;
                    } elseif ($token->name === $textify_in && $token instanceof HTMLPurifier_Token_End) {
                        $textify_in = false;
                    }
                } elseif ($escape_foreign) {
                    // unknown tag: keep it, but as its generated HTML in a text token
                    if ($errors) {
                        $errors->send(E_WARNING, 'Strategy_RemoveForeignElements: Foreign element to text');
                    }
                    $token = new HTMLPurifier_Token_Text(
                        $generator->generateFromToken($token)
                    );
                } else {
                    // unknown tag gets dropped; a hidden element also decides
                    // whether the tokens after it are dropped
                    $is_hidden = isset($hidden[$token->name]);
                    if ($is_hidden) {
                        if ($token instanceof HTMLPurifier_Token_Start) {
                            $skip_until = $token->name;
                        } elseif (!($token instanceof HTMLPurifier_Token_Empty)) {
                            // an empty tag leaves the skipping state untouched;
                            // anything else ends it
                            $skip_until = false;
                        }
                    }
                    if ($errors) {
                        $errors->send(
                            E_ERROR,
                            $is_hidden
                                ? 'Strategy_RemoveForeignElements: Foreign meta element removed'
                                : 'Strategy_RemoveForeignElements: Foreign element removed'
                        );
                    }
                    continue;
                }
            } elseif ($token instanceof HTMLPurifier_Token_Comment) {
                if ($textify_in !== false) {
                    // comment inside a recognized hidden element becomes plain text
                    $token = new HTMLPurifier_Token_Text($token->data);
                } elseif ($trusted || $filter_comments) {
                    // the trailing hyphen is only looked for when errors are collected
                    $had_trailing_hyphen = $errors && substr($token->data, -1) == '-';

                    // cleanup: drop trailing hyphens, then collapse hyphen runs
                    $token->data = rtrim($token->data, '-');
                    $collapsed_hyphens = false;
                    while (strpos($token->data, '--') !== false) {
                        $collapsed_hyphens = true;
                        $token->data = str_replace('--', '-', $token->data);
                    }

                    $trimmed = trim($token->data);
                    $keep = $trusted
                        || !empty($allowed_comments[$trimmed])
                        || ($allowed_comments_regexp !== null && preg_match($allowed_comments_regexp, $trimmed));
                    if (!$keep) {
                        if ($errors) {
                            $errors->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
                        }
                        continue;
                    }

                    if ($errors) {
                        if ($had_trailing_hyphen) {
                            $errors->send(
                                E_NOTICE,
                                'Strategy_RemoveForeignElements: Trailing hyphen in comment removed'
                            );
                        }
                        if ($collapsed_hyphens) {
                            $errors->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
                        }
                    }
                } else {
                    // comments are stripped by default
                    if ($errors) {
                        $errors->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
                    }
                    continue;
                }
            } elseif (!($token instanceof HTMLPurifier_Token_Text)) {
                // neither tag, comment nor text: not passed on
                continue;
            }

            $result_token = $token;
            $kept[] = $result_token;
        }

        if ($skip_until && $errors) {
            // skipping never ended, i.e. tokens were removed up to the end of input
            $errors->send(E_ERROR, 'Strategy_RemoveForeignElements: Token removed to end', $skip_until);
        }

        $context->destroy('CurrentToken');
        return isset($kept) ? $kept : array();
    }
}
206:
207: // vim: et sw=4 sts=4
208: