1: <?php
2:
3: /**
4: * Definition of the purified HTML that describes allowed children,
5: * attributes, and many other things.
6: *
7: * Conventions:
8: *
9: * All member variables that are prefixed with info
10: * (including the main $info array) are used by HTML Purifier internals
11: * and should not be directly edited when customizing the HTMLDefinition.
12: * They can usually be set via configuration directives or custom
13: * modules.
14: *
15: * On the other hand, member variables without the info prefix are used
16: * internally by the HTMLDefinition and MUST NOT be used by other HTML
17: * Purifier internals. Many of them, however, are public, and may be
18: * edited by userspace code to tweak the behavior of HTMLDefinition.
19: *
20: * @note This class is inspected by Printer_HTMLDefinition; please
21: * update that class if things here change.
22: *
23: * @warning Directives that change this object's structure must be in
24: * the HTML or Attr namespace!
25: */
class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
{

    // FULLY-PUBLIC VARIABLES ---------------------------------------------

    /**
     * Associative array of element names to HTMLPurifier_ElementDef.
     * Populated from the module manager in processModules() and then
     * narrowed by the allowed/forbidden directives in setupConfigStuff().
     * @type HTMLPurifier_ElementDef[]
     */
    public $info = array();

    /**
     * Associative array of global attribute name to attribute definition.
     * @type array
     */
    public $info_global_attr = array();

    /**
     * String name of parent element HTML will be going into.
     * Overwritten with the HTML.Parent directive during setup when the
     * module manager recognises that element.
     * @type string
     */
    public $info_parent = 'div';

    /**
     * Definition for parent element, allows parent element to be a
     * tag that's not allowed inside the HTML fragment.
     * @type HTMLPurifier_ElementDef
     */
    public $info_parent_def;

    /**
     * String name of element used to wrap inline elements in block context.
     * Overwritten with the HTML.BlockWrapper directive during setup when
     * that element is a member of the 'Block' content set.
     * @type string
     * @note This is rarely used except for BLOCKQUOTEs in strict mode
     */
    public $info_block_wrapper = 'p';

    /**
     * Associative array of deprecated tag name to HTMLPurifier_TagTransform.
     * @type array
     */
    public $info_tag_transform = array();

    /**
     * Indexed list of HTMLPurifier_AttrTransform to be performed before validation.
     * @type HTMLPurifier_AttrTransform[]
     */
    public $info_attr_transform_pre = array();

    /**
     * Indexed list of HTMLPurifier_AttrTransform to be performed after validation.
     * @type HTMLPurifier_AttrTransform[]
     */
    public $info_attr_transform_post = array();

    /**
     * Nested lookup array of content set name (Block, Inline) to
     * element name to whether or not it belongs in that content set.
     * @type array
     */
    public $info_content_sets = array();

    /**
     * Indexed list of HTMLPurifier_Injector to be used.
     * @type HTMLPurifier_Injector[]
     */
    public $info_injector = array();

    /**
     * Doctype object, copied from the module manager in processModules().
     * @type HTMLPurifier_Doctype
     */
    public $doctype;



    // RAW CUSTOMIZATION STUFF --------------------------------------------

    /**
     * Adds a custom attribute to a pre-existing element
     * @note This is strictly convenience, and does not have a corresponding
     *       method in HTMLPurifier_HTMLModule
     * @param string $element_name Element name to add attribute to
     * @param string $attr_name Name of attribute
     * @param mixed $def Attribute definition, can be string or object, see
     *             HTMLPurifier_AttrTypes for details
     */
    public function addAttribute($element_name, $attr_name, $def)
    {
        $module = $this->getAnonymousModule();
        // Reuse the anonymous module's entry for this element if it has
        // one; otherwise register a blank element to hang the attribute on.
        if (isset($module->info[$element_name])) {
            $element = $module->info[$element_name];
        } else {
            $element = $module->addBlankElement($element_name);
        }
        $element->attr[$attr_name] = $def;
    }

    /**
     * Adds a custom element to your HTML definition
     * @see HTMLPurifier_HTMLModule::addElement() for detailed
     *       parameter and return value descriptions.
     */
    public function addElement($element_name, $type, $contents, $attr_collections, $attributes = array())
    {
        $module = $this->getAnonymousModule();
        // assume that if the user is calling this, the element
        // is safe. This may not be a good idea
        return $module->addElement($element_name, $type, $contents, $attr_collections, $attributes);
    }

    /**
     * Adds a blank element to your HTML definition, for overriding
     * existing behavior
     * @param string $element_name
     * @return HTMLPurifier_ElementDef
     * @see HTMLPurifier_HTMLModule::addBlankElement() for detailed
     *       parameter and return value descriptions.
     */
    public function addBlankElement($element_name)
    {
        $module = $this->getAnonymousModule();
        return $module->addBlankElement($element_name);
    }

    /**
     * Retrieves a reference to the anonymous module, so you can
     * bust out advanced features without having to make your own
     * module. The module is created on first use and named 'Anonymous'.
     * @return HTMLPurifier_HTMLModule
     */
    public function getAnonymousModule()
    {
        // empty() instead of a plain read: processModules() unsets this
        // property, and reading an unset property raises an
        // undefined-property notice/warning.
        if (empty($this->_anonModule)) {
            $this->_anonModule = new HTMLPurifier_HTMLModule();
            $this->_anonModule->name = 'Anonymous';
        }
        return $this->_anonModule;
    }

    /**
     * Lazily created by getAnonymousModule(); handed to the module
     * manager and then unset in processModules().
     * @type HTMLPurifier_HTMLModule|null
     */
    private $_anonModule = null;

    // PUBLIC BUT INTERNAL VARIABLES --------------------------------------

    /**
     * @type string
     */
    public $type = 'HTML';

    /**
     * Created in the constructor and unset at the end of doSetup().
     * @type HTMLPurifier_HTMLModuleManager
     */
    public $manager;

    /**
     * Performs low-cost, preliminary initialization.
     */
    public function __construct()
    {
        $this->manager = new HTMLPurifier_HTMLModuleManager();
    }

    /**
     * Builds the definition: pulls everything out of the module manager,
     * applies the configuration directives, then discards the manager and
     * the per-element content model fields.
     * @param HTMLPurifier_Config $config
     */
    protected function doSetup($config)
    {
        $this->processModules($config);
        $this->setupConfigStuff($config);
        unset($this->manager);

        // cleanup some of the element definitions
        foreach (array_keys($this->info) as $name) {
            unset($this->info[$name]->content_model);
            unset($this->info[$name]->content_model_type);
        }
    }

    /**
     * Extract out the information from the manager
     * @param HTMLPurifier_Config $config
     */
    protected function processModules($config)
    {
        if (!empty($this->_anonModule)) {
            // for user specific changes
            // this is late-loaded so we don't have to deal with PHP4
            // reference wonky-ness
            $this->manager->addModule($this->_anonModule);
            unset($this->_anonModule);
        }

        $this->manager->setup($config);
        $this->doctype = $this->manager->doctype;

        // Later modules override (or, with a value of false, remove)
        // entries contributed by earlier ones.
        foreach ($this->manager->modules as $module) {
            $this->info_tag_transform = $this->mergeModuleEntries(
                $this->info_tag_transform,
                $module->info_tag_transform
            );
            $this->info_attr_transform_pre = $this->mergeModuleEntries(
                $this->info_attr_transform_pre,
                $module->info_attr_transform_pre
            );
            $this->info_attr_transform_post = $this->mergeModuleEntries(
                $this->info_attr_transform_post,
                $module->info_attr_transform_post
            );
            $this->info_injector = $this->mergeModuleEntries(
                $this->info_injector,
                $module->info_injector
            );
        }
        $this->info = $this->manager->getElements();
        $this->info_content_sets = $this->manager->contentSets->lookup;
    }

    /**
     * Applies one module's entries onto an accumulated map: a value that
     * is exactly false removes the key, any other value is stored under it.
     * @param array $current Entries accumulated so far
     * @param array $overrides Entries contributed by a single module
     * @return array The updated map
     */
    private function mergeModuleEntries($current, $overrides)
    {
        foreach ($overrides as $key => $value) {
            if ($value === false) {
                unset($current[$key]);
            } else {
                $current[$key] = $value;
            }
        }
        return $current;
    }

    /**
     * Sets up stuff based on config. We need a better way of doing this.
     *
     * In order: block wrapper (HTML.BlockWrapper), parent element
     * (HTML.Parent), allowed elements/attributes (HTML.AllowedElements,
     * HTML.AllowedAttributes, or the combined HTML.Allowed string),
     * forbidden elements/attributes (HTML.ForbiddenElements,
     * HTML.ForbiddenAttributes), and finally pruning of injectors.
     * Problems are reported through trigger_error().
     *
     * NOTE(review): passing E_USER_ERROR to trigger_error() is, as far as
     * I know, deprecated as of PHP 8.4; the levels are left untouched here
     * because callers may depend on them.
     *
     * @param HTMLPurifier_Config $config
     */
    protected function setupConfigStuff($config)
    {
        $block_wrapper = $config->get('HTML.BlockWrapper');
        if (isset($this->info_content_sets['Block'][$block_wrapper])) {
            $this->info_block_wrapper = $block_wrapper;
        } else {
            trigger_error(
                'Cannot use non-block element as block wrapper',
                E_USER_ERROR
            );
        }

        $parent = $config->get('HTML.Parent');
        $def = $this->manager->getElement($parent, true);
        if ($def) {
            $this->info_parent = $parent;
            $this->info_parent_def = $def;
        } else {
            trigger_error(
                'Cannot use unrecognized element as parent',
                E_USER_ERROR
            );
            // if an error handler let execution continue, fall back to
            // the definition of the current parent name
            $this->info_parent_def = $this->manager->getElement($this->info_parent, true);
        }

        // support template text
        $support = "(for information on implementing this, see the support forums) ";

        // setup allowed elements -----------------------------------------

        $allowed_elements = $config->get('HTML.AllowedElements');
        $allowed_attributes = $config->get('HTML.AllowedAttributes'); // retrieve early

        // HTML.Allowed is only consulted when neither of the two more
        // specific directives yielded an array
        if (!is_array($allowed_elements) && !is_array($allowed_attributes)) {
            $allowed = $config->get('HTML.Allowed');
            if (is_string($allowed)) {
                list($allowed_elements, $allowed_attributes) = $this->parseTinyMCEAllowedList($allowed);
            }
        }

        if (is_array($allowed_elements)) {
            foreach ($this->info as $name => $d) {
                if (!isset($allowed_elements[$name])) {
                    unset($this->info[$name]);
                }
                // tick off every known element; whatever is left in
                // $allowed_elements afterwards was not recognised
                unset($allowed_elements[$name]);
            }
            // emit errors
            foreach ($allowed_elements as $element => $d) {
                $element = htmlspecialchars($element); // PHP doesn't escape errors, be careful!
                trigger_error("Element '$element' is not supported $support", E_USER_WARNING);
            }
        }

        // setup allowed attributes ---------------------------------------

        // working copy: keys are removed as they match something, so the
        // remainder is the set of entries that matched nothing
        $allowed_attributes_mutable = $allowed_attributes; // by copy!
        if (is_array($allowed_attributes)) {
            // This actually doesn't do anything, since we went away from
            // global attributes.  It's possible that userland code uses
            // it, but HTMLModuleManager doesn't!
            foreach ($this->info_global_attr as $attr => $x) {
                $keys = array($attr, "*@$attr", "*.$attr");
                $delete = true;
                foreach ($keys as $key) {
                    if ($delete && isset($allowed_attributes[$key])) {
                        $delete = false;
                    }
                    unset($allowed_attributes_mutable[$key]);
                }
                if ($delete) {
                    unset($this->info_global_attr[$attr]);
                }
            }

            foreach ($this->info as $tag => $info) {
                foreach ($info->attr as $attr => $x) {
                    // every spelling under which this attribute may
                    // have been allowed
                    $keys = array("$tag@$attr", $attr, "*@$attr", "$tag.$attr", "*.$attr");
                    $delete = true;
                    foreach ($keys as $key) {
                        if ($delete && isset($allowed_attributes[$key])) {
                            $delete = false;
                        }
                        unset($allowed_attributes_mutable[$key]);
                    }
                    if ($delete) {
                        if ($this->info[$tag]->attr[$attr]->required) {
                            trigger_error(
                                "Required attribute '$attr' in element '$tag' " .
                                "was not allowed, which means '$tag' will not be allowed either",
                                E_USER_WARNING
                            );
                        }
                        unset($this->info[$tag]->attr[$attr]);
                    }
                }
            }
            // emit errors
            foreach ($allowed_attributes_mutable as $elattr => $d) {
                $bits = preg_split('/[.@]/', $elattr, 2);
                $c = count($bits);
                switch ($c) {
                    case 2:
                        if ($bits[0] !== '*') {
                            $element = htmlspecialchars($bits[0]);
                            $attribute = htmlspecialchars($bits[1]);
                            if (!isset($this->info[$element])) {
                                // no level is passed here, so this one is
                                // raised at trigger_error()'s default
                                // level (E_USER_NOTICE)
                                trigger_error(
                                    "Cannot allow attribute '$attribute' if element " .
                                    "'$element' is not allowed/supported $support"
                                );
                            } else {
                                trigger_error(
                                    "Attribute '$attribute' in element '$element' not supported $support",
                                    E_USER_WARNING
                                );
                            }
                            break;
                        }
                        // otherwise fall through
                    case 1:
                        $attribute = htmlspecialchars($bits[0]);
                        trigger_error(
                            "Global attribute '$attribute' is not ".
                            "supported in any elements $support",
                            E_USER_WARNING
                        );
                        break;
                }
            }
        }

        // setup forbidden elements ---------------------------------------

        $forbidden_elements = $config->get('HTML.ForbiddenElements');
        $forbidden_attributes = $config->get('HTML.ForbiddenAttributes');

        foreach ($this->info as $tag => $info) {
            if (isset($forbidden_elements[$tag])) {
                unset($this->info[$tag]);
                continue;
            }
            foreach ($info->attr as $attr => $x) {
                if (isset($forbidden_attributes["$tag@$attr"]) ||
                    isset($forbidden_attributes["*@$attr"]) ||
                    isset($forbidden_attributes[$attr])
                ) {
                    unset($this->info[$tag]->attr[$attr]);
                } elseif (isset($forbidden_attributes["$tag.$attr"])) { // this segment might get removed eventually
                    // $tag.$attr are not user supplied, so no worries!
                    trigger_error(
                        "Error with $tag.$attr: tag.attr syntax not supported for " .
                        "HTML.ForbiddenAttributes; use tag@attr instead",
                        E_USER_WARNING
                    );
                }
            }
        }
        // warn about the unsupported "*.attr" spelling
        foreach ($forbidden_attributes as $key => $v) {
            // numeric-looking keys come back from PHP arrays as ints;
            // normalise so the offset checks below operate on a string
            $key = (string)$key;
            if (strlen($key) < 2) {
                continue;
            }
            if ($key[0] != '*') {
                continue;
            }
            if ($key[1] == '.') {
                trigger_error(
                    "Error with $key: *.attr syntax not supported for HTML.ForbiddenAttributes; use attr instead",
                    E_USER_WARNING
                );
            }
        }

        // setup injectors -----------------------------------------------------
        foreach ($this->info_injector as $i => $injector) {
            // anything other than a strict false from checkNeeded()
            // causes the injector to be dropped
            if ($injector->checkNeeded($config) !== false) {
                // remove injector that does not have its required
                // elements/attributes present, and is thus not needed.
                unset($this->info_injector[$i]);
            }
        }
    }

    /**
     * Parses a TinyMCE-flavored Allowed Elements and Attributes list into
     * separate lists for processing. Format is element[attr1|attr2],element2...
     *
     * Spaces and tabs are ignored; entries are separated by commas or
     * newlines. An element of '*' contributes attributes only.
     *
     * @warning Although it's largely drawn from TinyMCE's implementation,
     *      it is different, and you'll probably have to modify your lists
     * @param string $list String list to parse
     * @return array Two-element array: a lookup of element names
     *      (name => true) and a lookup of "element.attr" keys (key => true)
     * @todo Give this its own class, probably static interface
     */
    public function parseTinyMCEAllowedList($list)
    {
        $list = str_replace(array(' ', "\t"), '', $list);

        $elements = array();
        $attributes = array();

        $chunks = preg_split('/(,|[\n\r]+)/', $list);
        foreach ($chunks as $chunk) {
            if (empty($chunk)) {
                continue;
            }
            // split "element[attrs]" into its two halves; a '[' at offset
            // 0 is treated the same as no '[' at all, so such a chunk is
            // taken whole as the element name
            if (!strpos($chunk, '[')) {
                $element = $chunk;
                $attr = false;
            } else {
                list($element, $attr) = explode('[', $chunk);
            }
            if ($element !== '*') {
                $elements[$element] = true;
            }
            if (!$attr) {
                continue;
            }
            // remove trailing ] -- but only when it is really there, so a
            // missing bracket does not eat the attribute's last character
            if (substr($attr, -1) === ']') {
                $attr = substr($attr, 0, strlen($attr) - 1);
            }
            $attr = explode('|', $attr);
            foreach ($attr as $key) {
                $attributes["$element.$key"] = true;
            }
        }
        return array($elements, $attributes);
    }
}
492:
493: // vim: et sw=4 sts=4
494: