1: <?php
2:
/**
 * Coordinates the HTML modules that make up a doctype.
 *
 * The manager registers the built-in doctypes, resolves module names to
 * module instances, activates the modules selected by the configuration
 * (setup()), and finally merges the per-module element definitions into
 * complete element definitions (getElements() / getElement()).
 */
class HTMLPurifier_HTMLModuleManager
{

    /**
     * Registry of known doctypes; the constructor fills it with the
     * built-in HTML 4.01 and XHTML 1.x doctypes.
     * @type HTMLPurifier_DoctypeRegistry
     */
    public $doctypes;

    /**
     * Doctype object for the current configuration. It is assigned in
     * setup() from $this->doctypes->make($config); setup() then reads its
     * ->modules and ->tidyModules lists, so this is an object and not a
     * plain string (presumably an HTMLPurifier_Doctype -- confirm against
     * the registry).
     * @type object
     */
    public $doctype;

    /**
     * Attribute type lookup, handed to the attribute collections.
     * @type HTMLPurifier_AttrTypes
     */
    public $attrTypes;

    /**
     * Active instances of modules for the specified doctype are
     * indexed, by name, in this array.
     * @type HTMLPurifier_HTMLModule[]
     */
    public $modules = array();

    /**
     * Array of recognized HTMLPurifier_HTMLModule instances, indexed by
     * the module's name property. This array is usually lazy loaded, but a
     * user can overload a module by pre-emptively registering it.
     * @type HTMLPurifier_HTMLModule[]
     */
    public $registeredModules = array();

    /**
     * Names of extra modules that were added by the user using
     * addModule(). These get unconditionally merged into the current
     * doctype, whatever it may be.
     * @type string[]
     */
    public $userModules = array();

    /**
     * Associative array of element name to list of names of the modules
     * that have definitions for the element; filled by setup().
     * @type array
     */
    public $elementLookup = array();

    /**
     * List of class-name prefixes tried, in order, when registerModule()
     * resolves a short module name.
     * @type array
     */
    public $prefixes = array('HTMLPurifier_HTMLModule_');

    /**
     * Built by setup() from the active modules.
     * @type HTMLPurifier_ContentSets
     */
    public $contentSets;

    /**
     * Built by setup() from the attribute types and the active modules.
     * @type HTMLPurifier_AttrCollections
     */
    public $attrCollections;

    /**
     * If set to true, unsafe elements and attributes will be allowed.
     * setup() overwrites this with the HTML.Trusted configuration value.
     * @type bool
     */
    public $trusted = false;

    /**
     * Creates the editable helper objects and registers the built-in
     * doctypes together with their module and tidy-module lists.
     */
    public function __construct()
    {
        // editable internal objects
        $this->attrTypes = new HTMLPurifier_AttrTypes();
        $this->doctypes = new HTMLPurifier_DoctypeRegistry();

        // setup basic modules
        $common = array(
            'CommonAttributes', 'Text', 'Hypertext', 'List',
            'Presentation', 'Edit', 'Bdo', 'Tables', 'Image',
            'StyleAttribute',
            // Unsafe:
            'Scripting', 'Object', 'Forms',
            // Sorta legacy, but present in strict:
            'Name',
        );
        $transitional = array('Legacy', 'Target', 'Iframe');
        $xml = array('XMLCommonAttributes');
        $non_xml = array('NonXMLCommonAttributes');

        // setup basic doctypes
        $this->doctypes->register(
            'HTML 4.01 Transitional',
            false,
            array_merge($common, $transitional, $non_xml),
            array('Tidy_Transitional', 'Tidy_Proprietary'),
            array(),
            '-//W3C//DTD HTML 4.01 Transitional//EN',
            'http://www.w3.org/TR/html4/loose.dtd'
        );

        $this->doctypes->register(
            'HTML 4.01 Strict',
            false,
            array_merge($common, $non_xml),
            array('Tidy_Strict', 'Tidy_Proprietary', 'Tidy_Name'),
            array(),
            '-//W3C//DTD HTML 4.01//EN',
            'http://www.w3.org/TR/html4/strict.dtd'
        );

        $this->doctypes->register(
            'XHTML 1.0 Transitional',
            true,
            array_merge($common, $transitional, $xml, $non_xml),
            array('Tidy_Transitional', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Name'),
            array(),
            '-//W3C//DTD XHTML 1.0 Transitional//EN',
            'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'
        );

        // Each tidy module is listed once: setup() runs every entry of
        // this list through processModule() and the module's setup(), so
        // a repeated name only repeats that work on the same instance.
        $this->doctypes->register(
            'XHTML 1.0 Strict',
            true,
            array_merge($common, $xml, $non_xml),
            array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Name'),
            array(),
            '-//W3C//DTD XHTML 1.0 Strict//EN',
            'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'
        );

        $this->doctypes->register(
            'XHTML 1.1',
            true,
            // Iframe is a real XHTML 1.1 module, despite being
            // "transitional"!
            array_merge($common, $xml, array('Ruby', 'Iframe')),
            array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Name'), // Tidy_XHTML1_1
            array(),
            '-//W3C//DTD XHTML 1.1//EN',
            'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd'
        );

    }

    /**
     * Registers a module to the recognized module list, useful for
     * overloading pre-existing modules.
     * @param $module Mixed: string module name, with or without
     *                a registered class prefix, or instance of
     *                subclass of HTMLPurifier_HTMLModule.
     * @param $overload Boolean whether or not to overload previous modules.
     *                  If this is not set, and you do overload a module,
     *                  HTML Purifier will complain with a warning (the
     *                  new module still replaces the old one).
     * @note Class lookup uses class_exists() with its default arguments,
     *       i.e. with autoloading enabled, so a registered autoloader may
     *       be invoked. No include/require is performed here.
     * @note If a string is passed as a module name, different variants
     *       will be tested in this order:
     *          - Each entry of $this->prefixes followed by $name, in the
     *            order the prefixes were added (the default prefix
     *            HTMLPurifier_HTMLModule_ comes first)
     *          - The literal class name $name
     *          - Otherwise an E_USER_ERROR is triggered and nothing is
     *            registered
     *       If your object name collides with an internal class, specify
     *       your module manually.
     * @note The module is stored under its name property; a module
     *       without a name triggers an error and is not registered.
     */
    public function registerModule($module, $overload = false)
    {
        if (is_string($module)) {
            // attempt to load the module
            $original_module = $module;
            $ok = false;
            foreach ($this->prefixes as $prefix) {
                $module = $prefix . $original_module;
                if (class_exists($module)) {
                    $ok = true;
                    break;
                }
            }
            if (!$ok) {
                // no prefixed class matched: fall back to the literal name
                $module = $original_module;
                if (!class_exists($module)) {
                    trigger_error(
                        $original_module . ' module does not exist',
                        E_USER_ERROR
                    );
                    return;
                }
            }
            $module = new $module();
        }
        if (empty($module->name)) {
            trigger_error('Module instance of ' . get_class($module) . ' must have name');
            return;
        }
        if (!$overload && isset($this->registeredModules[$module->name])) {
            trigger_error('Overloading ' . $module->name . ' without explicit overload parameter', E_USER_WARNING);
        }
        $this->registeredModules[$module->name] = $module;
    }

    /**
     * Adds a module to the current doctype by first registering it,
     * and then tacking it on to the list of user modules that setup()
     * merges into the active doctype.
     * @param $module Mixed: module name or module instance, as accepted
     *                by registerModule().
     * @note For an instance the module's name property is recorded. A
     *       string is recorded exactly as given, and setup() later uses it
     *       as the key into the registered modules, so it has to equal the
     *       name property of the module it resolves to.
     */
    public function addModule($module)
    {
        $this->registerModule($module);
        if (is_object($module)) {
            $module = $module->name;
        }
        $this->userModules[] = $module;
    }

    /**
     * Adds a class prefix that registerModule() will use to resolve a
     * string name to a concrete class
     * @param string $prefix Prefix to append to the list of prefixes.
     */
    public function addPrefix($prefix)
    {
        $this->prefixes[] = $prefix;
    }

    /**
     * Performs processing on modules, after being called you may
     * use getElement() and getElements()
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
        $this->trusted = $config->get('HTML.Trusted');

        // generate
        $this->doctype = $this->doctypes->make($config);
        $modules = $this->doctype->modules;

        // take out the default modules that aren't allowed
        $lookup = $config->get('HTML.AllowedModules');
        $special_cases = $config->get('HTML.CoreModules');

        if (is_array($lookup)) {
            foreach ($modules as $k => $m) {
                // core modules survive regardless of the allow-list
                if (isset($special_cases[$m])) {
                    continue;
                }
                if (!isset($lookup[$m])) {
                    unset($modules[$k]);
                }
            }
        }

        // custom modules
        if ($config->get('HTML.Proprietary')) {
            $modules[] = 'Proprietary';
        }
        if ($config->get('HTML.SafeObject')) {
            $modules[] = 'SafeObject';
        }
        if ($config->get('HTML.SafeEmbed')) {
            $modules[] = 'SafeEmbed';
        }
        if ($config->get('HTML.SafeScripting') !== array()) {
            $modules[] = 'SafeScripting';
        }
        if ($config->get('HTML.Nofollow')) {
            $modules[] = 'Nofollow';
        }
        if ($config->get('HTML.TargetBlank')) {
            $modules[] = 'TargetBlank';
        }
        // NB: HTML.TargetNoreferrer and HTML.TargetNoopener must be AFTER HTML.TargetBlank
        // so that its post-attr-transform gets run afterwards.
        if ($config->get('HTML.TargetNoreferrer')) {
            $modules[] = 'TargetNoreferrer';
        }
        if ($config->get('HTML.TargetNoopener')) {
            $modules[] = 'TargetNoopener';
        }

        // merge in custom modules
        $modules = array_merge($modules, $this->userModules);

        // activate the regular modules first, then the tidy modules
        foreach ($modules as $module) {
            $this->processModule($module);
            $this->modules[$module]->setup($config);
        }

        foreach ($this->doctype->tidyModules as $module) {
            $this->processModule($module);
            $this->modules[$module]->setup($config);
        }

        // prepare any injectors: entries given as names are instantiated
        // as HTMLPurifier_Injector_<name>, and the list is re-keyed by
        // each injector's name
        foreach ($this->modules as $module) {
            $n = array();
            foreach ($module->info_injector as $injector) {
                if (!is_object($injector)) {
                    $class = "HTMLPurifier_Injector_$injector";
                    $injector = new $class;
                }
                $n[$injector->name] = $injector;
            }
            $module->info_injector = $n;
        }

        // setup lookup table based on all valid modules
        foreach ($this->modules as $module) {
            foreach ($module->info as $name => $def) {
                if (!isset($this->elementLookup[$name])) {
                    $this->elementLookup[$name] = array();
                }
                $this->elementLookup[$name][] = $module->name;
            }
        }

        // note the different choice
        $this->contentSets = new HTMLPurifier_ContentSets(
            // content set assembly deals with all possible modules,
            // not just ones deemed to be "safe"
            $this->modules
        );
        $this->attrCollections = new HTMLPurifier_AttrCollections(
            $this->attrTypes,
            // there is no way to directly disable a global attribute,
            // but using AllowedAttributes or simply not including
            // the module in your custom doctype should be sufficient
            $this->modules
        );
    }

    /**
     * Takes a module and adds it to the active module collection,
     * registering it if necessary.
     * @param $module Mixed: module name (the key used in the registered
     *                module list) or a module instance.
     * @note An instance is filed under its name property; an object can
     *       never be used as an array key itself. An instance without a
     *       name is rejected by registerModule() and is not activated.
     */
    public function processModule($module)
    {
        if (is_object($module)) {
            $instance = $module;
            if (empty($instance->name)) {
                // let registerModule() report the missing name
                $this->registerModule($instance);
                return;
            }
            $module = $instance->name;
            // only (re-)register when this exact instance is not the one
            // already on file, which avoids a spurious overload warning
            if (!isset($this->registeredModules[$module]) ||
                $this->registeredModules[$module] !== $instance) {
                $this->registerModule($instance);
            }
        } elseif (!isset($this->registeredModules[$module])) {
            $this->registerModule($module);
        }
        $this->modules[$module] = $this->registeredModules[$module];
    }

    /**
     * Retrieves merged element definitions.
     * Modules that are not marked safe are skipped unless trusted mode
     * is on, and elements for which getElement() yields false are dropped.
     * @return Array of HTMLPurifier_ElementDef, indexed by element name
     */
    public function getElements()
    {
        $elements = array();
        foreach ($this->modules as $module) {
            if (!$this->trusted && !$module->safe) {
                continue;
            }
            foreach ($module->info as $name => $v) {
                if (isset($elements[$name])) {
                    continue;
                }
                $elements[$name] = $this->getElement($name);
            }
        }

        // remove dud elements, this happens when an element that
        // appeared to be safe actually wasn't
        foreach ($elements as $n => $v) {
            if ($v === false) {
                unset($elements[$n]);
            }
        }

        return $elements;

    }

    /**
     * Retrieves a single merged element definition
     * @param string $name Name of element
     * @param bool $trusted Boolean trusted overriding parameter: set to true
     *                      if you want the full version of an element;
     *                      null (the default) uses $this->trusted
     * @return HTMLPurifier_ElementDef|bool Merged HTMLPurifier_ElementDef,
     *         or false when no active module defines the element or no
     *         standalone definition was available to merge into
     * @note Modules are iterated over twice (once in getElements() and
     *       once here). getElements() walks the modules only to discover
     *       element names; the merge done here visits every module listed
     *       for the element in $this->elementLookup, in the order setup()
     *       recorded them.
     */
    public function getElement($name, $trusted = null)
    {
        if (!isset($this->elementLookup[$name])) {
            return false;
        }

        // setup global state variables
        $def = false;
        if ($trusted === null) {
            $trusted = $this->trusted;
        }

        // iterate through each module that has registered itself to this
        // element
        foreach ($this->elementLookup[$name] as $module_name) {
            $module = $this->modules[$module_name];

            // refuse to create/merge from a module that is deemed unsafe--
            // pretend the module doesn't exist--when trusted mode is not on.
            if (!$trusted && !$module->safe) {
                continue;
            }

            // clone is used because, ideally speaking, the original
            // definition should not be modified. Usually, this will
            // make no difference, but for consistency's sake
            $new_def = clone $module->info[$name];

            if (!$def && $new_def->standalone) {
                $def = $new_def;
            } elseif ($def) {
                // This will occur even if $new_def is standalone. In practice,
                // this will usually result in a full replacement.
                $def->mergeIn($new_def);
            } else {
                // :TODO:
                // non-standalone definitions that don't have a standalone
                // to merge into could be deferred to the end
                // HOWEVER, it is perfectly valid for a non-standalone
                // definition to lack a standalone definition, even
                // after all processing: this allows us to safely
                // specify extra attributes for elements that may not be
                // enabled all in one place.  In particular, this might
                // be the case for trusted elements.  WARNING: care must
                // be taken that the /extra/ definitions are all safe.
                continue;
            }

            // attribute value expansions
            $this->attrCollections->performInclusions($def->attr);
            $this->attrCollections->expandIdentifiers($def->attr, $this->attrTypes);

            // descendants_are_inline, for ChildDef_Chameleon
            if (is_string($def->content_model) &&
                strpos($def->content_model, 'Inline') !== false) {
                // ins and del are deliberately left out of this flag
                if ($name != 'del' && $name != 'ins') {
                    $def->descendants_are_inline = true;
                }
            }

            $this->contentSets->generateChildDef($def, $module);
        }

        // This can occur if there is a blank definition, but no base to
        // mix it in with
        if (!$def) {
            return false;
        }

        // add information on required attributes
        foreach ($def->attr as $attr_name => $attr_def) {
            if ($attr_def->required) {
                $def->required_attr[] = $attr_name;
            }
        }
        return $def;
    }
}
466:
467: // vim: et sw=4 sts=4
468: