1: <?php
2:
3: /**
4: * Represents an XHTML 1.1 module, with information on elements, tags
5: * and attributes.
6: * @note Even though this is technically XHTML 1.1, it is also used for
7: * regular HTML parsing. We are using modularization as a convenient
8: * way to represent the internals of HTMLDefinition, and our
9: * implementation is by no means conforming and does not directly
10: * use the normative DTDs or XML schemas.
11: * @note The public variables in a module should almost directly
12: * correspond to the variables in HTMLPurifier_HTMLDefinition.
13: * However, the prefix info carries no special meaning in these
14: * objects (include it anyway if that's the correspondence though).
15: * @todo Consider making some member functions protected
16: */
17:
class HTMLPurifier_HTMLModule
{

    // -- Overloadable ----------------------------------------------------

    /**
     * Short unique string identifier of the module.
     * @type string
     */
    public $name;

    /**
     * Informally, a list of elements this module changes.
     * Not used in any significant way.
     * @type array
     */
    public $elements = array();

    /**
     * Associative array of element names to element definitions.
     * Some definitions may be incomplete, to be merged in later
     * with the full definition.
     * @type array
     */
    public $info = array();

    /**
     * Associative array of content set names to content set additions.
     * This is commonly used to, say, add an A element to the Inline
     * content set. This corresponds to an internal variable $content_sets
     * and NOT info_content_sets member variable of HTMLDefinition.
     * @type array
     */
    public $content_sets = array();

    /**
     * Associative array of attribute collection names to attribute
     * collection additions. More rarely used for adding attributes to
     * the global collections. Example is the StyleAttribute module adding
     * the style attribute to the Core. Corresponds to HTMLDefinition's
     * attr_collections->info, since the object's data is only info,
     * with extra behavior associated with it.
     * @type array
     */
    public $attr_collections = array();

    /**
     * Associative array of deprecated tag name to HTMLPurifier_TagTransform.
     * @type array
     */
    public $info_tag_transform = array();

    /**
     * List of HTMLPurifier_AttrTransform to be performed before validation.
     * @type array
     */
    public $info_attr_transform_pre = array();

    /**
     * List of HTMLPurifier_AttrTransform to be performed after validation.
     * @type array
     */
    public $info_attr_transform_post = array();

    /**
     * List of HTMLPurifier_Injector to be performed during well-formedness fixing.
     * An injector will only be invoked if all of its prerequisites are met;
     * if an injector fails setup, there will be no error; it will simply be
     * silently disabled.
     * @type array
     */
    public $info_injector = array();

    /**
     * Boolean flag that indicates whether or not getChildDef is implemented.
     * For optimization reasons: may save a call to a function. Be sure
     * to set it if you do implement getChildDef(), otherwise it will have
     * no effect!
     * @type bool
     */
    public $defines_child_def = false;

    /**
     * Boolean flag whether or not this module is safe. If it is not safe, all
     * of its members are unsafe. Modules are safe by default (this might be
     * slightly dangerous, but it doesn't make much sense to force HTML Purifier,
     * which is based off of safe HTML, to explicitly say, "This is safe," even
     * though there are modules which are "unsafe")
     *
     * @type bool
     * @note Previously, safety could be applied at an element level granularity.
     *       We've removed this ability, so in order to add "unsafe" elements
     *       or attributes, a dedicated module with this property set to false
     *       must be used.
     */
    public $safe = true;

    /**
     * Retrieves a proper HTMLPurifier_ChildDef subclass based on
     * content_model and content_model_type member variables of
     * the HTMLPurifier_ElementDef class. There is a similar function
     * in HTMLPurifier_HTMLDefinition.
     *
     * This base implementation defines no child definitions and always
     * returns false; modules that override it should also set
     * $defines_child_def to true.
     *
     * @param HTMLPurifier_ElementDef $def
     * @return HTMLPurifier_ChildDef|bool Child definition, or false if this
     *         module does not provide one
     */
    public function getChildDef($def)
    {
        return false;
    }

    // -- Convenience -----------------------------------------------------

    /**
     * Convenience function that sets up a new element
     * @param string $element Name of element to add
     * @param string|bool $type What content set should element be registered to?
     *              Set as false to skip this step.
     * @param string|HTMLPurifier_ChildDef $contents Allowed children in form of:
     *              "$content_model_type: $content_model"
     * @param array|string $attr_includes What attribute collections to register to
     *              element?
     * @param array $attr What unique attributes does the element define?
     * @see HTMLPurifier_ElementDef:: for in-depth descriptions of these parameters.
     * @return HTMLPurifier_ElementDef Created element definition object, so you
     *         can set advanced parameters
     */
    public function addElement($element, $type, $contents, $attr_includes = array(), $attr = array())
    {
        $this->elements[] = $element;
        // parse content_model (yields two nulls when $contents is not a string)
        list($content_model_type, $content_model) = $this->parseContents($contents);
        // merge in attribute inclusions
        $this->mergeInAttrIncludes($attr, $attr_includes);
        // add element to content sets
        if ($type) {
            $this->addElementToContentSet($element, $type);
        }
        // create element
        $this->info[$element] = HTMLPurifier_ElementDef::create(
            $content_model,
            $content_model_type,
            $attr
        );
        // literal object $contents means direct child manipulation
        if (!is_string($contents)) {
            $this->info[$element]->child = $contents;
        }
        return $this->info[$element];
    }

    /**
     * Convenience function that creates a totally blank, non-standalone
     * element. If a definition for the element already exists in this
     * module, an error is raised via trigger_error() and the existing
     * definition is returned untouched.
     * @param string $element Name of element to create
     * @return HTMLPurifier_ElementDef Created (or pre-existing) element
     */
    public function addBlankElement($element)
    {
        if (!isset($this->info[$element])) {
            $this->elements[] = $element;
            $this->info[$element] = new HTMLPurifier_ElementDef();
            $this->info[$element]->standalone = false;
        } else {
            trigger_error("Definition for $element already exists in module, cannot redefine");
        }
        return $this->info[$element];
    }

    /**
     * Convenience function that registers an element to a content set.
     * Elements are accumulated in a single string, separated by ' | '.
     * @param string $element Element to register
     * @param string $type Name content set (warning: case sensitive, usually upper-case
     *        first letter)
     */
    public function addElementToContentSet($element, $type)
    {
        if (!isset($this->content_sets[$type])) {
            $this->content_sets[$type] = '';
        } else {
            $this->content_sets[$type] .= ' | ';
        }
        $this->content_sets[$type] .= $element;
    }

    /**
     * Convenience function that transforms single-string contents
     * into separate content model and content model type
     * @param string $contents Allowed children in form of:
     *                  "$content_model_type: $content_model"
     *                  or one of the shorthands 'Empty', 'Inline', 'Flow'.
     * @return array Two-element list: (content model type, content model)
     * @note If contents is an object, an array of two nulls will be
     *       returned, and the caller needs to take the original $contents
     *       and use it directly.
     * @note The string is split on the first colon only. A string with
     *       no colon is treated as a bare content model type with an
     *       empty content model.
     */
    public function parseContents($contents)
    {
        if (!is_string($contents)) {
            return array(null, null);
        } // defer
        switch ($contents) {
            // check for shorthand content model forms
            case 'Empty':
                return array('empty', '');
            case 'Inline':
                return array('optional', 'Inline | #PCDATA');
            case 'Flow':
                return array('optional', 'Flow | #PCDATA');
        }
        // Limit the split to two parts so the content model is never
        // truncated, and pad so that a missing colon does not produce an
        // undefined offset (and a null passed to trim()).
        $parts = array_pad(explode(':', $contents, 2), 2, '');
        $content_model_type = strtolower(trim($parts[0]));
        $content_model = trim($parts[1]);
        return array($content_model_type, $content_model);
    }

    /**
     * Convenience function that merges a list of attribute includes into
     * an attribute array. The includes are stored under index 0 of $attr,
     * replacing whatever was previously stored there.
     * @param array $attr Reference to attr array to modify
     * @param array $attr_includes Array of includes / string include to merge in
     */
    public function mergeInAttrIncludes(&$attr, $attr_includes)
    {
        if (!is_array($attr_includes)) {
            // normalize a scalar: empty values mean "no includes",
            // anything else is a single include name
            if (empty($attr_includes)) {
                $attr_includes = array();
            } else {
                $attr_includes = array($attr_includes);
            }
        }
        $attr[0] = $attr_includes;
    }

    /**
     * Convenience function that generates a lookup table with boolean
     * true as value.
     * @param string $list List of values to turn into a lookup
     * @note You can also pass an arbitrary number of arguments in
     *       place of the regular argument
     * @return array array equivalent of list; null values are skipped
     */
    public function makeLookup($list)
    {
        if (is_string($list)) {
            // variadic form: every argument is a value of the lookup
            $list = func_get_args();
        }
        $ret = array();
        foreach ($list as $value) {
            if (is_null($value)) {
                continue;
            }
            $ret[$value] = true;
        }
        return $ret;
    }

    /**
     * Lazy load construction of the module after determining whether
     * or not it's needed, and also when a finalized configuration object
     * is available. The base implementation does nothing; subclasses
     * override it to populate the module.
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
    }
}
284:
285: // vim: et sw=4 sts=4
286: