| 1: | <?php |
| 2: | |
| 3: | /** |
| 4: | * Represents an XHTML 1.1 module, with information on elements, tags |
| 5: | * and attributes. |
| 6: | * @note Even though this is technically XHTML 1.1, it is also used for |
| 7: | * regular HTML parsing. We are using modularization as a convenient |
| 8: | * way to represent the internals of HTMLDefinition, and our |
| 9: | * implementation is by no means conforming and does not directly |
| 10: | * use the normative DTDs or XML schemas. |
| 11: | * @note The public variables in a module should almost directly |
| 12: | * correspond to the variables in HTMLPurifier_HTMLDefinition. |
| 13: | * However, the prefix info carries no special meaning in these |
| 14: | * objects (include it anyway if that's the correspondence though). |
| 15: | * @todo Consider making some member functions protected |
| 16: | */ |
| 17: | |
class HTMLPurifier_HTMLModule
{

    // -- Overloadable ----------------------------------------------------

    /**
     * Short unique string identifier of the module.
     * @type string
     */
    public $name;

    /**
     * Informally, a list of elements this module changes.
     * Not used in any significant way.
     * @type array
     */
    public $elements = array();

    /**
     * Associative array of element names to element definitions.
     * Some definitions may be incomplete, to be merged in later
     * with the full definition.
     * @type array
     */
    public $info = array();

    /**
     * Associative array of content set names to content set additions.
     * This is commonly used to, say, add an A element to the Inline
     * content set. This corresponds to an internal variable $content_sets
     * and NOT info_content_sets member variable of HTMLDefinition.
     * @type array
     */
    public $content_sets = array();

    /**
     * Associative array of attribute collection names to attribute
     * collection additions. More rarely used for adding attributes to
     * the global collections. Example is the StyleAttribute module adding
     * the style attribute to the Core. Corresponds to HTMLDefinition's
     * attr_collections->info, since the object's data is only info,
     * with extra behavior associated with it.
     * @type array
     */
    public $attr_collections = array();

    /**
     * Associative array of deprecated tag name to HTMLPurifier_TagTransform.
     * @type array
     */
    public $info_tag_transform = array();

    /**
     * List of HTMLPurifier_AttrTransform to be performed before validation.
     * @type array
     */
    public $info_attr_transform_pre = array();

    /**
     * List of HTMLPurifier_AttrTransform to be performed after validation.
     * @type array
     */
    public $info_attr_transform_post = array();

    /**
     * List of HTMLPurifier_Injector to be performed during well-formedness fixing.
     * An injector will only be invoked if all of its prerequisites are met;
     * if an injector fails setup, there will be no error; it will simply be
     * silently disabled.
     * @type array
     */
    public $info_injector = array();

    /**
     * Boolean flag that indicates whether or not getChildDef is implemented.
     * For optimization reasons: may save a call to a function. Be sure
     * to set it if you do implement getChildDef(), otherwise it will have
     * no effect!
     * @type bool
     */
    public $defines_child_def = false;

    /**
     * Boolean flag whether or not this module is safe. If it is not safe, all
     * of its members are unsafe. Modules are safe by default (this might be
     * slightly dangerous, but it doesn't make much sense to force HTML Purifier,
     * which is based off of safe HTML, to explicitly say, "This is safe," even
     * though there are modules which are "unsafe")
     *
     * @type bool
     * @note Previously, safety could be applied at an element level granularity.
     *       We've removed this ability, so in order to add "unsafe" elements
     *       or attributes, a dedicated module with this property set to false
     *       must be used.
     */
    public $safe = true;

    /**
     * Retrieves a proper HTMLPurifier_ChildDef subclass based on
     * content_model and content_model_type member variables of
     * the HTMLPurifier_ElementDef class. There is a similar function
     * in HTMLPurifier_HTMLDefinition.
     * @param HTMLPurifier_ElementDef $def
     * @return HTMLPurifier_ChildDef|bool Child definition; this base
     *         implementation always returns false. Overriding modules
     *         should also set $defines_child_def to true.
     */
    public function getChildDef($def)
    {
        return false;
    }

    // -- Convenience -----------------------------------------------------

    /**
     * Convenience function that sets up a new element
     * @param string $element Name of element to add
     * @param string|bool $type What content set should element be registered to?
     *              Set as false to skip this step.
     * @param string|HTMLPurifier_ChildDef $contents Allowed children in form of:
     *              "$content_model_type: $content_model"
     * @param array|string $attr_includes What attribute collections to register to
     *              element?
     * @param array $attr What unique attributes does the element define?
     * @see HTMLPurifier_ElementDef:: for in-depth descriptions of these parameters.
     * @return HTMLPurifier_ElementDef Created element definition object, so you
     *         can set advanced parameters
     */
    public function addElement($element, $type, $contents, $attr_includes = array(), $attr = array())
    {
        $this->elements[] = $element;
        // parse content_model (yields two nulls when $contents is not a string)
        list($content_model_type, $content_model) = $this->parseContents($contents);
        // merge in attribute inclusions (stored under index 0 of $attr)
        $this->mergeInAttrIncludes($attr, $attr_includes);
        // add element to content sets, unless the caller passed a falsy $type
        if ($type) {
            $this->addElementToContentSet($element, $type);
        }
        // create element; any existing definition under this name is replaced
        $this->info[$element] = HTMLPurifier_ElementDef::create(
            $content_model,
            $content_model_type,
            $attr
        );
        // literal object $contents means direct child manipulation
        if (!is_string($contents)) {
            $this->info[$element]->child = $contents;
        }
        return $this->info[$element];
    }

    /**
     * Convenience function that creates a totally blank, non-standalone
     * element. If a definition for the element already exists in this
     * module, an error is triggered and the existing definition is
     * returned untouched.
     * @param string $element Name of element to create
     * @return HTMLPurifier_ElementDef Created (or pre-existing) element
     */
    public function addBlankElement($element)
    {
        if (!isset($this->info[$element])) {
            $this->elements[] = $element;
            $this->info[$element] = new HTMLPurifier_ElementDef();
            $this->info[$element]->standalone = false;
        } else {
            trigger_error("Definition for $element already exists in module, cannot redefine");
        }
        return $this->info[$element];
    }

    /**
     * Convenience function that registers an element to a content set.
     * Elements of the same set are accumulated in a single string,
     * separated by ' | '.
     * @param string $element Element to register
     * @param string $type Name content set (warning: case sensitive, usually upper-case
     *        first letter)
     */
    public function addElementToContentSet($element, $type)
    {
        if (!isset($this->content_sets[$type])) {
            $this->content_sets[$type] = '';
        } else {
            $this->content_sets[$type] .= ' | ';
        }
        $this->content_sets[$type] .= $element;
    }

    /**
     * Convenience function that transforms single-string contents
     * into separate content model and content model type
     * @param string $contents Allowed children in form of:
     *                  "$content_model_type: $content_model"
     *                  The shorthands 'Empty', 'Inline' and 'Flow' are
     *                  also recognized.
     * @return array Two-element array: lower-cased content model type,
     *               then the trimmed content model.
     * @note If contents is an object, an array of two nulls will be
     *       returned, and the caller needs to take the original $contents
     *       and use it directly.
     * @note Only the first colon separates type from model, so colons
     *       inside the content model are preserved. If there is no colon
     *       at all, the content model is the empty string.
     */
    public function parseContents($contents)
    {
        if (!is_string($contents)) {
            return array(null, null);
        } // defer
        switch ($contents) {
            // check for shorthand content model forms
            case 'Empty':
                return array('empty', '');
            case 'Inline':
                return array('optional', 'Inline | #PCDATA');
            case 'Flow':
                return array('optional', 'Flow | #PCDATA');
        }
        // Limit the split to two pieces: everything after the first colon
        // belongs to the content model, even if it contains further colons.
        $parts = explode(':', $contents, 2);
        $content_model_type = strtolower(trim($parts[0]));
        // A string without a colon has no model part; default to '' rather
        // than reading an undefined offset and trimming null.
        $content_model = isset($parts[1]) ? trim($parts[1]) : '';
        return array($content_model_type, $content_model);
    }

    /**
     * Convenience function that merges a list of attribute includes into
     * an attribute array. The includes are stored under index 0 of $attr,
     * overwriting whatever was there.
     * @param array $attr Reference to attr array to modify
     * @param array $attr_includes Array of includes / string include to merge in
     */
    public function mergeInAttrIncludes(&$attr, $attr_includes)
    {
        if (!is_array($attr_includes)) {
            if (empty($attr_includes)) {
                // empty non-array value means "no includes"
                $attr_includes = array();
            } else {
                // single include: wrap it in a list
                $attr_includes = array($attr_includes);
            }
        }
        $attr[0] = $attr_includes;
    }

    /**
     * Convenience function that generates a lookup table with boolean
     * true as value. Null entries are skipped.
     * @param string $list List of values to turn into a lookup
     * @note You can also pass an arbitrary number of arguments in
     *       place of the regular argument
     * @return array array equivalent of list
     */
    public function makeLookup($list)
    {
        $args = func_get_args();
        // a string first argument means the values were passed variadically
        if (is_string($list)) {
            $list = $args;
        }
        $ret = array();
        foreach ($list as $value) {
            if (is_null($value)) {
                continue;
            }
            $ret[$value] = true;
        }
        return $ret;
    }

    /**
     * Lazy load construction of the module after determining whether
     * or not it's needed, and also when a finalized configuration object
     * is available. The base implementation does nothing.
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
    }
}
| 284: | |
| 285: | // vim: et sw=4 sts=4 |
| 286: |