| 1: | <?php | 
| 2: | |
| 3: | /** | 
| 4: | * Represents an XHTML 1.1 module, with information on elements, tags | 
| 5: | * and attributes. | 
| 6: | * @note Even though this is technically XHTML 1.1, it is also used for | 
| 7: | * regular HTML parsing. We are using modularization as a convenient | 
| 8: | * way to represent the internals of HTMLDefinition, and our | 
| 9: | * implementation is by no means conforming and does not directly | 
| 10: | * use the normative DTDs or XML schemas. | 
| 11: | * @note The public variables in a module should almost directly | 
| 12: | * correspond to the variables in HTMLPurifier_HTMLDefinition. | 
| 13: | * However, the prefix info carries no special meaning in these | 
| 14: | * objects (include it anyway if that's the correspondence though). | 
| 15: | * @todo Consider making some member functions protected | 
| 16: | */ | 
| 17: | |
class HTMLPurifier_HTMLModule
{

    // -- Overloadable ----------------------------------------------------

    /**
     * Short unique string identifier of the module.
     * @type string
     */
    public $name;

    /**
     * Informally, a list of elements this module changes.
     * Not used in any significant way.
     * @type array
     */
    public $elements = array();

    /**
     * Associative array of element names to element definitions.
     * Some definitions may be incomplete, to be merged in later
     * with the full definition.
     * @type array
     */
    public $info = array();

    /**
     * Associative array of content set names to content set additions.
     * This is commonly used to, say, add an A element to the Inline
     * content set. This corresponds to an internal variable $content_sets
     * and NOT info_content_sets member variable of HTMLDefinition.
     * @type array
     */
    public $content_sets = array();

    /**
     * Associative array of attribute collection names to attribute
     * collection additions. More rarely used for adding attributes to
     * the global collections. Example is the StyleAttribute module adding
     * the style attribute to the Core. Corresponds to HTMLDefinition's
     * attr_collections->info, since the object's data is only info,
     * with extra behavior associated with it.
     * @type array
     */
    public $attr_collections = array();

    /**
     * Associative array of deprecated tag name to HTMLPurifier_TagTransform.
     * @type array
     */
    public $info_tag_transform = array();

    /**
     * List of HTMLPurifier_AttrTransform to be performed before validation.
     * @type array
     */
    public $info_attr_transform_pre = array();

    /**
     * List of HTMLPurifier_AttrTransform to be performed after validation.
     * @type array
     */
    public $info_attr_transform_post = array();

    /**
     * List of HTMLPurifier_Injector to be performed during well-formedness fixing.
     * An injector will only be invoked if all of its prerequisites are met;
     * if an injector fails setup, there will be no error; it will simply be
     * silently disabled.
     * @type array
     */
    public $info_injector = array();

    /**
     * Boolean flag that indicates whether or not getChildDef is implemented.
     * For optimization reasons: may save a call to a function. Be sure
     * to set it if you do implement getChildDef(), otherwise it will have
     * no effect!
     * @type bool
     */
    public $defines_child_def = false;

    /**
     * Boolean flag whether or not this module is safe. If it is not safe, all
     * of its members are unsafe. Modules are safe by default (this might be
     * slightly dangerous, but it doesn't make much sense to force HTML Purifier,
     * which is based off of safe HTML, to explicitly say, "This is safe," even
     * though there are modules which are "unsafe")
     *
     * @type bool
     * @note Previously, safety could be applied at an element level granularity.
     *       We've removed this ability, so in order to add "unsafe" elements
     *       or attributes, a dedicated module with this property set to false
     *       must be used.
     */
    public $safe = true;

    /**
     * Retrieves a proper HTMLPurifier_ChildDef subclass based on
     * content_model and content_model_type member variables of
     * the HTMLPurifier_ElementDef class. There is a similar function
     * in HTMLPurifier_HTMLDefinition.
     * @param HTMLPurifier_ElementDef $def
     * @return HTMLPurifier_ChildDef|bool This base implementation always
     *         returns false; overriding modules return a child definition
     *         (and should set $defines_child_def to true).
     */
    public function getChildDef($def)
    {
        return false;
    }

    // -- Convenience -----------------------------------------------------

    /**
     * Convenience function that sets up a new element
     * @param string $element Name of element to add
     * @param string|bool $type What content set should element be registered to?
     *              Set as false to skip this step.
     * @param string|HTMLPurifier_ChildDef $contents Allowed children in form of:
     *              "$content_model_type: $content_model"
     * @param array|string $attr_includes What attribute collections to register to
     *              element?
     * @param array $attr What unique attributes does the element define?
     * @see HTMLPurifier_ElementDef:: for in-depth descriptions of these parameters.
     * @return HTMLPurifier_ElementDef Created element definition object, so you
     *         can set advanced parameters
     */
    public function addElement($element, $type, $contents, $attr_includes = array(), $attr = array())
    {
        $this->elements[] = $element;
        // split "$type: $model"; yields two nulls when $contents is not a string
        list($content_model_type, $content_model) = $this->parseContents($contents);
        // store the attribute inclusions under index 0 of $attr
        $this->mergeInAttrIncludes($attr, $attr_includes);
        // register the element with its content set, unless skipped
        if ($type) {
            $this->addElementToContentSet($element, $type);
        }
        $def = HTMLPurifier_ElementDef::create(
            $content_model,
            $content_model_type,
            $attr
        );
        // non-string $contents means direct child manipulation
        if (!is_string($contents)) {
            $def->child = $contents;
        }
        $this->info[$element] = $def;
        return $def;
    }

    /**
     * Convenience function that creates a totally blank, non-standalone
     * element.
     * @param string $element Name of element to create
     * @return HTMLPurifier_ElementDef Created element, or the already
     *         registered definition (after raising an error) if one exists
     */
    public function addBlankElement($element)
    {
        if (isset($this->info[$element])) {
            // never overwrite an existing definition; report and hand it back
            trigger_error("Definition for $element already exists in module, cannot redefine");
            return $this->info[$element];
        }
        $this->elements[] = $element;
        $def = new HTMLPurifier_ElementDef();
        $def->standalone = false;
        $this->info[$element] = $def;
        return $def;
    }

    /**
     * Convenience function that registers an element to a content set
     * @param string $element Element to register
     * @param string $type Name content set (warning: case sensitive, usually upper-case
     *        first letter)
     */
    public function addElementToContentSet($element, $type)
    {
        // content sets are strings of element names joined by ' | '
        if (isset($this->content_sets[$type])) {
            $this->content_sets[$type] .= ' | ' . $element;
        } else {
            $this->content_sets[$type] = '' . $element;
        }
    }

    /**
     * Convenience function that transforms single-string contents
     * into separate content model and content model type
     * @param string $contents Allowed children in form of:
     *                  "$content_model_type: $content_model"
     * @return array Two-element list: lower-cased content model type,
     *         then the trimmed content model
     * @note If contents is not a string (e.g. an object), an array of two
     *       nulls will be returned, and the caller needs to take the
     *       original $contents and use it directly.
     * @note Only the first colon separates type from model. A string
     *       without any colon is treated as a type with an empty model.
     */
    public function parseContents($contents)
    {
        // shorthand content model forms
        static $shorthand = array(
            'Empty' => array('empty', ''),
            'Inline' => array('optional', 'Inline | #PCDATA'),
            'Flow' => array('optional', 'Flow | #PCDATA'),
        );
        if (!is_string($contents)) {
            return array(null, null); // defer
        }
        if (isset($shorthand[$contents])) {
            return $shorthand[$contents];
        }
        // limit of 2 keeps any later colons inside the model; array_pad
        // guarantees both offsets exist when there is no colon at all
        $parts = array_pad(explode(':', $contents, 2), 2, '');
        return array(strtolower(trim($parts[0])), trim($parts[1]));
    }

    /**
     * Convenience function that merges a list of attribute includes into
     * an attribute array.
     * @param array $attr Reference to attr array to modify; its index 0
     *              is overwritten with the list of includes
     * @param array|string $attr_includes Array of includes / string include to merge in
     */
    public function mergeInAttrIncludes(&$attr, $attr_includes)
    {
        if (!is_array($attr_includes)) {
            // empty scalar means "no includes"; anything else is a single include
            $attr_includes = empty($attr_includes) ? array() : array($attr_includes);
        }
        $attr[0] = $attr_includes;
    }

    /**
     * Convenience function that generates a lookup table with boolean
     * true as value.
     * @param array|string|int $list List of values to turn into a lookup
     * @note You can also pass an arbitrary number of string or integer
     *       arguments in place of the regular argument
     * @return array array equivalent of list; null values are skipped
     */
    public function makeLookup($list)
    {
        // a string/integer first argument means the values were passed
        // as separate arguments rather than as a single list
        if (is_string($list) || is_int($list)) {
            $list = func_get_args();
        }
        $ret = array();
        foreach ($list as $value) {
            if ($value !== null) {
                $ret[$value] = true;
            }
        }
        return $ret;
    }

    /**
     * Lazy load construction of the module after determining whether
     * or not it's needed, and also when a finalized configuration object
     * is available. The base implementation does nothing.
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
    }
}
| 284: | |
| 285: | // vim: et sw=4 sts=4 | 
| 286: |