| 1: | <?php |
| 2: | |
| 3: | /*! @mainpage |
| 4: | * |
| 5: | * HTML Purifier is an HTML filter that will take an arbitrary snippet of |
| 6: | * HTML and rigorously test, validate and filter it into a version that |
| 7: | * is safe for output onto webpages. It achieves this by: |
| 8: | * |
| 9: | * -# Lexing (parsing into tokens) the document, |
| 10: | * -# Executing various strategies on the tokens: |
| 11: | * -# Removing all elements not in the whitelist, |
| 12: | * -# Making the tokens well-formed, |
| 13: | * -# Fixing the nesting of the nodes, and |
| 14: | * -# Validating attributes of the nodes; and |
| 15: | * -# Generating HTML from the purified tokens. |
| 16: | * |
| 17: | * However, most users will only need to interface with the HTMLPurifier |
| 18: | * and HTMLPurifier_Config. |
| 19: | */ |
| 20: | |
| 21: | /* |
| 22: | HTML Purifier 4.15.0 - Standards Compliant HTML Filtering |
| 23: | Copyright (C) 2006-2008 Edward Z. Yang |
| 24: | |
| 25: | This library is free software; you can redistribute it and/or |
| 26: | modify it under the terms of the GNU Lesser General Public |
| 27: | License as published by the Free Software Foundation; either |
| 28: | version 2.1 of the License, or (at your option) any later version. |
| 29: | |
| 30: | This library is distributed in the hope that it will be useful, |
| 31: | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 32: | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 33: | Lesser General Public License for more details. |
| 34: | |
| 35: | You should have received a copy of the GNU Lesser General Public |
| 36: | License along with this library; if not, write to the Free Software |
| 37: | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 38: | */ |
| 39: | |
| 40: | /** |
| 41: | * Facade that coordinates HTML Purifier's subsystems in order to purify HTML. |
| 42: | * |
| 43: | * @note There are several points in which configuration can be specified |
| 44: | * for HTML Purifier. The precedence of these (from lowest to |
| 45: | * highest) is as follows: |
| 46: | * -# Instance: new HTMLPurifier($config) |
| 47: | * -# Invocation: purify($html, $config) |
| 48: | * These configurations are entirely independent of each other and |
| 49: | * are *not* merged (this behavior may change in the future). |
| 50: | * |
| 51: | * @todo We need an easier way to inject strategies using the configuration |
| 52: | * object. |
| 53: | */ |
class HTMLPurifier
{

    /**
     * Version string of this HTML Purifier release.
     * @type string
     */
    public $version = '4.15.0';

    /**
     * The same version string, exposed as a class constant.
     */
    const VERSION = '4.15.0';

    /**
     * Configuration used by purify() whenever the caller does not pass one.
     * @type HTMLPurifier_Config
     */
    public $config;

    /**
     * Filter objects added through the deprecated addFilter() method.
     * They are appended after the configuration-driven filters in purify().
     * @type HTMLPurifier_Filter[]
     */
    private $filters = array();

    /**
     * Shared instance handed out by instance() / getInstance().
     * @type HTMLPurifier
     */
    private static $instance;

    /**
     * Strategy that turns the lexer's tokens into purified tokens.
     * @type HTMLPurifier_Strategy_Core
     */
    protected $strategy;

    /**
     * Generator built by the most recent purify() call.
     * @type HTMLPurifier_Generator
     */
    protected $generator;

    /**
     * Context produced by the most recent purify() call.
     * After purifyArray() this holds an array of contexts instead, keyed
     * like the input array (nested arrays yield nested arrays).
     * @type HTMLPurifier_Context
     */
    public $context;

    /**
     * Builds a purifier with its default configuration and core strategy.
     *
     * @param HTMLPurifier_Config|mixed $config Optional configuration for this
     *                purifier. It is passed straight to
     *                HTMLPurifier_Config::create(), so anything that method
     *                accepts is allowed here. The resulting configuration is
     *                used by purify() unless a per-call one is supplied.
     */
    public function __construct($config = null)
    {
        $this->config = HTMLPurifier_Config::create($config);
        $this->strategy = new HTMLPurifier_Strategy_Core();
    }

    /**
     * Appends a filter to the list that purify() runs around purification.
     * Filters run in the order they were added.
     *
     * Deprecated: every call raises an E_USER_WARNING pointing at the
     * Filter configuration namespace / Filter.Custom instead.
     *
     * @param HTMLPurifier_Filter $filter HTMLPurifier_Filter object
     */
    public function addFilter($filter)
    {
        $notice = 'HTMLPurifier->addFilter() is deprecated, use configuration directives'
            . ' in the Filter namespace or Filter.Custom';
        trigger_error($notice, E_USER_WARNING);

        $this->filters[] = $filter;
    }

    /**
     * Filters an HTML snippet/document to be XSS-free and standards-compliant.
     *
     * The input is converted to UTF-8, passed through every active filter's
     * preFilter(), lexed into tokens, run through the core strategy,
     * regenerated as HTML, passed through the filters' postFilter() in
     * reverse order, and finally converted back from UTF-8.
     *
     * @param string $html String of HTML to purify
     * @param HTMLPurifier_Config $config Configuration for this call only.
     *                When omitted (or falsy) the configuration given at
     *                construction is used. Any value accepted by
     *                HTMLPurifier_Config::create() may be passed. It replaces
     *                the instance configuration rather than merging with it.
     *
     * @return string Purified HTML
     */
    public function purify($html, $config = null)
    {
        // :TODO: make the config merge in, instead of replace
        if ($config) {
            $config = HTMLPurifier_Config::create($config);
        } else {
            $config = $this->config;
        }

        // which lexer is used depends partly on the environment and
        // partly on the configuration
        $lexer = HTMLPurifier_Lexer::create($config);

        $context = new HTMLPurifier_Context();

        // the generator is kept on the instance and published in the context
        $this->generator = new HTMLPurifier_Generator($config, $context);
        $context->register('Generator', $this->generator);

        // error collection is opt-in; it needs a locale and a collector
        if ($config->get('Core.CollectErrors')) {
            $locale = HTMLPurifier_LanguageFactory::instance()->create($config, $context);
            $context->register('Locale', $locale);

            $collector = new HTMLPurifier_ErrorCollector($context);
            $context->register('ErrorCollector', $collector);
        }

        // the ID accumulator lives in the context because AttrValidator
        // can be invoked from many places
        $accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
        $context->register('IDAccumulator', $accumulator);

        $html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);

        // assemble the filter chain: flag-enabled built-ins first, then
        // Filter.Custom entries, then anything added through addFilter()
        $flags = $config->getBatch('Filter');
        $custom = $flags['Custom'];
        unset($flags['Custom']);

        $chain = array();
        foreach ($flags as $name => $enabled) {
            // dotted keys are skipped: they do not name a filter class
            if ($enabled && strpos($name, '.') === false) {
                $class = 'HTMLPurifier_Filter_' . $name;
                $chain[] = new $class();
            }
        }
        foreach ($custom as $extra) {
            // maybe "HTMLPurifier_Filter_$filter", but be consistent with AutoFormat
            $chain[] = $extra;
        }
        $chain = array_merge($chain, $this->filters);
        // maybe prepare(), but later

        foreach ($chain as $filter) {
            $html = $filter->preFilter($html, $config, $context);
        }

        // un-purified HTML -> un-purified tokens -> purified tokens -> purified HTML
        $tokens = $lexer->tokenizeHTML($html, $config, $context);
        $tokens = $this->strategy->execute($tokens, $config, $context);
        $html = $this->generator->generateFromTokens($tokens);

        // post-filters unwind in the opposite order of the pre-filters
        foreach (array_reverse($chain) as $filter) {
            $html = $filter->postFilter($html, $config, $context);
        }

        $html = HTMLPurifier_Encoder::convertFromUTF8($html, $config, $context);

        // expose the context of this run; bound by reference
        $this->context =& $context;

        return $html;
    }

    /**
     * Purifies every snippet in an array, recursing into nested arrays.
     * Keys are preserved. Afterwards $this->context holds the per-element
     * contexts under the same keys.
     *
     * @param string[] $array_of_html Array of html snippets
     * @param HTMLPurifier_Config $config Optional config object for this operation.
     *                See HTMLPurifier::purify() for more details.
     *
     * @return string[] Array of purified HTML
     */
    public function purifyArray($array_of_html, $config = null)
    {
        $purified = array();
        $contexts = array();

        foreach ($array_of_html as $key => $item) {
            $purified[$key] = is_array($item)
                ? $this->purifyArray($item, $config)
                : $this->purify($item, $config);

            // whichever call ran above has just refreshed $this->context
            $contexts[$key] = $this->context;
        }

        $this->context = $contexts;

        return $purified;
    }

    /**
     * Returns the shared HTML Purifier instance, creating or replacing it
     * when needed.
     *
     * @param HTMLPurifier|HTMLPurifier_Config $prototype Optional prototype.
     *                An HTMLPurifier instance replaces the shared instance;
     *                any other truthy value is used as the configuration of
     *                a newly built shared instance. When omitted, the existing
     *                instance is returned (one is built on first use).
     *
     * @return HTMLPurifier
     */
    public static function instance($prototype = null)
    {
        if ($prototype instanceof self) {
            self::$instance = $prototype;
        } elseif ($prototype) {
            self::$instance = new self($prototype);
        } elseif (!self::$instance) {
            self::$instance = new self();
        }

        return self::$instance;
    }

    /**
     * Alias of instance(), kept for backwards compatibility.
     *
     * @param HTMLPurifier|HTMLPurifier_Config $prototype Optional prototype
     *                HTMLPurifier instance to overload singleton with,
     *                or HTMLPurifier_Config instance to configure the
     *                generated version with.
     *
     * @return HTMLPurifier
     * @note Backwards compatibility, see instance()
     */
    public static function getInstance($prototype = null)
    {
        return self::instance($prototype);
    }
}
| 296: | |
| 297: | // vim: et sw=4 sts=4 |
| 298: |