1: | <?php |
2: | |
3: | /*! @mainpage |
4: | * |
5: | * HTML Purifier is an HTML filter that will take an arbitrary snippet of |
6: | * HTML and rigorously test, validate and filter it into a version that |
7: | * is safe for output onto webpages. It achieves this by: |
8: | * |
9: | * -# Lexing (parsing into tokens) the document, |
10: | * -# Executing various strategies on the tokens: |
11: | * -# Removing all elements not in the whitelist, |
12: | * -# Making the tokens well-formed, |
13: | * -# Fixing the nesting of the nodes, and |
14: | * -# Validating attributes of the nodes; and |
15: | * -# Generating HTML from the purified tokens. |
16: | * |
 * However, most users will only need to interface with the HTMLPurifier
 * and HTMLPurifier_Config classes.
19: | */ |
20: | |
21: | /* |
22: | HTML Purifier 4.15.0 - Standards Compliant HTML Filtering |
23: | Copyright (C) 2006-2008 Edward Z. Yang |
24: | |
25: | This library is free software; you can redistribute it and/or |
26: | modify it under the terms of the GNU Lesser General Public |
27: | License as published by the Free Software Foundation; either |
28: | version 2.1 of the License, or (at your option) any later version. |
29: | |
30: | This library is distributed in the hope that it will be useful, |
31: | but WITHOUT ANY WARRANTY; without even the implied warranty of |
32: | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
33: | Lesser General Public License for more details. |
34: | |
35: | You should have received a copy of the GNU Lesser General Public |
36: | License along with this library; if not, write to the Free Software |
37: | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
38: | */ |
39: | |
/**
 * Facade that coordinates HTML Purifier's subsystems in order to purify HTML.
 *
 * @note Configuration can be supplied at two points. From lowest to
 *       highest precedence they are:
 *          -# Instance: new HTMLPurifier($config)
 *          -# Invocation: purify($html, $config)
 *       The two configurations are entirely independent of each other
 *       and are *not* merged (this behavior may change in the future).
 *
 * @todo We need an easier way to inject strategies using the configuration
 *       object.
 */
class HTMLPurifier
{

    /**
     * Version of HTML Purifier, exposed as an instance property.
     * @type string
     */
    public $version = '4.15.0';

    /**
     * Version of HTML Purifier, exposed as a class constant.
     */
    const VERSION = '4.15.0';

    /**
     * Configuration used by purify() whenever no per-call configuration
     * is passed in.
     * @type HTMLPurifier_Config
     */
    public $config;

    /**
     * Extra filter objects registered through the deprecated addFilter();
     * kept for backwards compatibility.
     * @type HTMLPurifier_Filter[]
     */
    private $filters = array();

    /**
     * Shared instance handed out by instance() / getInstance().
     * @type HTMLPurifier
     */
    private static $instance;

    /**
     * Strategy that turns the lexed tokens into purified tokens.
     * @type HTMLPurifier_Strategy_Core
     */
    protected $strategy;

    /**
     * Generator created for the most recent purify() call.
     * @type HTMLPurifier_Generator
     */
    protected $generator;

    /**
     * Context left behind by the last purification run.
     * After purifyArray() this holds an array of contexts instead, keyed
     * like the input array.
     * @type HTMLPurifier_Context|array
     */
    public $context;

    /**
     * Sets up the purifier with its instance-wide configuration.
     *
     * @param HTMLPurifier_Config|mixed $config Optional configuration for
     *              every purify() call made on this object; it can still
     *              be replaced on a per-call basis. Anything accepted by
     *              HTMLPurifier_Config::create() may be given; the value
     *              is passed straight to that method.
     */
    public function __construct($config = null)
    {
        $this->strategy = new HTMLPurifier_Strategy_Core();
        $this->config = HTMLPurifier_Config::create($config);
    }

    /**
     * Registers an additional filter. Filters run in the order they were
     * added.
     *
     * Calling this method raises an E_USER_WARNING deprecation message;
     * the filter is registered regardless.
     *
     * @param HTMLPurifier_Filter $filter HTMLPurifier_Filter object
     */
    public function addFilter($filter)
    {
        $message = 'HTMLPurifier->addFilter() is deprecated, use configuration directives' .
            ' in the Filter namespace or Filter.Custom';
        trigger_error($message, E_USER_WARNING);

        $this->filters[] = $filter;
    }

    /**
     * Filters an HTML snippet/document to be XSS-free and standards-compliant.
     *
     * @param string $html String of HTML to purify
     * @param HTMLPurifier_Config|mixed $config Configuration for this call
     *              only. When omitted (or otherwise falsy) the
     *              configuration given at construction time is used.
     *              Anything accepted by HTMLPurifier_Config::create() may
     *              be given.
     *
     * @return string Purified HTML
     */
    public function purify($html, $config = null)
    {
        // :TODO: make the config merge in, instead of replace
        if ($config) {
            $config = HTMLPurifier_Config::create($config);
        } else {
            $config = $this->config;
        }

        // the implementation is partially environment dependent and
        // partially configuration dependent
        $lexer = HTMLPurifier_Lexer::create($config);

        $context = new HTMLPurifier_Context();

        // the HTML generator is published through the context
        $this->generator = new HTMLPurifier_Generator($config, $context);
        $context->register('Generator', $this->generator);

        // context variables that are only needed when errors are collected
        // (may get moved out if other facilities use them)
        if ($config->get('Core.CollectErrors')) {
            $locale = HTMLPurifier_LanguageFactory::instance()->create($config, $context);
            $context->register('Locale', $locale);

            $collector = new HTMLPurifier_ErrorCollector($context);
            $context->register('ErrorCollector', $collector);
        }

        // the ID accumulator lives in the context because AttrValidator
        // can be called from many places
        $accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
        $context->register('IDAccumulator', $accumulator);

        $html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);

        // Assemble the filter list: enabled Filter.* flags first, then
        // Filter.Custom, then anything registered through addFilter().
        $flags = $config->getBatch('Filter');
        $custom = $flags['Custom'];
        unset($flags['Custom']);

        $filters = array();
        foreach ($flags as $name => $enabled) {
            // skip disabled filters as well as dotted keys
            if (!$enabled || strpos($name, '.') !== false) {
                continue;
            }
            $class = "HTMLPurifier_Filter_$name";
            $filters[] = new $class();
        }
        foreach ($custom as $custom_filter) {
            // maybe "HTMLPurifier_Filter_$filter", but be consistent with AutoFormat
            $filters[] = $custom_filter;
        }
        $filters = array_merge($filters, $this->filters);
        // maybe prepare(), but later

        foreach ($filters as $filter) {
            $html = $filter->preFilter($html, $config, $context);
        }

        // un-purified HTML -> un-purified tokens -> purified tokens -> purified HTML
        $tokens = $lexer->tokenizeHTML($html, $config, $context);
        $tokens = $this->strategy->execute($tokens, $config, $context);
        $html = $this->generator->generateFromTokens($tokens);

        // post-filters run in the opposite order of the pre-filters
        foreach (array_reverse($filters) as $filter) {
            $html = $filter->postFilter($html, $config, $context);
        }

        $html = HTMLPurifier_Encoder::convertFromUTF8($html, $config, $context);
        $this->context =& $context;
        return $html;
    }

    /**
     * Filters an array of HTML snippets. Nested arrays are purified
     * recursively and keys are preserved.
     *
     * Afterwards $this->context holds an array, keyed like the input, of
     * the context left behind after each element was processed.
     *
     * @param string[] $array_of_html Array of html snippets
     * @param HTMLPurifier_Config $config Optional config object for this operation.
     *                See HTMLPurifier::purify() for more details.
     *
     * @return string[] Array of purified HTML
     */
    public function purifyArray($array_of_html, $config = null)
    {
        $purified = array();
        $contexts = array();
        foreach ($array_of_html as $key => $value) {
            $purified[$key] = is_array($value)
                ? $this->purifyArray($value, $config)
                : $this->purify($value, $config);
            $contexts[$key] = $this->context;
        }
        $this->context = $contexts;
        return $purified;
    }

    /**
     * Singleton for enforcing just one HTML Purifier in your system
     *
     * @param HTMLPurifier|HTMLPurifier_Config $prototype Optional prototype
     *              HTMLPurifier instance to overload singleton with,
     *              or HTMLPurifier_Config instance to configure the
     *              generated version with. Passing either one replaces
     *              any previously stored instance.
     *
     * @return HTMLPurifier
     */
    public static function instance($prototype = null)
    {
        if ($prototype instanceof self) {
            // a ready-made purifier replaces the stored instance
            self::$instance = $prototype;
        } elseif ($prototype) {
            // anything else truthy is handed to the constructor as configuration
            self::$instance = new self($prototype);
        } elseif (!self::$instance) {
            // nothing supplied and nothing stored yet: create a default purifier
            self::$instance = new self();
        }
        return self::$instance;
    }

    /**
     * Singleton for enforcing just one HTML Purifier in your system
     *
     * @param HTMLPurifier|HTMLPurifier_Config $prototype Optional prototype
     *              HTMLPurifier instance to overload singleton with,
     *              or HTMLPurifier_Config instance to configure the
     *              generated version with.
     *
     * @return HTMLPurifier
     * @note Backwards compatibility, see instance()
     */
    public static function getInstance($prototype = null)
    {
        return self::instance($prototype);
    }
}
296: | |
297: | // vim: et sw=4 sts=4 |
298: |