1: <?php
2:
3: /*! @mainpage
4: *
5: * HTML Purifier is an HTML filter that will take an arbitrary snippet of
6: * HTML and rigorously test, validate and filter it into a version that
7: * is safe for output onto webpages. It achieves this by:
8: *
9: * -# Lexing (parsing into tokens) the document,
10: * -# Executing various strategies on the tokens:
11: * -# Removing all elements not in the whitelist,
12: * -# Making the tokens well-formed,
13: * -# Fixing the nesting of the nodes, and
14: * -# Validating attributes of the nodes; and
15: * -# Generating HTML from the purified tokens.
16: *
* However, most users will only need to interface with the HTMLPurifier
* and HTMLPurifier_Config classes.
19: */
20:
21: /*
22: HTML Purifier 4.15.0 - Standards Compliant HTML Filtering
23: Copyright (C) 2006-2008 Edward Z. Yang
24:
25: This library is free software; you can redistribute it and/or
26: modify it under the terms of the GNU Lesser General Public
27: License as published by the Free Software Foundation; either
28: version 2.1 of the License, or (at your option) any later version.
29:
30: This library is distributed in the hope that it will be useful,
31: but WITHOUT ANY WARRANTY; without even the implied warranty of
32: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
33: Lesser General Public License for more details.
34:
35: You should have received a copy of the GNU Lesser General Public
36: License along with this library; if not, write to the Free Software
37: Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
38: */
39:
40: /**
41: * Facade that coordinates HTML Purifier's subsystems in order to purify HTML.
42: *
43: * @note There are several points in which configuration can be specified
44: * for HTML Purifier. The precedence of these (from lowest to
45: * highest) is as follows:
46: * -# Instance: new HTMLPurifier($config)
47: * -# Invocation: purify($html, $config)
48: * These configurations are entirely independent of each other and
49: * are *not* merged (this behavior may change in the future).
50: *
51: * @todo We need an easier way to inject strategies using the configuration
52: * object.
53: */
class HTMLPurifier
{

    /**
     * Version of HTML Purifier.
     * @type string
     */
    public $version = '4.15.0';

    /**
     * Constant with version of HTML Purifier.
     */
    const VERSION = '4.15.0';

    /**
     * Global configuration object, used whenever a call does not supply
     * its own configuration.
     * @type HTMLPurifier_Config
     */
    public $config;

    /**
     * Array of extra filter objects to run on HTML,
     * for backwards compatibility (populated by addFilter()).
     * @type HTMLPurifier_Filter[]
     */
    private $filters = array();

    /**
     * Single instance of HTML Purifier.
     * @type HTMLPurifier
     */
    private static $instance;

    /**
     * Strategy that turns the lexed tokens into purified tokens.
     * @type HTMLPurifier_Strategy_Core
     */
    protected $strategy;

    /**
     * Generator created by the most recent purify() call.
     * @type HTMLPurifier_Generator
     */
    protected $generator;

    /**
     * Resultant context of last run purification.
     * After purifyArray() this is an array of contexts that mirrors the
     * keys (and nesting) of the input array.
     * @type HTMLPurifier_Context|array
     */
    public $context;

    /**
     * Initializes the purifier.
     *
     * @param HTMLPurifier_Config|mixed $config Optional HTMLPurifier_Config object
     *          for all instances of the purifier, if omitted, a default
     *          configuration is supplied (which can be overridden on a
     *          per-use basis).
     *          The parameter can also be any type that
     *          HTMLPurifier_Config::create() supports.
     */
    public function __construct($config = null)
    {
        $this->config = HTMLPurifier_Config::create($config);
        $this->strategy = new HTMLPurifier_Strategy_Core();
    }

    /**
     * Adds a filter to process the output. First come first serve
     *
     * Deprecated: every call raises an E_USER_WARNING before the filter is
     * queued. Filters added this way run after the configured ones.
     *
     * @param HTMLPurifier_Filter $filter HTMLPurifier_Filter object
     */
    public function addFilter($filter)
    {
        trigger_error(
            'HTMLPurifier->addFilter() is deprecated, use configuration directives' .
            ' in the Filter namespace or Filter.Custom',
            E_USER_WARNING
        );
        $this->filters[] = $filter;
    }

    /**
     * Filters an HTML snippet/document to be XSS-free and standards-compliant.
     *
     * Pipeline: convert to UTF-8, run every filter's preFilter(), lex,
     * execute the core strategy, generate HTML, run every filter's
     * postFilter() in reverse order, then convert back from UTF-8.
     * The context built for the run is stored in $this->context.
     *
     * @param string $html String of HTML to purify
     * @param HTMLPurifier_Config|mixed $config Config object for this operation,
     *                if omitted (or otherwise falsy), defaults to the config
     *                object specified during this object's construction.
     *                The parameter can also be any type that
     *                HTMLPurifier_Config::create() supports. It replaces the
     *                instance configuration; the two are not merged.
     *
     * @return string Purified HTML
     */
    public function purify($html, $config = null)
    {
        // :TODO: make the config merge in, instead of replace
        $config = $config ? HTMLPurifier_Config::create($config) : $this->config;

        // implementation is partially environment dependent, partially
        // configuration dependent
        $lexer = HTMLPurifier_Lexer::create($config);

        $context = new HTMLPurifier_Context();

        // setup HTML generator; a local handle is kept so that the generator
        // built for *this* run is the one used below, whatever happens to
        // $this->generator in the meantime
        $generator = new HTMLPurifier_Generator($config, $context);
        $this->generator = $generator;
        $context->register('Generator', $generator);

        // set up global context variables
        if ($config->get('Core.CollectErrors')) {
            // may get moved out if other facilities use it
            $language_factory = HTMLPurifier_LanguageFactory::instance();
            $language = $language_factory->create($config, $context);
            $context->register('Locale', $language);

            $error_collector = new HTMLPurifier_ErrorCollector($context);
            $context->register('ErrorCollector', $error_collector);
        }

        // setup id_accumulator context, necessary due to the fact that
        // AttrValidator can be called from many places
        $id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
        $context->register('IDAccumulator', $id_accumulator);

        $html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);

        // setup filters
        $filters = $this->buildFilters($config);
        // maybe prepare(), but later

        foreach ($filters as $filter) {
            $html = $filter->preFilter($html, $config, $context);
        }

        // un-purified HTML -> un-purified tokens -> purified tokens -> purified HTML
        $tokens = $lexer->tokenizeHTML($html, $config, $context);
        $tokens = $this->strategy->execute($tokens, $config, $context);
        $html = $generator->generateFromTokens($tokens);

        // post-filters unwind in the opposite order of the pre-filters
        foreach (array_reverse($filters) as $filter) {
            $html = $filter->postFilter($html, $config, $context);
        }

        $html = HTMLPurifier_Encoder::convertFromUTF8($html, $config, $context);
        // objects are handles, so a plain assignment is enough here
        $this->context = $context;
        return $html;
    }

    /**
     * Assembles the ordered list of filters for one purification run.
     *
     * Order: filters enabled through flags in the 'Filter' configuration
     * batch, then the entries of Filter.Custom, then filters registered
     * through the deprecated addFilter().
     *
     * @param HTMLPurifier_Config $config Configuration for this run
     *
     * @return HTMLPurifier_Filter[] Zero-indexed list of filter objects
     */
    private function buildFilters($config)
    {
        $filter_flags = $config->getBatch('Filter');
        $custom_filters = $filter_flags['Custom'];
        unset($filter_flags['Custom']);

        $filters = array();
        foreach ($filter_flags as $filter => $flag) {
            // Disabled flags are skipped, and so are keys containing a dot:
            // those are not treated as filter class names (presumably they
            // are options belonging to a filter -- confirm against the schema).
            if (!$flag || strpos($filter, '.') !== false) {
                continue;
            }
            $class = "HTMLPurifier_Filter_$filter";
            $filters[] = new $class;
        }
        foreach ($custom_filters as $filter) {
            // maybe "HTMLPurifier_Filter_$filter", but be consistent with AutoFormat
            $filters[] = $filter;
        }
        return array_merge($filters, $this->filters);
    }

    /**
     * Filters an array of HTML snippets
     *
     * Nested arrays are purified recursively and keys are preserved.
     * Afterwards $this->context holds an array of contexts with the same
     * keys (and nesting) as the input.
     *
     * @param string[] $array_of_html Array of html snippets
     * @param HTMLPurifier_Config|mixed $config Optional config object for this
     *                operation. See HTMLPurifier::purify() for more details.
     *
     * @return string[] Array of purified HTML
     */
    public function purifyArray($array_of_html, $config = null)
    {
        // Resolve a non-object configuration (array, string, ...) once up
        // front; otherwise purify() would build a fresh configuration
        // object for every single snippet. A falsy value is passed through
        // untouched so purify() still falls back to the instance config.
        if ($config && !($config instanceof HTMLPurifier_Config)) {
            $config = HTMLPurifier_Config::create($config);
        }

        $context_array = array();
        $array = array();
        foreach ($array_of_html as $key => $value) {
            if (is_array($value)) {
                $array[$key] = $this->purifyArray($value, $config);
            } else {
                $array[$key] = $this->purify($value, $config);
            }
            // whichever call ran last left its context (or array of
            // contexts) in $this->context
            $context_array[$key] = $this->context;
        }
        $this->context = $context_array;
        return $array;
    }

    /**
     * Singleton for enforcing just one HTML Purifier in your system
     *
     * @param HTMLPurifier|HTMLPurifier_Config $prototype Optional prototype
     *                   HTMLPurifier instance to overload singleton with,
     *                   or HTMLPurifier_Config instance to configure the
     *                   generated version with. Any truthy prototype
     *                   replaces an already existing singleton.
     *
     * @return HTMLPurifier
     */
    public static function instance($prototype = null)
    {
        if ($prototype instanceof HTMLPurifier) {
            self::$instance = $prototype;
        } elseif ($prototype) {
            self::$instance = new HTMLPurifier($prototype);
        } elseif (!self::$instance) {
            self::$instance = new HTMLPurifier();
        }
        return self::$instance;
    }

    /**
     * Singleton for enforcing just one HTML Purifier in your system
     *
     * @param HTMLPurifier|HTMLPurifier_Config $prototype Optional prototype
     *                   HTMLPurifier instance to overload singleton with,
     *                   or HTMLPurifier_Config instance to configure the
     *                   generated version with.
     *
     * @return HTMLPurifier
     * @note Backwards compatibility, see instance()
     */
    public static function getInstance($prototype = null)
    {
        return self::instance($prototype);
    }
}
296:
297: // vim: et sw=4 sts=4
298: