1: <?php
2:
3: /**
4: * HTML Purifier's internal representation of a URI.
5: * @note
6: * Internal data-structures are completely escaped. If the data needs
7: * to be used in a non-URI context (which is very unlikely), be sure
8: * to decode it first. The URI may not necessarily be well-formed until
9: * validate() is called.
10: */
class HTMLPurifier_URI
{
    /**
     * Scheme component (lower-cased by the constructor), or null when absent.
     * @type string|null
     */
    public $scheme;

    /**
     * Userinfo component of the authority, or null when absent.
     * @type string|null
     */
    public $userinfo;

    /**
     * Host component of the authority. Null means "no authority"; the empty
     * string means "empty authority" (as in file:///path).
     * @type string|null
     */
    public $host;

    /**
     * Port component (cast to int by the constructor), or null when absent.
     * @type int|null
     */
    public $port;

    /**
     * Path component.
     * @type string
     */
    public $path;

    /**
     * Query component (without the leading "?"), or null when absent.
     * @type string|null
     */
    public $query;

    /**
     * Fragment component (without the leading "#"), or null when absent.
     * @type string|null
     */
    public $fragment;

    /**
     * @param string|null $scheme
     * @param string|null $userinfo
     * @param string|null $host
     * @param int|string|null $port
     * @param string $path
     * @param string|null $query
     * @param string|null $fragment
     * @note Automatically normalizes scheme (to lower case) and port (to int)
     */
    public function __construct($scheme, $userinfo, $host, $port, $path, $query, $fragment)
    {
        // Only pay for strtolower() when the scheme is not already lower case.
        if ($scheme === null || ctype_lower($scheme)) {
            $this->scheme = $scheme;
        } else {
            $this->scheme = strtolower($scheme);
        }
        $this->userinfo = $userinfo;
        $this->host = $host;
        $this->port = ($port === null) ? null : (int)$port;
        $this->path = $path;
        $this->query = $query;
        $this->fragment = $fragment;
    }

    /**
     * Retrieves a scheme object corresponding to the URI's scheme, falling
     * back to the configured default scheme when the URI has none.
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return HTMLPurifier_URIScheme|bool Scheme object appropriate for
     *         validating this URI, or false if no usable scheme object
     *         could be obtained
     */
    public function getSchemeObj($config, $context)
    {
        $registry = HTMLPurifier_URISchemeRegistry::instance();

        if ($this->scheme !== null) {
            $scheme_obj = $registry->getScheme($this->scheme, $config, $context);
            if (!$scheme_obj) {
                // invalid scheme, clean it out
                return false;
            }
            return $scheme_obj;
        }

        // no scheme: retrieve the default one
        $def = $config->getDefinition('URI');
        $scheme_obj = $def->getDefaultScheme($config, $context);
        if (!$scheme_obj) {
            // Only complain when a default scheme was actually configured;
            // a null default scheme is a legitimate "no default" setting.
            if ($def->defaultScheme !== null) {
                // something funky happened to the default scheme object
                trigger_error(
                    'Default scheme object "' . $def->defaultScheme . '" was not readable',
                    E_USER_WARNING
                );
            }
            return false;
        }
        return $scheme_obj;
    }

    /**
     * Generic validation method applicable for all schemes. May modify
     * this URI in order to get it into a compliant form.
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool True if validation/filtering succeeds, false if failure
     *         (the generic checks below always return true)
     */
    public function validate($config, $context)
    {
        // ABNF definitions from RFC 3986
        // (gen-delims are ":/?#[]@"; they are not needed as a set here)
        $chars_sub_delims = '!$&\'()*+,;=';
        $chars_pchar = $chars_sub_delims . ':@';

        // validate host
        if ($this->host !== null) {
            $host_def = new HTMLPurifier_AttrDef_URI_Host();
            $this->host = $host_def->validate($this->host, $config, $context);
            if ($this->host === false) {
                $this->host = null;
            }
        }

        // validate scheme
        // NOTE: It's not appropriate to check whether or not this
        // scheme is in our registry, since a URIFilter may convert a
        // URI that we don't allow into one we do.  So instead, we just
        // check if the scheme can be dropped because there is no host
        // and it is our default scheme.
        // The grouping is explicit: without parentheses "&&" binds tighter
        // than "||", which made an empty host enter this branch even when
        // there was no scheme to drop.
        if ($this->scheme !== null && ($this->host === null || $this->host === '')) {
            // support for relative paths is pretty abysmal when the
            // scheme is present, so axe it when possible
            $def = $config->getDefinition('URI');
            if ($def->defaultScheme === $this->scheme) {
                $this->scheme = null;
            }
        }

        // validate username
        if ($this->userinfo !== null) {
            $encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . ':');
            $this->userinfo = $encoder->encode($this->userinfo);
        }

        // validate port
        if ($this->port !== null) {
            if ($this->port < 1 || $this->port > 65535) {
                $this->port = null;
            }
        }

        // validate path
        // A null path is treated as the empty path so that the offset
        // access and string functions below never operate on null.
        if ($this->path === null) {
            $this->path = '';
        }
        $segments_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/');
        if ($this->host !== null) { // this catches $this->host === ''
            // path-abempty (hier and relative)
            // http://www.example.com/my/path
            // //www.example.com/my/path (looks odd, but works, and
            //                            recognized by most browsers)
            // (this set is valid or invalid on a scheme by scheme
            // basis, so we'll deal with it later)
            // file:///my/path
            // ///my/path
            $this->path = $segments_encoder->encode($this->path);
        } elseif ($this->path !== '') {
            if ($this->path[0] === '/') {
                // path-absolute (hier and relative)
                // http:/my/path
                // /my/path
                if (strlen($this->path) >= 2 && $this->path[1] === '/') {
                    // This could happen if the host gets stripped out:
                    // http://my/path
                    // //my/path
                    // Without an authority a leading "//" would be re-read
                    // as one, so the path is discarded entirely.
                    $this->path = '';
                } else {
                    $this->path = $segments_encoder->encode($this->path);
                }
            } elseif ($this->scheme !== null) {
                // path-rootless (hier)
                // http:my/path
                // Short circuit evaluation means we don't need to check nz
                $this->path = $segments_encoder->encode($this->path);
            } else {
                // path-noscheme (relative)
                // my/path
                // (once again, not checking nz)
                // The first segment uses an encoder whose preserved set
                // omits ":" so it cannot be mistaken for a scheme.
                $segment_nc_encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . '@');
                $c = strpos($this->path, '/');
                if ($c !== false) {
                    $this->path =
                        $segment_nc_encoder->encode(substr($this->path, 0, $c)) .
                        $segments_encoder->encode(substr($this->path, $c));
                } else {
                    $this->path = $segment_nc_encoder->encode($this->path);
                }
            }
        } else {
            // path-empty (hier and relative)
            $this->path = ''; // just to be safe
        }

        // qf = query and fragment
        $qf_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/?');

        if ($this->query !== null) {
            $this->query = $qf_encoder->encode($this->query);
        }

        if ($this->fragment !== null) {
            $this->fragment = $qf_encoder->encode($this->fragment);
        }
        return true;
    }

    /**
     * Convert URI back to string
     * @return string URI appropriate for output
     */
    public function toString()
    {
        // reconstruct authority
        $authority = null;
        // there is a rendering difference between a null authority
        // (http:foo-bar) and an empty string authority
        // (http:///foo-bar).
        if ($this->host !== null) {
            $authority = '';
            if ($this->userinfo !== null) {
                $authority .= $this->userinfo . '@';
            }
            $authority .= $this->host;
            if ($this->port !== null) {
                $authority .= ':' . $this->port;
            }
        }

        // Reconstruct the result
        // One might wonder about parsing quirks from browsers after
        // this reconstruction.  Unfortunately, parsing behavior depends
        // on what *scheme* was employed (file:///foo is handled *very*
        // differently than http:///foo), so unfortunately we have to
        // defer to the schemes to do the right thing.
        $result = '';
        if ($this->scheme !== null) {
            $result .= $this->scheme . ':';
        }
        if ($authority !== null) {
            $result .= '//' . $authority;
        }
        $result .= $this->path;
        if ($this->query !== null) {
            $result .= '?' . $this->query;
        }
        if ($this->fragment !== null) {
            $result .= '#' . $this->fragment;
        }

        return $result;
    }

    /**
     * Returns true if this URL might be considered a 'local' URL given
     * the current context.  This is true when the host is null, or
     * when it matches the host supplied to the configuration.
     *
     * Note that this does not do any scheme checking, so it is mostly
     * only appropriate for metadata that doesn't care about protocol
     * security.  isBenign is probably what you actually want.
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool
     */
    public function isLocal($config, $context)
    {
        if ($this->host === null) {
            return true;
        }
        $uri_def = $config->getDefinition('URI');
        return $uri_def->host === $this->host;
    }

    /**
     * Returns true if this URL should be considered a 'benign' URL,
     * that is:
     *
     *      - It is a local URL (isLocal), and
     *      - It has an equal or better level of security
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool
     */
    public function isBenign($config, $context)
    {
        if (!$this->isLocal($config, $context)) {
            return false;
        }

        $scheme_obj = $this->getSchemeObj($config, $context);
        if (!$scheme_obj) {
            // conservative approach
            return false;
        }

        $current_scheme_obj = $config->getDefinition('URI')->getDefaultScheme($config, $context);
        // getDefaultScheme() can come back empty (getSchemeObj() handles the
        // same case), so guard before reading ->secure.  With no current
        // scheme there is no security level to fall short of.
        if ($current_scheme_obj && $current_scheme_obj->secure && !$scheme_obj->secure) {
            return false;
        }
        return true;
    }
}
315:
316: // vim: et sw=4 sts=4
317: