1: <?php
2:
3: /**
4: * Class that handles operations involving percent-encoding in URIs.
5: *
6: * @warning
7: * Be careful when reusing instances of PercentEncoder. The object
8: * you use for normalize() SHOULD NOT be used for encode(), or
9: * vice-versa.
10: */
class HTMLPurifier_PercentEncoder
{

    /**
     * Lookup table of bytes that encode() leaves untouched and that
     * normalize() decodes when it meets them in percent-encoded form.
     * Keys are byte values (as returned by ord()); every value is true.
     * @type array
     */
    protected $preserve = array();

    /**
     * Builds the table of preserved bytes: the unreserved set of RFC 3986
     * (ASCII digits and letters, "-", ".", "_", "~") plus any extra
     * characters supplied by the caller.
     * @param string|bool $preserve String whose bytes should additionally
     *      be preserved, or false (the default) for no extras. Any other
     *      non-string value is ignored.
     */
    public function __construct($preserve = false)
    {
        // unreserved characters, ought to const-ify
        for ($i = 48; $i <= 57; $i++) { // digits
            $this->preserve[$i] = true;
        }
        for ($i = 65; $i <= 90; $i++) { // upper-case
            $this->preserve[$i] = true;
        }
        for ($i = 97; $i <= 122; $i++) { // lower-case
            $this->preserve[$i] = true;
        }
        $this->preserve[45] = true; // Dash         -
        $this->preserve[46] = true; // Period       .
        $this->preserve[95] = true; // Underscore   _
        $this->preserve[126] = true; // Tilde        ~

        // Extra characters not to escape. Only strings are iterated:
        // indexing a bool/int/null yields null, and ord(null) is 0, which
        // would silently mark the NUL byte as preserved (and strlen(null)
        // is deprecated as of PHP 8.1).
        if (is_string($preserve)) {
            for ($i = 0, $c = strlen($preserve); $i < $c; $i++) {
                $this->preserve[ord($preserve[$i])] = true;
            }
        }
    }

    /**
     * Our replacement for urlencode: percent-encodes every byte of the
     * string except unreserved characters, the extra characters passed to
     * the constructor, and the percent sign itself.
     * @note
     *      Assumes that the string has already been normalized, making any
     *      and all percent escape sequences valid. Percents will not be
     *      re-escaped, regardless of their status in $preserve
     * @param string $string String to be encoded
     * @return string Encoded string.
     */
    public function encode($string)
    {
        $ret = '';
        for ($i = 0, $c = strlen($string); $i < $c; $i++) {
            $char = $string[$i];
            $int = ord($char);
            if ($char === '%' || isset($this->preserve[$int])) {
                // '%' is passed through so existing escapes stay intact
                $ret .= $char;
            } else {
                // two upper-case hex digits, e.g. ' ' becomes %20
                $ret .= sprintf('%%%02X', $int);
            }
        }
        return $ret;
    }

    /**
     * Fix up percent-encoding by decoding unreserved characters and normalizing.
     *
     * Escapes of preserved bytes are decoded, all other valid escapes get
     * upper-case hex digits, and a percent sign that is not followed by
     * two hex digits is itself escaped as %25.
     * @warning This function is affected by $preserve, even though the
     *          usual desired behavior is for this not to preserve those
     *          characters. Be careful when reusing instances of PercentEncoder!
     * @param string $string String to normalize
     * @return string
     */
    public function normalize($string)
    {
        if ($string == '') {
            return '';
        }
        // Everything before the first '%' is copied verbatim; each later
        // part starts right after a '%' found in the input.
        $parts = explode('%', $string);
        $ret = array_shift($parts);
        foreach ($parts as $part) {
            if (strlen($part) < 2) {
                // too short to hold two hex digits: escape the stray percent
                $ret .= '%25' . $part;
                continue;
            }
            $encoding = substr($part, 0, 2);
            $text = substr($part, 2);
            if (!ctype_xdigit($encoding)) {
                // not a hex pair: escape the stray percent
                $ret .= '%25' . $part;
                continue;
            }
            $int = hexdec($encoding);
            if (isset($this->preserve[$int])) {
                // needlessly encoded byte: emit it literally
                $ret .= chr($int) . $text;
                continue;
            }
            // valid escape of a non-preserved byte: canonical upper-case hex
            $ret .= '%' . strtoupper($encoding) . $text;
        }
        return $ret;
    }
}
110:
111: // vim: et sw=4 sts=4
112: