1: | <?php |
2: | |
3: | /* |
4: | * The MIT License (MIT) |
5: | * |
6: | * Copyright (c) 2013 Jonathan Vollebregt (jnvsor@gmail.com), Rokas Šleinius (raveren@gmail.com) |
7: | * |
8: | * Permission is hereby granted, free of charge, to any person obtaining a copy of |
9: | * this software and associated documentation files (the "Software"), to deal in |
10: | * the Software without restriction, including without limitation the rights to |
11: | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of |
12: | * the Software, and to permit persons to whom the Software is furnished to do so, |
13: | * subject to the following conditions: |
14: | * |
15: | * The above copyright notice and this permission notice shall be included in all |
16: | * copies or substantial portions of the Software. |
17: | * |
18: | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
19: | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS |
20: | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR |
21: | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER |
22: | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
23: | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
24: | */ |
25: | |
26: | namespace Kint\Object; |
27: | |
/**
 * Object describing a string value together with its detected character
 * encoding, plus static helpers for encoding-aware string handling.
 *
 * An encoding of false marks the string as binary (no encoding detected).
 */
class BlobObject extends BasicObject
{
    /**
     * @var array Character encodings to detect
     *
     * @see https://secure.php.net/function.mb-detect-order
     *
     * In practice, mb_detect_encoding can only successfully determine the
     * difference between the following common charsets at once without
     * breaking things for one of the other charsets:
     * - ASCII
     * - UTF-8
     * - SJIS
     * - EUC-JP
     *
     * The order of the charsets is significant. If you put UTF-8 before ASCII
     * it will never match ASCII, because UTF-8 is a superset of ASCII.
     * Similarly, SJIS and EUC-JP frequently match UTF-8 strings, so you should
     * check UTF-8 first. SJIS and EUC-JP seem to work either way, but SJIS is
     * more common so it should probably be first.
     *
     * While you're free to experiment with other charsets, remember to keep
     * this behavior in mind when setting up your char_encodings array.
     *
     * This depends on the mbstring extension
     */
    public static $char_encodings = array(
        'ASCII',
        'UTF-8',
    );

    /**
     * @var array Legacy character encodings to detect
     *
     * @see https://secure.php.net/function.iconv
     *
     * Assuming the other encoding checks fail, this will perform a
     * simple iconv conversion to check for invalid bytes. If any are
     * found it will not match.
     *
     * This can be useful for ambiguous single byte encodings like
     * windows-125x and iso-8859-x which have practically undetectable
     * differences because they use every single byte available.
     *
     * This is *NOT* reliable and should not be trusted implicitly. As
     * with char_encodings, the order of the charsets is significant.
     *
     * This depends on the iconv extension
     */
    public static $legacy_encodings = array();

    /** @var string Base type name used by getType() */
    public $type = 'string';

    /** @var false|string Detected encoding name, or false for binary data */
    public $encoding = false;

    /** @var array Hints attached to this object */
    public $hints = array('string');

    /**
     * Builds the type label for this string.
     *
     * @return string "binary <type>" when no encoding is known, the bare
     *                type for ASCII, otherwise "<encoding> <type>"
     */
    public function getType()
    {
        if (false === $this->encoding) {
            return 'binary '.$this->type;
        }

        if ('ASCII' === $this->encoding) {
            return $this->type;
        }

        return $this->encoding.' '.$this->type;
    }

    /**
     * Returns the contents of the value representation wrapped in double
     * quotes.
     *
     * The value property is not declared in this class; presumably it is
     * inherited from BasicObject.
     *
     * @return null|string The quoted contents, or null when value is falsy
     */
    public function getValueShort()
    {
        if ($rep = $this->value) {
            return '"'.$rep->contents.'"';
        }
    }

    /**
     * Transplants another object's data into this one via the parent
     * implementation, additionally carrying over the encoding when the
     * source is itself a BlobObject.
     *
     * @param BasicObject $old Object to copy from
     */
    public function transplant(BasicObject $old)
    {
        parent::transplant($old);

        if ($old instanceof self) {
            $this->encoding = $old->encoding;
        }
    }

    /**
     * Encoding-aware string length.
     *
     * Uses mb_strlen when it is available and the string has a detected
     * non-ASCII encoding; otherwise falls back to the byte length.
     *
     * @param string       $string   String to measure
     * @param false|string $encoding Encoding to use, or false to detect it
     *
     * @return int
     */
    public static function strlen($string, $encoding = false)
    {
        if (\function_exists('mb_strlen')) {
            if (false === $encoding) {
                $encoding = self::detectEncoding($string);
            }

            if ($encoding && 'ASCII' !== $encoding) {
                return \mb_strlen($string, $encoding);
            }
        }

        return \strlen($string);
    }

    /**
     * Encoding-aware substring.
     *
     * Uses mb_substr when it is available and the string has a detected
     * non-ASCII encoding; otherwise falls back to the byte-wise substr.
     * Both paths always return a string.
     *
     * @param string       $string   String to cut
     * @param int          $start    Start offset
     * @param null|int     $length   Maximum length, or null for the remainder
     * @param false|string $encoding Encoding to use, or false to detect it
     *
     * @return string
     */
    public static function substr($string, $start, $length = null, $encoding = false)
    {
        if (\function_exists('mb_substr')) {
            if (false === $encoding) {
                $encoding = self::detectEncoding($string);
            }

            if ($encoding && 'ASCII' !== $encoding) {
                return \mb_substr($string, $start, $length, $encoding);
            }
        }

        $part = \substr($string, $start, isset($length) ? $length : PHP_INT_MAX);

        // substr/mb_substr discrepancy: before PHP 8.0 substr() returns false
        // instead of '' when nothing can be extracted (empty input, a start
        // offset past the end of the string, ...), whereas mb_substr() always
        // returns a string. Normalize so both paths behave the same.
        if (false === $part) {
            return '';
        }

        return $part;
    }

    /**
     * Attempts to determine the character encoding of a string.
     *
     * Checks self::$char_encodings through mb_detect_encoding first, then
     * rejects strings containing control characters as binary, then tries
     * each of self::$legacy_encodings with an iconv round trip.
     *
     * @param string $string String to inspect
     *
     * @return false|string The encoding name, or false if none matched
     */
    public static function detectEncoding($string)
    {
        if (\function_exists('mb_detect_encoding')) {
            if ($ret = \mb_detect_encoding($string, self::$char_encodings, true)) {
                return $ret;
            }
        }

        // Pretty much every character encoding uses first 32 bytes as control
        // characters. If it's not a multi-byte format it's safe to say matching
        // any control character besides tab, nl, and cr means it's binary.
        if (\preg_match('/[\\x00-\\x08\\x0B\\x0C\\x0E-\\x1F]/', $string)) {
            return false;
        }

        if (\function_exists('iconv')) {
            foreach (self::$legacy_encodings as $encoding) {
                // Converting to the same charset leaves the string unchanged
                // only if every byte is valid in it. Warnings for invalid
                // bytes are deliberately suppressed.
                if (@\iconv($encoding, $encoding, $string) === $string) {
                    return $encoding;
                }
            }
        } elseif (!\function_exists('mb_detect_encoding')) { // @codeCoverageIgnore
            // If a user has neither mb_detect_encoding, nor iconv, nor the
            // polyfills, there's not much we can do about it...
            // Pretend it's ASCII and pray the browser renders it properly.
            return 'ASCII'; // @codeCoverageIgnore
        }

        return false;
    }
}
178: |