1: <?php
2:
3: /*
4: * The MIT License (MIT)
5: *
6: * Copyright (c) 2013 Jonathan Vollebregt (jnvsor@gmail.com), Rokas Šleinius (raveren@gmail.com)
7: *
8: * Permission is hereby granted, free of charge, to any person obtaining a copy of
9: * this software and associated documentation files (the "Software"), to deal in
10: * the Software without restriction, including without limitation the rights to
11: * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
12: * the Software, and to permit persons to whom the Software is furnished to do so,
13: * subject to the following conditions:
14: *
15: * The above copyright notice and this permission notice shall be included in all
16: * copies or substantial portions of the Software.
17: *
18: * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19: * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
20: * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
21: * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
22: * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
23: * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24: */
25:
26: namespace Kint\Object;
27:
/**
 * Object describing a string value together with its detected character
 * encoding, plus static encoding-aware string helpers (strlen, substr,
 * detectEncoding) that degrade gracefully when mbstring/iconv are missing.
 */
class BlobObject extends BasicObject
{
    /**
     * @var array Character encodings to detect
     *
     * @see https://secure.php.net/function.mb-detect-order
     *
     * In practice, mb_detect_encoding can only successfully determine the
     * difference between the following common charsets at once without
     * breaking things for one of the other charsets:
     * - ASCII
     * - UTF-8
     * - SJIS
     * - EUC-JP
     *
     * The order of the charsets is significant. If you put UTF-8 before ASCII
     * it will never match ASCII, because UTF-8 is a superset of ASCII.
     * Similarly, SJIS and EUC-JP frequently match UTF-8 strings, so you should
     * check UTF-8 first. SJIS and EUC-JP seem to work either way, but SJIS is
     * more common so it should probably be first.
     *
     * While you're free to experiment with other charsets, remember to keep
     * this behavior in mind when setting up your char_encodings array.
     *
     * This depends on the mbstring extension
     */
    public static $char_encodings = array(
        'ASCII',
        'UTF-8',
    );

    /**
     * @var array Legacy character encodings to detect
     *
     * @see https://secure.php.net/function.iconv
     *
     * Assuming the other encoding checks fail, this will perform a
     * simple iconv conversion to check for invalid bytes. If any are
     * found it will not match.
     *
     * This can be useful for ambiguous single byte encodings like
     * windows-125x and iso-8859-x which have practically undetectable
     * differences because they use every single byte available.
     *
     * This is *NOT* reliable and should not be trusted implicitly. As
     * with char_encodings, the order of the charsets is significant.
     *
     * This depends on the iconv extension
     */
    public static $legacy_encodings = array();

    /**
     * @var string Base type name used by getType()
     */
    public $type = 'string';

    /**
     * @var false|string Encoding name, or false when no encoding is known
     *                   (getType() then reports the value as binary)
     */
    public $encoding = false;

    /**
     * @var array Hints attached to this object; defaults to a single 'string' hint
     */
    public $hints = array('string');

    /**
     * Builds the type label for this string.
     *
     * @return string "binary <type>" when no encoding is set, the bare type
     *                for ASCII, and "<encoding> <type>" for anything else
     */
    public function getType()
    {
        if (false === $this->encoding) {
            return 'binary '.$this->type;
        }

        if ('ASCII' === $this->encoding) {
            return $this->type;
        }

        return $this->encoding.' '.$this->type;
    }

    /**
     * Builds a short, double-quoted rendering of the value.
     *
     * @return null|string The `contents` of $this->value wrapped in double
     *                     quotes, or null when $this->value is falsy
     */
    public function getValueShort()
    {
        if ($rep = $this->value) {
            return '"'.$rep->contents.'"';
        }

        return null;
    }

    /**
     * Takes over the state of another object.
     *
     * Delegates to the parent implementation and additionally copies the
     * encoding when the source object is itself a BlobObject.
     *
     * @param BasicObject $old Object to copy from
     */
    public function transplant(BasicObject $old)
    {
        parent::transplant($old);

        if ($old instanceof self) {
            $this->encoding = $old->encoding;
        }
    }

    /**
     * Encoding-aware string length.
     *
     * Uses mb_strlen when it is available and the encoding is known and not
     * ASCII; otherwise falls back to the byte length.
     *
     * @param string       $string   String to measure
     * @param false|string $encoding Encoding name, or false to auto-detect
     *
     * @return int Length in characters (or bytes in the fallback case)
     */
    public static function strlen($string, $encoding = false)
    {
        if (\function_exists('mb_strlen')) {
            if (false === $encoding) {
                $encoding = self::detectEncoding($string);
            }

            if ($encoding && 'ASCII' !== $encoding) {
                return \mb_strlen($string, $encoding);
            }
        }

        return \strlen($string);
    }

    /**
     * Encoding-aware substring.
     *
     * Uses mb_substr when it is available and the encoding is known and not
     * ASCII; otherwise falls back to the byte-wise substr. Both paths return
     * a string: an out-of-range request yields '' rather than false.
     *
     * @param string       $string   String to cut
     * @param int          $start    Start offset
     * @param null|int     $length   Maximum length, or null for "to the end"
     * @param false|string $encoding Encoding name, or false to auto-detect
     *
     * @return string The requested part of the string
     */
    public static function substr($string, $start, $length = null, $encoding = false)
    {
        if (\function_exists('mb_substr')) {
            if (false === $encoding) {
                $encoding = self::detectEncoding($string);
            }

            if ($encoding && 'ASCII' !== $encoding) {
                return \mb_substr($string, $start, $length, $encoding);
            }
        }

        // Special case for substr/mb_substr discrepancy
        if ('' === $string) {
            return '';
        }

        $part = \substr($string, $start, isset($length) ? $length : PHP_INT_MAX);

        // Before PHP 8.0 substr() returns false instead of '' when $start is
        // past the end of the string, while mb_substr() returns ''. Normalize
        // so both code paths hand back the same type.
        return false === $part ? '' : $part;
    }

    /**
     * Attempts to determine the character encoding of a string.
     *
     * Order of checks: mb_detect_encoding against self::$char_encodings
     * (strict mode), then a control-character scan that classifies the string
     * as binary, then an iconv round-trip against self::$legacy_encodings.
     *
     * @param string $string String to inspect
     *
     * @return false|string The detected encoding name, or false if the string
     *                      looks binary or no configured encoding matched
     */
    public static function detectEncoding($string)
    {
        if (\function_exists('mb_detect_encoding')) {
            if ($ret = \mb_detect_encoding($string, self::$char_encodings, true)) {
                return $ret;
            }
        }

        // Pretty much every character encoding uses first 32 bytes as control
        // characters. If it's not a multi-byte format it's safe to say matching
        // any control character besides tab, nl, and cr means it's binary.
        if (\preg_match('/[\\x00-\\x08\\x0B\\x0C\\x0E-\\x1F]/', $string)) {
            return false;
        }

        if (\function_exists('iconv')) {
            foreach (self::$legacy_encodings as $encoding) {
                // Converting an encoding to itself only returns the input
                // unchanged when every byte is valid in that encoding. The
                // notice iconv raises on invalid input is suppressed on purpose.
                if (@\iconv($encoding, $encoding, $string) === $string) {
                    return $encoding;
                }
            }
        } elseif (!\function_exists('mb_detect_encoding')) { // @codeCoverageIgnore
            // If a user has neither mb_detect_encoding, nor iconv, nor the
            // polyfills, there's not much we can do about it...
            // Pretend it's ASCII and pray the browser renders it properly.
            return 'ASCII'; // @codeCoverageIgnore
        }

        return false;
    }
}
178: