| 1: | <?php |
| 2: | |
| 3: | /* |
| 4: | * The MIT License (MIT) |
| 5: | * |
| 6: | * Copyright (c) 2013 Jonathan Vollebregt (jnvsor@gmail.com), Rokas Šleinius (raveren@gmail.com) |
| 7: | * |
| 8: | * Permission is hereby granted, free of charge, to any person obtaining a copy of |
| 9: | * this software and associated documentation files (the "Software"), to deal in |
| 10: | * the Software without restriction, including without limitation the rights to |
| 11: | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of |
| 12: | * the Software, and to permit persons to whom the Software is furnished to do so, |
| 13: | * subject to the following conditions: |
| 14: | * |
| 15: | * The above copyright notice and this permission notice shall be included in all |
| 16: | * copies or substantial portions of the Software. |
| 17: | * |
| 18: | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 19: | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS |
| 20: | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR |
| 21: | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER |
| 22: | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
| 23: | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
| 24: | */ |
| 25: | |
| 26: | namespace Kint\Object; |
| 27: | |
/**
 * Object describing a string value together with the character encoding
 * that was detected for it (or false when it looks like binary data).
 *
 * Also provides static, encoding-aware strlen/substr helpers which use the
 * mbstring functions when they exist and fall back to the byte-wise
 * functions otherwise.
 */
class BlobObject extends BasicObject
{
    /**
     * @var array Character encodings to try, in order, via mbstring
     *
     * @see https://secure.php.net/function.mb-detect-order
     *
     * In practice mb_detect_encoding can only reliably tell the following
     * common charsets apart in a single pass without breaking detection
     * for one of the others:
     * - ASCII
     * - UTF-8
     * - SJIS
     * - EUC-JP
     *
     * Order matters. UTF-8 is a superset of ASCII, so listing UTF-8 first
     * means ASCII will never be reported. SJIS and EUC-JP frequently match
     * UTF-8 strings, so UTF-8 should be checked before them. SJIS and
     * EUC-JP seem to work in either order, but SJIS is more common and
     * should probably come first.
     *
     * You are free to experiment with other charsets, but keep the above
     * behavior in mind when building this list.
     *
     * Requires the mbstring extension.
     */
    public static $char_encodings = array(
        'ASCII',
        'UTF-8',
    );

    /**
     * @var array Legacy character encodings to try, in order, via iconv
     *
     * @see https://secure.php.net/function.iconv
     *
     * Only consulted when the other encoding checks have failed. Each entry
     * is tested with a plain iconv conversion from the encoding to itself;
     * if iconv finds invalid bytes the encoding is not matched.
     *
     * Useful for ambiguous single byte encodings such as windows-125x and
     * iso-8859-x, which are practically indistinguishable because they use
     * every available byte value.
     *
     * This is *NOT* reliable and should not be trusted implicitly. As with
     * char_encodings, the order of the entries is significant.
     *
     * Requires the iconv extension.
     */
    public static $legacy_encodings = array();

    public $type = 'string';
    public $encoding = false;
    public $hints = array('string');

    /**
     * Builds the type label for this string.
     *
     * @return string The bare type for ASCII, "binary <type>" when the
     *                encoding is false, "<encoding> <type>" otherwise
     */
    public function getType()
    {
        // ASCII is treated as the plain case and gets no prefix at all.
        if ('ASCII' === $this->encoding) {
            return $this->type;
        }

        $prefix = false === $this->encoding ? 'binary' : $this->encoding;

        return $prefix.' '.$this->type;
    }

    /**
     * Builds a short, double-quoted rendering of the value.
     *
     * @return null|string The `contents` of `$this->value` wrapped in
     *                     double quotes, or null when `$this->value` is falsy
     */
    public function getValueShort()
    {
        $representation = $this->value;

        if (!$representation) {
            return null;
        }

        return '"'.$representation->contents.'"';
    }

    /**
     * Runs the parent transplant and, when the source object is also a
     * BlobObject, copies its encoding onto this object.
     *
     * @param BasicObject $old Object to take over from
     */
    public function transplant(BasicObject $old)
    {
        parent::transplant($old);

        if (!($old instanceof self)) {
            return;
        }

        $this->encoding = $old->encoding;
    }

    /**
     * Encoding-aware string length.
     *
     * @param string       $string   String to measure
     * @param false|string $encoding Encoding to use, or false to detect it
     *
     * @return int Result of mb_strlen when mbstring is available and the
     *             encoding is truthy and not ASCII, otherwise the byte
     *             length from strlen
     */
    public static function strlen($string, $encoding = false)
    {
        if (!\function_exists('mb_strlen')) {
            return \strlen($string);
        }

        if (false === $encoding) {
            $encoding = self::detectEncoding($string);
        }

        $useMbstring = $encoding && 'ASCII' !== $encoding;

        return $useMbstring ? \mb_strlen($string, $encoding) : \strlen($string);
    }

    /**
     * Encoding-aware substring.
     *
     * @param string       $string   String to cut
     * @param int          $start    Start offset
     * @param null|int     $length   Maximum length, or null for "to the end"
     * @param false|string $encoding Encoding to use, or false to detect it
     *
     * @return string Result of mb_substr when mbstring is available and the
     *                encoding is truthy and not ASCII, otherwise the result
     *                of the byte-wise substr
     */
    public static function substr($string, $start, $length = null, $encoding = false)
    {
        $hasMbstring = \function_exists('mb_substr');

        if ($hasMbstring && false === $encoding) {
            $encoding = self::detectEncoding($string);
        }

        if ($hasMbstring && $encoding && 'ASCII' !== $encoding) {
            return \mb_substr($string, $start, $length, $encoding);
        }

        // Special case for substr/mb_substr discrepancy
        if ('' === $string) {
            return '';
        }

        // The byte-wise call is never handed a null length; "no limit" is
        // expressed as PHP_INT_MAX instead.
        $byteLength = null === $length ? PHP_INT_MAX : $length;

        return \substr($string, $start, $byteLength);
    }

    /**
     * Tries to work out the character encoding of a string.
     *
     * Checks, in order: strict mbstring detection against $char_encodings,
     * a control-character test that marks the string as binary, and finally
     * an iconv round trip against each of $legacy_encodings.
     *
     * @param string $string String to inspect
     *
     * @return false|string Name of the matched encoding, or false when
     *                      nothing matched
     */
    public static function detectEncoding($string)
    {
        $hasMbstring = \function_exists('mb_detect_encoding');

        if ($hasMbstring) {
            $detected = \mb_detect_encoding($string, self::$char_encodings, true);

            if ($detected) {
                return $detected;
            }
        }

        // Pretty much every character encoding reserves the first 32 byte
        // values for control characters. Once the multi-byte check above has
        // not matched, finding any control character other than tab, nl and
        // cr is taken to mean the data is binary.
        if (\preg_match('/[\\x00-\\x08\\x0B\\x0C\\x0E-\\x1F]/', $string)) {
            return false;
        }

        if (\function_exists('iconv')) {
            foreach (self::$legacy_encodings as $candidate) {
                // Converting from an encoding to itself only hands back the
                // identical string when iconv accepted every byte; errors
                // are silenced on purpose because a failure is the signal.
                $roundTrip = @\iconv($candidate, $candidate, $string);

                if ($roundTrip === $string) {
                    return $candidate;
                }
            }
        } elseif (!$hasMbstring) { // @codeCoverageIgnore
            // With neither mb_detect_encoding, nor iconv, nor the polyfills
            // available there is not much left to try...
            // Pretend it's ASCII and pray the browser renders it properly.
            return 'ASCII'; // @codeCoverageIgnore
        }

        return false;
    }
}
| 178: |