| 1: | <?php | 
| 2: | |
| 3: | /* | 
| 4: | * The MIT License (MIT) | 
| 5: | * | 
| 6: | * Copyright (c) 2013 Jonathan Vollebregt (jnvsor@gmail.com), Rokas Šleinius (raveren@gmail.com) | 
| 7: | * | 
| 8: | * Permission is hereby granted, free of charge, to any person obtaining a copy of | 
| 9: | * this software and associated documentation files (the "Software"), to deal in | 
| 10: | * the Software without restriction, including without limitation the rights to | 
| 11: | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of | 
| 12: | * the Software, and to permit persons to whom the Software is furnished to do so, | 
| 13: | * subject to the following conditions: | 
| 14: | * | 
| 15: | * The above copyright notice and this permission notice shall be included in all | 
| 16: | * copies or substantial portions of the Software. | 
| 17: | * | 
| 18: | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | 
| 19: | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS | 
| 20: | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR | 
| 21: | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER | 
| 22: | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | 
| 23: | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | 
| 24: | */ | 
| 25: | |
| 26: | namespace Kint\Object; | 
| 27: | |
/**
 * Object describing a string value together with its detected character
 * encoding, plus static encoding-aware string helpers.
 */
class BlobObject extends BasicObject
{
    /**
     * @var array Character encodings to detect
     *
     * @see https://secure.php.net/function.mb-detect-order
     *
     * In practice, mb_detect_encoding can only successfully determine the
     * difference between the following common charsets at once without
     * breaking things for one of the other charsets:
     * - ASCII
     * - UTF-8
     * - SJIS
     * - EUC-JP
     *
     * The order of the charsets is significant. If you put UTF-8 before ASCII
     * it will never match ASCII, because UTF-8 is a superset of ASCII.
     * Similarly, SJIS and EUC-JP frequently match UTF-8 strings, so you should
     * check UTF-8 first. SJIS and EUC-JP seem to work either way, but SJIS is
     * more common so it should probably be first.
     *
     * While you're free to experiment with other charsets, remember to keep
     * this behavior in mind when setting up your char_encodings array.
     *
     * This depends on the mbstring extension
     */
    public static $char_encodings = array(
        'ASCII',
        'UTF-8',
    );

    /**
     * @var array Legacy character encodings to detect
     *
     * @see https://secure.php.net/function.iconv
     *
     * Assuming the other encoding checks fail, this will perform a
     * simple iconv conversion to check for invalid bytes. If any are
     * found it will not match.
     *
     * This can be useful for ambiguous single byte encodings like
     * windows-125x and iso-8859-x which have practically undetectable
     * differences because they use every single byte available.
     *
     * This is *NOT* reliable and should not be trusted implicitly. As
     * with char_encodings, the order of the charsets is significant.
     *
     * This depends on the iconv extension
     */
    public static $legacy_encodings = array();

    /**
     * @var string Base type name used by getType()
     */
    public $type = 'string';

    /**
     * @var false|string Character encoding of the value; false makes
     *                   getType() report the value as binary
     */
    public $encoding = false;

    /**
     * @var array Hints attached to this object
     *
     * NOTE(review): presumably consumed by code outside this file - confirm.
     */
    public $hints = array('string');

    /**
     * Builds the type label, qualified by the encoding.
     *
     * - encoding false: "binary <type>"
     * - encoding ASCII: "<type>"
     * - anything else:  "<encoding> <type>"
     *
     * @return string
     */
    public function getType()
    {
        if (false === $this->encoding) {
            return 'binary '.$this->type;
        }

        if ('ASCII' === $this->encoding) {
            return $this->type;
        }

        return $this->encoding.' '.$this->type;
    }

    /**
     * Returns the contents of $this->value wrapped in double quotes.
     *
     * @return null|string The quoted contents, or null when $this->value is falsy
     */
    public function getValueShort()
    {
        $rep = $this->value;

        if ($rep) {
            return '"'.$rep->contents.'"';
        }

        return null;
    }

    /**
     * Copies state from another object via the parent implementation, and
     * additionally carries over the encoding when the source is a BlobObject.
     *
     * @param BasicObject $old Object to copy from
     */
    public function transplant(BasicObject $old)
    {
        parent::transplant($old);

        if ($old instanceof self) {
            $this->encoding = $old->encoding;
        }
    }

    /**
     * Encoding-aware string length.
     *
     * Uses mb_strlen when it is available and the encoding is a non-ASCII
     * encoding; otherwise falls back to the byte length.
     *
     * @param string       $string   String to measure
     * @param false|string $encoding Encoding name, or false to detect it
     *
     * @return int
     */
    public static function strlen($string, $encoding = false)
    {
        if (\function_exists('mb_strlen')) {
            if (false === $encoding) {
                $encoding = self::detectEncoding($string);
            }

            if ($encoding && 'ASCII' !== $encoding) {
                return \mb_strlen($string, $encoding);
            }
        }

        return \strlen($string);
    }

    /**
     * Encoding-aware substring.
     *
     * Uses mb_substr when it is available and the encoding is a non-ASCII
     * encoding; otherwise falls back to the byte-wise substr. Always returns
     * a string so both code paths behave the same way.
     *
     * @param string       $string   String to cut
     * @param int          $start    Start offset
     * @param null|int     $length   Maximum length, or null for "to the end"
     * @param false|string $encoding Encoding name, or false to detect it
     *
     * @return string
     */
    public static function substr($string, $start, $length = null, $encoding = false)
    {
        if (\function_exists('mb_substr')) {
            if (false === $encoding) {
                $encoding = self::detectEncoding($string);
            }

            if ($encoding && 'ASCII' !== $encoding) {
                return \mb_substr($string, $start, $length, $encoding);
            }
        }

        // Special case for substr/mb_substr discrepancy
        if ('' === $string) {
            return '';
        }

        $ret = \substr($string, $start, isset($length) ? $length : PHP_INT_MAX);

        // Before PHP 8.0 substr returns false for out of range offsets (eg.
        // $start past the end of the string) where mb_substr returns an empty
        // string. Normalize so callers get the same result on either path.
        return false === $ret ? '' : $ret;
    }

    /**
     * Attempts to determine the character encoding of a string.
     *
     * Order of checks:
     * 1. mb_detect_encoding (strict) against self::$char_encodings
     * 2. Control characters other than tab, nl and cr mean binary
     * 3. iconv round trip against each of self::$legacy_encodings
     *
     * @param string $string String to inspect
     *
     * @return false|string The encoding name, or false if none matched
     */
    public static function detectEncoding($string)
    {
        if (\function_exists('mb_detect_encoding')) {
            if ($ret = \mb_detect_encoding($string, self::$char_encodings, true)) {
                return $ret;
            }
        }

        // Pretty much every character encoding uses first 32 bytes as control
        // characters. If it's not a multi-byte format it's safe to say matching
        // any control character besides tab, nl, and cr means it's binary.
        if (\preg_match('/[\\x00-\\x08\\x0B\\x0C\\x0E-\\x1F]/', $string)) {
            return false;
        }

        if (\function_exists('iconv')) {
            foreach (self::$legacy_encodings as $encoding) {
                // Converting an encoding to itself only returns the input
                // unchanged when it contains no bytes iconv rejects. Errors
                // are suppressed because a failed conversion is an expected
                // outcome here, not a problem.
                if (@\iconv($encoding, $encoding, $string) === $string) {
                    return $encoding;
                }
            }
        } elseif (!\function_exists('mb_detect_encoding')) { // @codeCoverageIgnore
            // If a user has neither mb_detect_encoding, nor iconv, nor the
            // polyfills, there's not much we can do about it...
            // Pretend it's ASCII and pray the browser renders it properly.
            return 'ASCII'; // @codeCoverageIgnore
        }

        return false;
    }
}
| 178: |