| 1: | <?php
|
| 2: | namespace Geekwright\RegDom;
|
| 3: |
|
| 4: | |
| 5: | |
| 6: | |
| 7: | |
| 8: | |
| 9: | |
| 10: | |
| 11: | |
| 12: | |
| 13: |
|
/**
 * Loads the Public Suffix List (PSL) and exposes it as a nested array tree.
 *
 * The list is read from a remote URL or from a file below this directory,
 * parsed into a tree keyed from the top level domain downwards, and the
 * parsed tree is serialized into the data directory so that later requests
 * can skip the download and the parse.
 */
class PublicSuffixList
{
    /** @var string canonical location of the published list */
    protected $sourceURL = 'https://publicsuffix.org/list/public_suffix_list.dat';

    /** @var string path, relative to __DIR__, of the locally saved copy of the list */
    protected $localPSL = 'public_suffix_list.dat';

    /** @var string file name prefix that identifies serialized tree cache files */
    protected $cachedPrefix = 'cached_';

    /** @var array|null parsed tree, or null when nothing has been loaded yet */
    protected $tree;

    /** @var string|null list location currently in use, null meaning "pick a default" */
    protected $url;

    /** @var string data directory, relative to __DIR__, with leading and trailing slash */
    protected $dataDir = '/../data/';

    /**
     * @param string|null $url location of the list, or null to use the default
     */
    public function __construct($url = null)
    {
        $this->setURL($url);
    }

    /**
     * Set the location of the list and discard any tree loaded so far.
     *
     * @param string|null $url remote URL, a path relative to this directory,
     *                         or null to use the default
     *
     * @return void
     */
    public function setURL($url)
    {
        $this->url = $url;
        $this->tree = null;
    }

    /**
     * Derive the local copy name and, when no URL was given, choose one.
     *
     * With a null URL the local copy is preferred if it exists; otherwise
     * the canonical source URL is used.
     *
     * @return void
     */
    protected function setFallbackURL()
    {
        $this->setLocalPSLName($this->url);
        if (null !== $this->url) {
            return;
        }
        $this->url = file_exists(__DIR__ . $this->localPSL) ? $this->localPSL : $this->sourceURL;
    }

    /**
     * Populate the tree from the cache, or by reading and parsing the list.
     *
     * @return void
     *
     * @throws \RuntimeException when the list cannot be read
     */
    protected function loadTree()
    {
        $this->setFallbackURL();

        $cached = $this->readCachedPSL($this->url);
        if (false !== $cached) {
            $this->tree = $cached;
            return;
        }

        $list = $this->readPSL();
        if (false === $list) {
            // Keep the tree unset so the next getTree() retries the load
            // instead of silently handing out an empty tree.
            $this->tree = null;
            throw new \RuntimeException('Cannot read ' . $this->url);
        }

        $this->tree = array();
        $this->parsePSL($list);
        $this->cachePSL($this->url);
    }

    /**
     * Parse the text of a list into the tree.
     *
     * Blank lines and lines starting with "//" are skipped. Surrounding
     * whitespace (including the "\r" of CRLF line endings) is removed, and a
     * rule ends at the first whitespace as the list format specifies.
     *
     * @param string $fileData contents of the list
     *
     * @return void
     */
    protected function parsePSL($fileData)
    {
        if (!is_array($this->tree)) {
            $this->tree = array();
        }

        foreach (explode("\n", $fileData) as $line) {
            $line = trim($line);
            if ('' === $line || $this->startsWith($line, '//')) {
                continue;
            }

            // Anything after the first whitespace is not part of the rule.
            $rule = substr($line, 0, strcspn($line, " \t"));

            $this->buildSubDomain($this->tree, explode('.', $rule));
        }
    }

    /**
     * Test whether a string begins with a given prefix.
     *
     * @param string $search      string to examine
     * @param string $startString prefix to look for
     *
     * @return bool true if $search starts with $startString
     */
    protected function startsWith($search, $startString)
    {
        // strncmp only inspects the prefix length and treats an empty prefix
        // the same way on every PHP version.
        return 0 === strncmp($search, $startString, strlen($startString));
    }

    /**
     * Add the labels of one rule to the tree, rightmost label first.
     *
     * A label prefixed with "!" marks an exception rule: the node for that
     * label receives a "!" key and no further labels are processed.
     *
     * @param array    $node     tree node to extend, passed by reference
     * @param string[] $tldParts remaining labels of the rule, leftmost first
     *
     * @return void
     */
    protected function buildSubDomain(&$node, $tldParts)
    {
        $dom = trim((string) array_pop($tldParts));

        $isNotDomain = $this->startsWith($dom, '!');
        if ($isNotDomain) {
            $dom = (string) substr($dom, 1);
        }

        if (!array_key_exists($dom, $node)) {
            $node[$dom] = array();
        }

        if ($isNotDomain) {
            // Set the marker even when an earlier rule already created the
            // node, so the result does not depend on rule order.
            $node[$dom]['!'] = '';
            return;
        }

        if (count($tldParts) > 0) {
            $this->buildSubDomain($node[$dom], $tldParts);
        }
    }

    /**
     * Return the parsed tree, loading it on first use.
     *
     * @return array the tree
     *
     * @throws \RuntimeException when the list cannot be read
     */
    public function getTree()
    {
        if (null === $this->tree) {
            $this->loadTree();
        }
        return $this->tree;
    }

    /**
     * Read the raw list from the current URL.
     *
     * A URL with a scheme or host is treated as remote; anything else is
     * read relative to this directory. If the stream read fails and the curl
     * extension is available, curl is tried with the URL as given. Data
     * obtained for a remote URL is also saved as the local copy.
     *
     * @return string|false contents of the list, or false on failure
     */
    protected function readPSL()
    {
        $parts = parse_url($this->url);
        $remote = isset($parts['scheme']) || isset($parts['host']);

        $contents = file_get_contents(($remote ? '' : __DIR__) . $this->url);

        if (false === $contents && function_exists('curl_init') && false !== ($curlHandle = curl_init())) {
            curl_setopt($curlHandle, CURLOPT_URL, $this->url);
            curl_setopt($curlHandle, CURLOPT_FAILONERROR, true);
            curl_setopt($curlHandle, CURLOPT_RETURNTRANSFER, 1);
            curl_setopt($curlHandle, CURLOPT_CONNECTTIMEOUT, 5);
            $contents = curl_exec($curlHandle);
            // curl_close() has no effect from PHP 8.0 on; only call it where it matters.
            if (PHP_VERSION_ID < 80000) {
                curl_close($curlHandle);
            }
        }

        if (false === $contents) {
            return false;
        }

        if ($remote) {
            $this->saveLocalPSL($contents);
        }
        return $contents;
    }

    /**
     * Build the cache file name for a list location.
     *
     * @param string $url location of the list
     *
     * @return string full path of the cache file
     */
    protected function getCacheFileName($url)
    {
        return __DIR__ . $this->dataDir . $this->cachedPrefix . md5($url);
    }

    /**
     * Read a previously cached tree for a list location.
     *
     * @param string $url location of the list
     *
     * @return array|false the cached tree, or false if there is no usable cache
     */
    protected function readCachedPSL($url)
    {
        $cacheFile = $this->getCacheFileName($url);
        if (!file_exists($cacheFile)) {
            return false;
        }

        $cachedTree = file_get_contents($cacheFile);
        if (false === $cachedTree) {
            return false;
        }

        if ((int) PHP_VERSION_ID < 70000) {
            // NOTE(review): before PHP 7 unserialize() cannot be told to
            // refuse objects, so the cache file has to be trusted here.
            $tree = unserialize($cachedTree);
        } else {
            $tree = unserialize($cachedTree, array('allowed_classes' => false));
        }

        // A damaged or foreign cache file must not be handed out as a tree.
        return is_array($tree) ? $tree : false;
    }

    /**
     * Write the current tree to the cache file for a list location.
     *
     * @param string $url location of the list
     *
     * @return int|false bytes written, or false on failure
     */
    protected function cachePSL($url)
    {
        return file_put_contents($this->getCacheFileName($url), serialize($this->tree));
    }

    /**
     * Save list contents as the local copy.
     *
     * @param string $fileContents contents of the list
     *
     * @return int|false bytes written, or false on failure
     */
    protected function saveLocalPSL($fileContents)
    {
        return file_put_contents(__DIR__ . $this->localPSL, $fileContents);
    }

    /**
     * Set the local copy path from the file name part of a URL.
     *
     * Only the base name of the URL path is used, placed inside the data
     * directory. When the URL has no usable path the default list file name
     * is used instead.
     *
     * @param string|null $url location of the list, null meaning the source URL
     *
     * @return void
     */
    protected function setLocalPSLName($url)
    {
        if (null === $url) {
            $url = $this->sourceURL;
        }

        // Yields null when the URL has no path and false when it is malformed.
        $path = parse_url($url, PHP_URL_PATH);
        $fileName = is_string($path) ? basename($path) : '';
        if ('' === $fileName) {
            $fileName = 'public_suffix_list.dat';
        }

        $this->localPSL = $this->dataDir . $fileName;
    }

    /**
     * Delete files from the data directory.
     *
     * @param bool $cacheOnly true to delete only files whose name starts with
     *                        the cache prefix, false to delete every regular file
     *
     * @return void
     */
    public function clearDataDirectory($cacheOnly = false)
    {
        $dir = __DIR__ . $this->dataDir;
        if (!is_dir($dir)) {
            return;
        }

        $dirHandle = opendir($dir);
        if (!$dirHandle) {
            return;
        }

        while (false !== ($file = readdir($dirHandle))) {
            // Skips "." and ".." as well as any sub directories.
            if ('file' !== filetype($dir . $file)) {
                continue;
            }
            if (false === $cacheOnly || $this->startsWith($file, $this->cachedPrefix)) {
                unlink($dir . $file);
            }
        }
        closedir($dirHandle);
    }
}
|
| 301: | |