1: <?php
2: namespace Geekwright\RegDom;
3:
4: 5: 6: 7: 8: 9: 10: 11: 12: 13:
/**
 * Loads a Public Suffix List (PSL), parses it into a nested array keyed by
 * domain labels (top-level label first), and caches the parsed result on disk.
 *
 * The list is read from a caller supplied URL or path, from a previously saved
 * local copy in the data directory, or from the official publicsuffix.org URL.
 */
class PublicSuffixList
{
    /** @var string URL used when no other source is given and no local copy exists */
    protected $sourceURL = 'https://publicsuffix.org/list/public_suffix_list.dat';

    /** @var string local copy name; rewritten by setLocalPSLName() to include the data directory */
    protected $localPSL = 'public_suffix_list.dat';

    /** @var string file name prefix that marks serialized tree caches in the data directory */
    protected $cachedPrefix = 'cached_';

    /** @var array|null parsed suffix tree, null until loaded */
    protected $tree;

    /** @var string|null URL or path (relative to this directory) of the list to load */
    protected $url;

    /** @var string data directory, relative to the directory containing this file */
    protected $dataDir = '/../data/';

    /**
     * @param string|null $url URL or local path of the list, null to use the defaults
     */
    public function __construct($url = null)
    {
        $this->setURL($url);
    }

    /**
     * Set the list source and discard any tree that is already loaded.
     *
     * @param string|null $url URL or local path of the list, null to use the defaults
     *
     * @return void
     */
    public function setURL($url)
    {
        $this->url = $url;
        $this->tree = null;
    }

    /**
     * Derive the local copy name and, when no URL was supplied, choose a default
     * source: the local copy if it exists, otherwise the official source URL.
     *
     * @return void
     */
    protected function setFallbackURL()
    {
        $this->setLocalPSLName($this->url);
        if (null === $this->url) {
            $this->url = file_exists(__DIR__ . $this->localPSL) ? $this->localPSL : $this->sourceURL;
        }
    }

    /**
     * Populate the tree from the cache if possible, otherwise read and parse the
     * list and write a fresh cache.
     *
     * @return void
     *
     * @throws \RuntimeException when the list cannot be read from any source
     */
    protected function loadTree()
    {
        $this->setFallbackURL();

        $cachedTree = $this->readCachedPSL($this->url);
        if (false !== $cachedTree) {
            $this->tree = $cachedTree;
            return;
        }

        $list = $this->readPSL();
        if (false === $list) {
            // Leave the tree unset so that a later getTree() call retries the
            // load instead of silently returning an empty tree.
            $this->tree = null;
            throw new \RuntimeException('Cannot read ' . $this->url);
        }

        $this->tree = array();
        $this->parsePSL($list);
        $this->cachePSL($this->url);
    }

    /**
     * Parse the text of a suffix list into $this->tree.
     *
     * Blank lines and lines starting with "//" are skipped. Surrounding
     * whitespace (including the "\r" of CRLF line endings) is removed, and only
     * the text up to the first whitespace of a line is treated as the rule.
     *
     * @param string $fileData contents of the suffix list
     *
     * @return void
     */
    protected function parsePSL($fileData)
    {
        foreach (explode("\n", $fileData) as $rawLine) {
            $line = trim($rawLine);
            if ('' === $line || $this->startsWith($line, '//')) {
                continue;
            }

            // The rule is the first whitespace delimited token on the line.
            $tokens = preg_split('/\s+/', $line, 2);

            $this->buildSubDomain($this->tree, explode('.', $tokens[0]));
        }
    }

    /**
     * Test whether a string begins with another string (byte-wise, case sensitive).
     *
     * @param string $search      string to examine
     * @param string $startString prefix to look for
     *
     * @return bool true if $search starts with $startString
     */
    protected function startsWith($search, $startString)
    {
        // strncmp compares bytes, avoiding the numeric-string juggling that a
        // loose == comparison performs on values such as '1e1' and '10.'.
        return 0 === strncmp($search, $startString, strlen($startString));
    }

    /**
     * Add the labels of one rule to the tree, right-most label first.
     *
     * A label starting with "!" is stored without the "!" and, when it is newly
     * created, is given a "!" child entry; recursion stops at such a label.
     *
     * @param array    $node     tree node to extend, modified in place
     * @param string[] $tldParts remaining labels of the rule, left to right
     *
     * @return void
     */
    protected function buildSubDomain(&$node, $tldParts)
    {
        $label = trim(array_pop($tldParts));

        $isException = $this->startsWith($label, '!');
        if ($isException) {
            $label = substr($label, 1);
        }

        if (!array_key_exists($label, $node)) {
            $node[$label] = $isException ? array("!" => "") : array();
        }

        if ($isException || count($tldParts) === 0) {
            return;
        }

        $this->buildSubDomain($node[$label], $tldParts);
    }

    /**
     * Return the parsed suffix tree, loading it on first use.
     *
     * @return array
     *
     * @throws \RuntimeException when the list cannot be read
     */
    public function getTree()
    {
        if (null === $this->tree) {
            $this->loadTree();
        }
        return $this->tree;
    }

    /**
     * Read the raw list from $this->url.
     *
     * A value with a scheme or host is treated as remote and read as given;
     * anything else is read relative to the directory of this file. If
     * file_get_contents() fails and the curl extension is available, the read
     * is retried with curl. A successfully read remote list is saved locally.
     *
     * @return string|false list contents, or false if every attempt failed
     */
    protected function readPSL()
    {
        $parts = parse_url($this->url);
        $remote = isset($parts['scheme']) || isset($parts['host']);

        $contents = file_get_contents(($remote ? '' : __DIR__) . $this->url);

        // Retry with curl if file_get_contents failed.
        if (false === $contents && function_exists('curl_init') && false !== ($curlHandle = curl_init())) {
            curl_setopt($curlHandle, CURLOPT_URL, $this->url);
            curl_setopt($curlHandle, CURLOPT_FAILONERROR, true);
            curl_setopt($curlHandle, CURLOPT_RETURNTRANSFER, 1);
            curl_setopt($curlHandle, CURLOPT_CONNECTTIMEOUT, 5);
            $contents = curl_exec($curlHandle);
            // curl_close() has no effect from PHP 8.0 on; only call it where
            // it is still needed to release the handle.
            if (PHP_VERSION_ID < 80000) {
                curl_close($curlHandle);
            }
        }

        if (false === $contents) {
            return false;
        }

        if ($remote) {
            $this->saveLocalPSL($contents);
        }
        return $contents;
    }

    /**
     * Build the cache file name for a list source.
     *
     * @param string $url source the tree was built from
     *
     * @return string full path of the cache file
     */
    protected function getCacheFileName($url)
    {
        return __DIR__ . $this->dataDir . $this->cachedPrefix . md5($url);
    }

    /**
     * Read a previously cached tree for a list source.
     *
     * @param string $url source the tree was built from
     *
     * @return array|false cached tree, or false if there is no usable cache
     */
    protected function readCachedPSL($url)
    {
        $cacheFile = $this->getCacheFileName($url);
        if (!file_exists($cacheFile)) {
            return false;
        }

        $serialized = file_get_contents($cacheFile);
        if (false === $serialized) {
            return false;
        }

        // The cache only ever holds nested arrays, so refuse to instantiate
        // objects where the runtime supports that option (PHP 7.0+).
        $cachedTree = PHP_VERSION_ID >= 70000
            ? unserialize($serialized, array('allowed_classes' => false))
            : unserialize($serialized);

        // Anything other than an array is a corrupt cache; treat it as a miss.
        return is_array($cachedTree) ? $cachedTree : false;
    }

    /**
     * Write the current tree to the cache file for a list source.
     *
     * @param string $url source the tree was built from
     *
     * @return int|false bytes written, or false on failure
     */
    protected function cachePSL($url)
    {
        return file_put_contents($this->getCacheFileName($url), serialize($this->tree));
    }

    /**
     * Save list contents as the local copy.
     *
     * @param string $fileContents raw list contents
     *
     * @return int|false bytes written, or false on failure
     */
    protected function saveLocalPSL($fileContents)
    {
        return file_put_contents(__DIR__ . $this->localPSL, $fileContents);
    }

    /**
     * Set the local copy name from the file name portion of a URL.
     *
     * When the URL has no usable file name (no path, a path ending in "/", or
     * an unparseable URL) a name derived from a hash of the URL is used, so the
     * local name never resolves to the bare data directory.
     *
     * @param string|null $url list source, null to use the default source URL
     *
     * @return void
     */
    protected function setLocalPSLName($url)
    {
        if (null === $url) {
            $url = $this->sourceURL;
        }

        $path = parse_url($url, PHP_URL_PATH);
        $fileName = is_string($path) ? basename($path) : '';
        if ('' === $fileName) {
            $fileName = 'psl_' . md5($url) . '.dat';
        }

        $this->localPSL = $this->dataDir . $fileName;
    }

    /**
     * Delete regular files from the data directory.
     *
     * @param bool $cacheOnly true to delete only files whose name starts with
     *                        the cache prefix, false to delete every regular file
     *
     * @return void
     */
    public function clearDataDirectory($cacheOnly = false)
    {
        $dir = __DIR__ . $this->dataDir;
        if (!is_dir($dir)) {
            return;
        }

        $dirHandle = opendir($dir);
        if (!$dirHandle) {
            return;
        }

        while (($file = readdir($dirHandle)) !== false) {
            // Directories (including "." and "..") and links are left alone.
            if (filetype($dir . $file) !== 'file') {
                continue;
            }
            if (false === $cacheOnly || $this->startsWith($file, $this->cachedPrefix)) {
                unlink($dir . $file);
            }
        }
        closedir($dirHandle);
    }
}
299: