1: | <?php
|
2: | namespace Geekwright\RegDom;
|
3: |
|
4: | |
5: | |
6: | |
7: | |
8: | |
9: | |
10: | |
11: | |
12: | |
13: |
|
/**
 * Loads the Public Suffix List (PSL), parses it into a nested array tree and
 * caches the parsed tree on disk.
 *
 * The list is read from the configured URL (or from a file path that is
 * resolved relative to this class's directory). Remote downloads are mirrored
 * into the data directory, and the parsed tree is stored there in serialized
 * form under a name derived from the md5 of the URL.
 *
 * Tree layout (as built by buildSubDomain()): the top level is keyed by the
 * right-most label of each rule; every value is an array keyed by the next
 * label to the left. An exception rule ("!label...") is stored as
 * array('!' => '') under the label with the '!' removed.
 */
class PublicSuffixList
{
    /** @var string default remote location of the list */
    protected $sourceURL = 'https://publicsuffix.org/list/public_suffix_list.dat';

    /** @var string local copy of the list; setLocalPSLName() rewrites this to a path relative to __DIR__ */
    protected $localPSL = 'public_suffix_list.dat';

    /** @var string file name prefix used for serialized tree cache files */
    protected $cachedPrefix = 'cached_';

    /** @var array|null parsed tree, or null when nothing has been loaded yet */
    protected $tree;

    /** @var string|null URL or __DIR__-relative path the list is loaded from */
    protected $url;

    /** @var string data directory, relative to __DIR__ (leading and trailing slash required) */
    protected $dataDir = '/../data/';

    /**
     * @param string|null $url URL or __DIR__-relative path of the list; null selects
     *                         the local copy if it exists, otherwise the default source URL
     */
    public function __construct($url = null)
    {
        $this->setURL($url);
    }

    /**
     * Set the location of the list and discard any tree already loaded.
     *
     * @param string|null $url URL or __DIR__-relative path; null defers the choice to load time
     *
     * @return void
     */
    public function setURL($url)
    {
        $this->url = $url;
        $this->tree = null;
    }

    /**
     * Derive the local copy name from the current URL and, when no URL was
     * given, pick the local copy if present or the default source URL otherwise.
     *
     * @return void
     */
    protected function setFallbackURL()
    {
        $this->setLocalPSLName($this->url);
        if (null === $this->url) {
            $this->url = file_exists(__DIR__ . $this->localPSL) ? $this->localPSL : $this->sourceURL;
        }
    }

    /**
     * Populate $this->tree from the cache, or by reading and parsing the list.
     *
     * @return void
     *
     * @throws \RuntimeException when the list cannot be read
     */
    protected function loadTree()
    {
        $this->setFallbackURL();

        $this->tree = $this->readCachedPSL($this->url);
        if (false !== $this->tree) {
            return;
        }

        $this->tree = array();
        $list = $this->readPSL();

        if (false === $list) {
            // Reset to "not loaded" so that a later getTree() retries the load
            // instead of silently handing out an empty tree.
            $this->tree = null;
            throw new \RuntimeException('Cannot read ' . $this->url);
        }

        $this->parsePSL($list);
        // Caching is best effort; a failed write only costs a re-parse next time.
        $this->cachePSL($this->url);
    }

    /**
     * Parse the text of a list into $this->tree.
     *
     * Blank lines and lines starting with "//" are skipped. Each remaining line
     * is read up to the first space or tab, split on "." and merged into the tree.
     *
     * @param string $fileData complete text of the list
     *
     * @return void
     */
    protected function parsePSL($fileData)
    {
        foreach (explode("\n", $fileData) as $line) {
            // Trimming also drops the "\r" of CRLF line endings, so that an
            // "empty" line cannot end up as a '' label in the tree.
            $line = trim($line);
            if ('' === $line || $this->startsWith($line, '//')) {
                continue;
            }

            // A rule ends at the first whitespace; anything after it is ignored.
            $rule = substr($line, 0, strcspn($line, " \t"));

            $this->buildSubDomain($this->tree, explode('.', $rule));
        }
    }

    /**
     * Test whether a string begins with a given prefix.
     *
     * @param string $search      string to inspect
     * @param string $startString prefix to look for
     *
     * @return bool true if $search starts with $startString
     */
    protected function startsWith($search, $startString)
    {
        return (0 === strpos($search, $startString));
    }

    /**
     * Merge one rule into the tree, consuming its labels from right to left.
     *
     * NOTE(review): when the label already exists in $node, an exception rule
     * does not add the '!' marker to the existing entry. Left as is because the
     * code that reads the tree is not visible here - confirm whether that matters.
     *
     * @param array    $node     tree node to extend (modified in place)
     * @param string[] $tldParts remaining labels of the rule, left-most first
     *
     * @return void
     */
    protected function buildSubDomain(&$node, $tldParts)
    {
        $dom = trim(array_pop($tldParts));

        $isNotDomain = false;
        if ($this->startsWith($dom, "!")) {
            $dom = substr($dom, 1);
            $isNotDomain = true;
        }

        if (!array_key_exists($dom, $node)) {
            if ($isNotDomain) {
                $node[$dom] = array("!" => "");
            } else {
                $node[$dom] = array();
            }
        }

        if (!$isNotDomain && count($tldParts) > 0) {
            $this->buildSubDomain($node[$dom], $tldParts);
        }
    }

    /**
     * Return the parsed tree, loading it on first use.
     *
     * @return array the tree described in the class documentation
     *
     * @throws \RuntimeException when the list has to be loaded and cannot be read
     */
    public function getTree()
    {
        if (null === $this->tree) {
            $this->loadTree();
        }
        return $this->tree;
    }

    /**
     * Read the raw list from $this->url.
     *
     * A URL with a scheme or host is treated as remote and read as given; any
     * other value is read as a path relative to __DIR__. If file_get_contents()
     * fails and the curl extension is available, curl is tried as well. Data
     * obtained from a remote URL is also written to the local copy.
     *
     * @return string|false contents of the list, or false if it could not be read
     */
    protected function readPSL()
    {
        $parts = parse_url($this->url);
        $remote = isset($parts['scheme']) || isset($parts['host']);

        $contents = file_get_contents(($remote ? '' : __DIR__) . $this->url);

        // Second attempt through curl, e.g. for hosts where allow_url_fopen is off.
        if (false === $contents && function_exists('curl_init') && false !== ($curlHandle = curl_init())) {
            curl_setopt($curlHandle, CURLOPT_URL, $this->url);
            curl_setopt($curlHandle, CURLOPT_FAILONERROR, true);
            curl_setopt($curlHandle, CURLOPT_RETURNTRANSFER, 1);
            curl_setopt($curlHandle, CURLOPT_CONNECTTIMEOUT, 5);
            $contents = curl_exec($curlHandle);
            curl_close($curlHandle);
        }

        if (false === $contents) {
            return false;
        }

        if ($remote) {
            $this->saveLocalPSL($contents);
        }
        return $contents;
    }

    /**
     * Build the full path of the cache file that belongs to a URL.
     *
     * @param string $url URL or path the tree was built from
     *
     * @return string cache file path inside the data directory
     */
    protected function getCacheFileName($url)
    {
        return __DIR__ . $this->dataDir . $this->cachedPrefix . md5($url);
    }

    /**
     * Load a previously cached tree for a URL.
     *
     * @param string $url URL or path the tree was built from
     *
     * @return array|false the cached tree, or false if there is no usable cache
     */
    protected function readCachedPSL($url)
    {
        $cacheFile = $this->getCacheFileName($url);
        if (!file_exists($cacheFile)) {
            return false;
        }

        $cachedTree = file_get_contents($cacheFile);
        if (false === $cachedTree) {
            return false;
        }

        if ((int) PHP_VERSION_ID < 70000) {
            // The options argument of unserialize() only exists from PHP 7.0 on.
            $tree = unserialize($cachedTree);
        } else {
            // The tree holds only arrays and strings, so never instantiate objects.
            $tree = unserialize($cachedTree, array('allowed_classes' => false));
        }

        // Anything but an array means the cache file is damaged; treat it as a miss.
        return is_array($tree) ? $tree : false;
    }

    /**
     * Write the current tree to the cache file that belongs to a URL.
     *
     * @param string $url URL or path the tree was built from
     *
     * @return int|false number of bytes written, or false on failure
     */
    protected function cachePSL($url)
    {
        return file_put_contents($this->getCacheFileName($url), serialize($this->tree));
    }

    /**
     * Store raw list data as the local copy.
     *
     * @param string $fileContents raw text of the list
     *
     * @return int|false number of bytes written, or false on failure
     */
    protected function saveLocalPSL($fileContents)
    {
        return file_put_contents(__DIR__ . $this->localPSL, $fileContents);
    }

    /**
     * Set the local copy name to the data directory plus the file name taken
     * from the path of a URL.
     *
     * @param string|null $url URL to take the file name from; null means the default source URL
     *
     * @return void
     */
    protected function setLocalPSLName($url)
    {
        if (null === $url) {
            $url = $this->sourceURL;
        }

        $parts = parse_url($url);
        // parse_url() returns false for malformed input and omits 'path' for
        // URLs such as "https://example.com".
        $path = (is_array($parts) && isset($parts['path'])) ? $parts['path'] : '';

        $fileName = basename($path);
        if ('' === $fileName) {
            // Without a usable file name the local copy would point at the data
            // directory itself, so fall back to the default name.
            $fileName = 'public_suffix_list.dat';
        }

        $this->localPSL = $this->dataDir . $fileName;
    }

    /**
     * Delete regular files from the data directory.
     *
     * @param bool $cacheOnly true to delete only files whose name starts with the
     *                        cache prefix, false to delete every regular file
     *
     * @return void
     */
    public function clearDataDirectory($cacheOnly = false)
    {
        $dir = __DIR__ . $this->dataDir;
        if (!is_dir($dir)) {
            return;
        }

        $dirHandle = opendir($dir);
        if (!$dirHandle) {
            return;
        }

        while (($file = readdir($dirHandle)) !== false) {
            // filetype() reports 'dir' for "." and ".." and 'link' for symlinks,
            // so only plain files are removed.
            if (filetype($dir . $file) === 'file'
                && (false === $cacheOnly || $this->startsWith($file, $this->cachedPrefix))) {
                unlink($dir . $file);
            }
        }
        closedir($dirHandle);
    }
}
|
301: | |