1: <?php
2: /* patch to keep Snoopy working in PHP8 for now */
if (!function_exists('each')) {
    /**
     * Userland stand-in for the each() builtin that is no longer available
     * in PHP 8.
     *
     * Reads the key/value pair at the array's internal pointer and then moves
     * the pointer forward by one element.
     *
     * @param array $a array to walk; its internal pointer is advanced
     *
     * @return array|false array holding the value under 1 and 'value' and the
     *                     key under 0 and 'key', or false once the pointer is
     *                     past the last element
     */
    function each(&$a)
    {
        $k = key($a);
        if ($k === null) {
            // key() yields null only when the pointer is beyond the end
            return false;
        }

        $v = current($a);
        next($a);

        return array(1 => $v, 'value' => $v, 0 => $k, 'key' => $k);
    }
}
17:
18: /*************************************************
19: *
20: * Snoopy - the PHP net client
21: * Author: Monte Ohrt <monte@ohrt.com>
22: * Copyright (c): 1999-2014, all rights reserved
23: * Version: 1.2.5
24: * This library is free software; you can redistribute it and/or
25: * modify it under the terms of the GNU Lesser General Public
26: * License as published by the Free Software Foundation; either
27: * version 2.1 of the License, or (at your option) any later version.
28: *
29: * This library is distributed in the hope that it will be useful,
30: * but WITHOUT ANY WARRANTY; without even the implied warranty of
31: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
32: * Lesser General Public License for more details.
33: *
34: * You should have received a copy of the GNU Lesser General Public
35: * License along with this library; if not, write to the Free Software
36: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
37: *
38: * You may contact the author of Snoopy by e-mail at:
39: * monte@ohrt.com
40: *
41: * The latest version of Snoopy can be obtained from:
42: * http://snoopy.sourceforge.net/
43: *
44: * @deprecated please, find another way
45: *************************************************/
46: class Snoopy
47: {
/**
 * Report the use of this deprecated class to the XOOPS logger.
 *
 * The notice is skipped when no logger object is registered under
 * $GLOBALS['xoopsLogger'], so constructing the class no longer ends in a
 * fatal "call to a member function on null" error in that situation.
 */
public function __construct()
{
    if (isset($GLOBALS['xoopsLogger']) && is_object($GLOBALS['xoopsLogger'])) {
        $GLOBALS['xoopsLogger']->addDeprecated("Use of Snoopy in XOOPS is deprecated and has been replaced in core with XoopsHttpGet. Snoopy will be removed in future versions..");
    }
}
52:
53: /**** Public variables ****/
54:
55: /* user definable vars */
56:
57: var $host = "www.php.net"; // host name we are connecting to
58: var $port = 80; // port we are connecting to
59: var $proxy_host = ""; // proxy host to use
60: var $proxy_port = ""; // proxy port to use
61: var $proxy_user = ""; // proxy user to use
62: var $proxy_pass = ""; // proxy password to use
63:
64: var $agent = "Snoopy v1.2.5"; // agent we masquerade as
65: var $referer = ""; // referer info to pass
66: var $cookies = array(); // array of cookies to pass
67: // $cookies["username"]="joe";
68: var $rawheaders = array(); // array of raw headers to send
69: // $rawheaders["Content-type"]="text/html";
70:
71: var $maxredirs = 5; // http redirection depth maximum. 0 = disallow
72: var $lastredirectaddr = ""; // contains address of last redirected address
73: var $offsiteok = true; // allows redirection off-site
74: var $maxframes = 0; // frame content depth maximum. 0 = disallow
75: var $expandlinks = true; // expand links to fully qualified URLs.
76: // this only applies to fetchlinks()
77: // submitlinks(), and submittext()
78: var $passcookies = true; // pass set cookies back through redirects
79: // NOTE: this currently does not respect
80: // dates, domains or paths.
81:
82: var $user = ""; // user for http authentication
83: var $pass = ""; // password for http authentication
84:
85: // http accept types
86: var $accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";
87:
88: var $results = ""; // where the content is put
89:
90: var $error = ""; // error messages sent here
91: var $response_code = ""; // response code returned from server
92: var $headers = array(); // headers returned from server sent here
93: var $maxlength = 500000; // max return data length (body)
94: var $read_timeout = 0; // timeout on read operations, in seconds
95: // supported only since PHP 4 Beta 4
96: // set to 0 to disallow timeouts
97: var $timed_out = false; // if a read operation timed out
98: var $status = 0; // http request status
99:
100: var $temp_dir = "/tmp"; // temporary directory that the webserver
101: // has permission to write to.
102: // under Windows, this should be C:\temp
103:
104: var $curl_path = "/usr/bin/curl";
105: // Snoopy will use cURL for fetching
106: // SSL content if a full system path to
107: // the cURL binary is supplied here.
108: // set to false if you do not have
109: // cURL installed. See http://curl.haxx.se
110: // for details on installing cURL.
111: // Snoopy does *not* use the cURL
112: // library functions built into php,
113: // as these functions are not stable
114: // as of this Snoopy release.
115:
116: // send Accept-encoding: gzip?
117: var $use_gzip = true;
118: /**** Private variables ****/
119:
120: var $_maxlinelen = 4096; // max line length (headers)
121:
122: var $_httpmethod = "GET"; // default http request method
123: var $_httpversion = "HTTP/1.0"; // default http request version
124: var $_submit_method = "POST"; // default submit method
125: var $_submit_type = "application/x-www-form-urlencoded"; // default submit type
126: var $_mime_boundary = ""; // MIME boundary for multipart/form-data submit type
127: var $_redirectaddr = false; // will be set if page fetched is a redirect
128: var $_redirectdepth = 0; // increments on an http redirect
129: var $_frameurls = array(); // frame src urls
130: var $_framedepth = 0; // increments on frame depth
131:
132: var $_isproxy = false; // set if using a proxy server
133: var $_fp_timeout = 30; // timeout for socket connection
134:
135: /*======================================================================*\
136: Function: fetch
137: Purpose: fetch the contents of a web page
138: (and possibly other protocols in the
139: future like ftp, nntp, gopher, etc.)
140: Input: $URI the location of the page to fetch
141: Output: $this->results the output text from the fetch
142: \*======================================================================*/
143:
/**
 * Fetch a page over http or https, then follow a pending redirect and any
 * collected frame URLs.
 *
 * The fetched content ends up in $this->results (written by the request
 * helpers this method calls).
 *
 * @param string $URI absolute URI of the page to fetch
 *
 * @return bool false when the scheme is not http/https, when the http
 *              connection cannot be opened, or when https is requested but
 *              the configured cURL binary is missing/not executable;
 *              true otherwise
 */
function fetch($URI)
{
    $URI_PARTS = parse_url($URI);
    // parse_url() returns false for seriously malformed URIs; normalise so
    // the lookups below never index a non-array
    if (!is_array($URI_PARTS)) {
        $URI_PARTS = array();
    }
    if (!empty($URI_PARTS["user"])) {
        $this->user = $URI_PARTS["user"];
    }
    if (!empty($URI_PARTS["pass"])) {
        $this->pass = $URI_PARTS["pass"];
    }
    if (empty($URI_PARTS["query"])) {
        $URI_PARTS["query"] = '';
    }
    if (empty($URI_PARTS["path"])) {
        $URI_PARTS["path"] = '';
    }
    // scheme and host may be absent (e.g. "www.example.com/page")
    $scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : '';
    $host = isset($URI_PARTS["host"]) ? $URI_PARTS["host"] : '';

    // request target used when talking to the origin server directly
    $path = $URI_PARTS["path"] . ($URI_PARTS["query"] ? "?" . $URI_PARTS["query"] : "");

    switch (strtolower($scheme)) {
        case "http":
            $this->host = $host;
            if (!empty($URI_PARTS["port"])) {
                $this->port = $URI_PARTS["port"];
            }
            if (!$this->_connect($fp)) {
                return false;
            }
            // _isproxy is deliberately read after _connect(), as before.
            // A proxy gets the entire URI, an origin server only the path.
            $this->_httprequest($this->_isproxy ? $URI : $path, $fp, $URI, $this->_httpmethod);
            $this->_disconnect($fp);
            break;

        case "https":
            if (!$this->curl_path) {
                return false;
            }
            if (function_exists("is_executable") && !is_executable($this->curl_path)) {
                return false;
            }
            $this->host = $host;
            if (!empty($URI_PARTS["port"])) {
                $this->port = $URI_PARTS["port"];
            }
            $this->_httpsrequest($this->_isproxy ? $URI : $path, $URI, $this->_httpmethod);
            break;

        default:
            // not a valid protocol
            $this->error = 'Invalid protocol "' . $scheme . '"\n';
            return false;
    }

    // url was redirected: follow it unless the maximum depth is reached
    if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth) {
        // "On this site" means http or https on exactly this host: the host
        // must be followed by a port, path, query, fragment or the end of the
        // address, so "host.example.org" does not pass for "host".
        $onsite = preg_match(
            "~^https?://" . preg_quote($this->host, "~") . '(?:[:/?#]|$)~i',
            $this->_redirectaddr
        );
        if ($onsite || $this->offsiteok) {
            $this->_redirectdepth++;
            $this->lastredirectaddr = $this->_redirectaddr;
            $this->fetch($this->_redirectaddr);
        }
    }

    // fetch the frame sources collected while reading the page
    if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0) {
        $frameurls = $this->_frameurls;
        $this->_frameurls = array();

        foreach ($frameurls as $frameurl) {
            if ($this->_framedepth >= $this->maxframes) {
                break;
            }
            $this->fetch($frameurl);
            $this->_framedepth++;
        }
    }

    return true;
}
258:
259: /*======================================================================*\
260: Function: submit
261: Purpose: submit an http form
262: Input: $URI the location to post the data
263: $formvars the formvars to use.
264: format: $formvars["var"] = "val";
265: $formfiles an array of files to submit
266: format: $formfiles["var"] = "/dir/filename.ext";
267: Output: $this->results the text output from the post
268: \*======================================================================*/
269:
/**
 * Submit a form over http or https, then follow a pending redirect and any
 * collected frame URLs.
 *
 * @param string       $URI       absolute URI to post the data to
 * @param array|string $formvars  form variables, $formvars["var"] = "val"
 * @param array|string $formfiles files to upload,
 *                                $formfiles["var"] = "/dir/filename.ext"
 *
 * @return bool false when the scheme is not http/https, when the http
 *              connection cannot be opened, or when https is requested but
 *              the configured cURL binary is missing/not executable;
 *              true otherwise
 */
function submit($URI, $formvars = "", $formfiles = "")
{
    $postdata = $this->_prepare_post_body($formvars, $formfiles);

    $URI_PARTS = parse_url($URI);
    // parse_url() returns false for seriously malformed URIs; normalise so
    // the lookups below never index a non-array
    if (!is_array($URI_PARTS)) {
        $URI_PARTS = array();
    }
    if (!empty($URI_PARTS["user"])) {
        $this->user = $URI_PARTS["user"];
    }
    if (!empty($URI_PARTS["pass"])) {
        $this->pass = $URI_PARTS["pass"];
    }
    if (empty($URI_PARTS["query"])) {
        $URI_PARTS["query"] = '';
    }
    if (empty($URI_PARTS["path"])) {
        $URI_PARTS["path"] = '';
    }
    // scheme and host may be absent (e.g. "www.example.com/page")
    $scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : '';
    $host = isset($URI_PARTS["host"]) ? $URI_PARTS["host"] : '';

    // request target used when talking to the origin server directly
    $path = $URI_PARTS["path"] . ($URI_PARTS["query"] ? "?" . $URI_PARTS["query"] : "");

    switch (strtolower($scheme)) {
        case "http":
            $this->host = $host;
            if (!empty($URI_PARTS["port"])) {
                $this->port = $URI_PARTS["port"];
            }
            if (!$this->_connect($fp)) {
                return false;
            }
            // _isproxy is deliberately read after _connect(), as before.
            // A proxy gets the entire URI, an origin server only the path.
            $this->_httprequest($this->_isproxy ? $URI : $path, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata);
            $this->_disconnect($fp);
            break;

        case "https":
            if (!$this->curl_path) {
                return false;
            }
            if (function_exists("is_executable") && !is_executable($this->curl_path)) {
                return false;
            }
            $this->host = $host;
            if (!empty($URI_PARTS["port"])) {
                $this->port = $URI_PARTS["port"];
            }
            $this->_httpsrequest($this->_isproxy ? $URI : $path, $URI, $this->_submit_method, $this->_submit_type, $postdata);
            break;

        default:
            // not a valid protocol
            $this->error = 'Invalid protocol "' . $scheme . '"\n';
            return false;
    }

    // url was redirected: follow it unless the maximum depth is reached
    if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth) {
        // Only a target without any scheme is relative to the submitted URI.
        // An absolute target on another scheme (http -> https) must be left
        // alone, otherwise it is glued behind the base as if it were a path.
        if (!preg_match('~^[a-z][a-z0-9+.\-]*://~i', $this->_redirectaddr)) {
            $this->_redirectaddr = $this->_expandlinks($this->_redirectaddr, $scheme . "://" . $host);
        }

        // "On this site" means http or https on exactly this host: the host
        // must be followed by a port, path, query, fragment or the end of the
        // address, so "host.example.org" does not pass for "host".
        $onsite = preg_match(
            "~^https?://" . preg_quote($this->host, "~") . '(?:[:/?#]|$)~i',
            $this->_redirectaddr
        );
        if ($onsite || $this->offsiteok) {
            $this->_redirectdepth++;
            $this->lastredirectaddr = $this->_redirectaddr;
            if (strpos($this->_redirectaddr, "?") > 0) {
                // the redirect has changed the request method from post to get
                $this->fetch($this->_redirectaddr);
            } else {
                $this->submit($this->_redirectaddr, $formvars, $formfiles);
            }
        }
    }

    // fetch the frame sources collected while reading the response
    if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0) {
        $frameurls = $this->_frameurls;
        $this->_frameurls = array();

        foreach ($frameurls as $frameurl) {
            if ($this->_framedepth >= $this->maxframes) {
                break;
            }
            $this->fetch($frameurl);
            $this->_framedepth++;
        }
    }

    return true;
}
400:
401: /*======================================================================*\
402: Function: fetchlinks
403: Purpose: fetch the links from a web page
404: Input: $URI where you are fetching from
405: Output: $this->results an array of the URLs
406: \*======================================================================*/
407:
/**
 * Fetch a page and replace $this->results with the links found in it.
 *
 * When the fetch produced several documents (array results), every document
 * is reduced to its own list of links and, if $this->expandlinks is set,
 * expanded individually - the same per-document handling submitlinks()
 * uses. Expanding the whole array of link lists in one call would hand
 * nested arrays to the regex replacement and lose the links.
 *
 * @param string $URI where you are fetching from
 *
 * @return bool result of the underlying fetch()
 */
function fetchlinks($URI)
{
    if (!$this->fetch($URI)) {
        return false;
    }

    // links are relative to the final address after redirects
    if ($this->lastredirectaddr) {
        $URI = $this->lastredirectaddr;
    }

    if (is_array($this->results)) {
        for ($x = 0; $x < count($this->results); $x++) {
            $this->results[$x] = $this->_striplinks($this->results[$x]);
            if ($this->expandlinks) {
                $this->results[$x] = $this->_expandlinks($this->results[$x], $URI);
            }
        }
    } else {
        $this->results = $this->_striplinks($this->results);
        if ($this->expandlinks) {
            $this->results = $this->_expandlinks($this->results, $URI);
        }
    }

    return true;
}
425:
426: /*======================================================================*\
427: Function: fetchform
428: Purpose: fetch the form elements from a web page
429: Input: $URI where you are fetching from
430: Output: $this->results the resulting html form
431: \*======================================================================*/
432:
/**
 * Fetch a page and reduce $this->results to its form markup.
 *
 * @param string $URI where you are fetching from
 *
 * @return bool result of the underlying fetch()
 */
function fetchform($URI)
{
    if (!$this->fetch($URI)) {
        return false;
    }

    if (!is_array($this->results)) {
        // single document
        $this->results = $this->_stripform($this->results);

        return true;
    }

    // several documents: reduce each one in place
    for ($i = 0; $i < count($this->results); $i++) {
        $this->results[$i] = $this->_stripform($this->results[$i]);
    }

    return true;
}
448:
449:
450: /*======================================================================*\
451: Function: fetchtext
452: Purpose: fetch the text from a web page, stripping the links
453: Input: $URI where you are fetching from
454: Output: $this->results the text from the web page
455: \*======================================================================*/
456:
/**
 * Fetch a page and reduce $this->results to its plain text.
 *
 * @param string $URI where you are fetching from
 *
 * @return bool result of the underlying fetch()
 */
function fetchtext($URI)
{
    $fetched = $this->fetch($URI);
    if (!$fetched) {
        return false;
    }

    if (is_array($this->results)) {
        // several documents: strip each one in place
        $pos = 0;
        while ($pos < count($this->results)) {
            $this->results[$pos] = $this->_striptext($this->results[$pos]);
            $pos++;
        }
    } else {
        $this->results = $this->_striptext($this->results);
    }

    return true;
}
469:
470: /*======================================================================*\
471: Function: submitlinks
472: Purpose: grab links from a form submission
473: Input: $URI where you are submitting from
474: Output: $this->results an array of the links from the post
475: \*======================================================================*/
476:
/**
 * Submit a form and replace $this->results with the links found in the
 * response.
 *
 * @param string       $URI       where you are submitting to
 * @param array|string $formvars  form variables passed on to submit()
 * @param array|string $formfiles form files passed on to submit()
 *
 * @return bool result of the underlying submit()
 */
function submitlinks($URI, $formvars = "", $formfiles = "")
{
    if (!$this->submit($URI, $formvars, $formfiles)) {
        return false;
    }

    // links are relative to the final address after redirects
    $base = $this->lastredirectaddr ? $this->lastredirectaddr : $URI;

    if (!is_array($this->results)) {
        $this->results = $this->_striplinks($this->results);
        if ($this->expandlinks) {
            $this->results = $this->_expandlinks($this->results, $base);
        }

        return true;
    }

    // several documents: handle each one separately
    for ($i = 0; $i < count($this->results); $i++) {
        $this->results[$i] = $this->_striplinks($this->results[$i]);
        if ($this->expandlinks) {
            $this->results[$i] = $this->_expandlinks($this->results[$i], $base);
        }
    }

    return true;
}
497:
498: /*======================================================================*\
499: Function: submittext
500: Purpose: grab text from a form submission
501: Input: $URI where you are submitting from
502: Output: $this->results the text from the web page
503: \*======================================================================*/
504:
/**
 * Submit a form and reduce $this->results to the plain text of the
 * response, passing it through _expandlinks() when $this->expandlinks is
 * set.
 *
 * @param string       $URI       where you are submitting to
 * @param array|string $formvars  form variables passed on to submit()
 * @param array|string $formfiles form files passed on to submit()
 *
 * @return bool result of the underlying submit()
 */
function submittext($URI, $formvars = "", $formfiles = "")
{
    $submitted = $this->submit($URI, $formvars, $formfiles);
    if (!$submitted) {
        return false;
    }

    // expansion is relative to the final address after redirects
    if ($this->lastredirectaddr) {
        $URI = $this->lastredirectaddr;
    }

    if (is_array($this->results)) {
        $pos = 0;
        while ($pos < count($this->results)) {
            $this->results[$pos] = $this->_striptext($this->results[$pos]);
            if ($this->expandlinks) {
                $this->results[$pos] = $this->_expandlinks($this->results[$pos], $URI);
            }
            $pos++;
        }

        return true;
    }

    $this->results = $this->_striptext($this->results);
    if ($this->expandlinks) {
        $this->results = $this->_expandlinks($this->results, $URI);
    }

    return true;
}
525:
526:
527: /*======================================================================*\
528: Function: set_submit_multipart
529: Purpose: Set the form submission content type to
530: multipart/form-data
531: \*======================================================================*/
/**
 * Switch the content type used for form submissions to multipart/form-data.
 */
function set_submit_multipart()
{
    $type = 'multipart/form-data';
    $this->_submit_type = $type;
}
536:
537:
538: /*======================================================================*\
539: Function: set_submit_normal
540: Purpose: Set the form submission content type to
541: application/x-www-form-urlencoded
542: \*======================================================================*/
/**
 * Switch the content type used for form submissions back to
 * application/x-www-form-urlencoded.
 */
function set_submit_normal()
{
    $type = 'application/x-www-form-urlencoded';
    $this->_submit_type = $type;
}
547:
548:
549:
550:
551: /*======================================================================*\
552: Private functions
553: \*======================================================================*/
554:
555:
556: /*======================================================================*\
557: Function: _striplinks
558: Purpose: strip the hyperlinks from an HTML document
559: Input: $document document to strip.
560: Output: $match an array of the links
561: \*======================================================================*/
562:
/**
 * Collect the href targets of all anchor tags in an HTML document.
 *
 * Quoted href values are listed first, followed by unquoted ones. Values
 * that are empty() - including the string "0" - are skipped.
 *
 * @param string $document document to strip
 *
 * @return array list of link targets; an empty array when the document has
 *               no links (previously an undefined variable was returned)
 */
function _striplinks($document)
{
    preg_match_all("'<\s*a\s.*?href\s*=\s*      # find <a href=
                    ([\"\'])?                   # find single or double quote
                    (?(1) (.*?)\\1 | ([^\s\>]+))    # if quote found, match up to next matching
                                                # quote, otherwise match up to next space
                    'isx", $document, $links);

    $match = array();

    // catenate the non-empty matches from the conditional subpattern:
    // group 2 holds quoted values, group 3 unquoted ones
    foreach (array(2, 3) as $group) {
        if (!isset($links[$group])) {
            continue;
        }
        foreach ($links[$group] as $val) {
            if (!empty($val)) {
                $match[] = $val;
            }
        }
    }

    // return the links
    return $match;
}
587:
588: /*======================================================================*\
589: Function: _stripform
590: Purpose: strip the form elements from an HTML document
591: Input: $document document to strip.
592: Output: $match an array of the links
593: \*======================================================================*/
594:
/**
 * Reduce an HTML document to its form-related tags.
 *
 * Keeps FORM, INPUT, SELECT, TEXTAREA and OPTION tags (for OPTION also the
 * text up to the next option/select tag) and joins them with CRLF.
 *
 * @param string $document document to strip
 *
 * @return string the matched form elements, one per line
 */
function _stripform($document)
{
    $pattern = "'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi";

    preg_match_all($pattern, $document, $found);

    // index 0 holds the full text of every match
    return implode("\r\n", $found[0]);
}
605:
606:
607: /*======================================================================*\
608: Function: _striptext
609: Purpose: strip the text from an HTML document
610: Input: $document document to strip.
611: Output: $text the resulting text
612: \*======================================================================*/
613:
/**
 * Reduce an HTML document to text.
 *
 * Applies the rules below in order: scripts and tags are removed, runs of
 * whitespace after a line break are collapsed, and a fixed list of HTML
 * entities is replaced by the corresponding character.
 *
 * @param string $document document to strip
 *
 * @return string the resulting text
 */
function _striptext($document)
{
    // pattern => replacement, applied from top to bottom
    $rules = array(
        "'<script[^>]*?>.*?</script>'si" => "",      // strip out javascript
        "'<[\/\!]*?[^<>]*?>'si"          => "",      // strip out html tags
        "'([\r\n])[\s]+'"                => "\\1",   // strip out white space
        "'&(quot|#34|#034|#x22);'i"      => "\"",    // replace html entities
        "'&(amp|#38|#038|#x26);'i"       => "&",     // (decimal and hexadecimal forms)
        "'&(lt|#60|#060|#x3c);'i"        => "<",
        "'&(gt|#62|#062|#x3e);'i"        => ">",
        "'&(nbsp|#160|#xa0);'i"          => " ",
        "'&(iexcl|#161);'i"              => chr(161),
        "'&(cent|#162);'i"               => chr(162),
        "'&(pound|#163);'i"              => chr(163),
        "'&(copy|#169);'i"               => chr(169),
        "'&(reg|#174);'i"                => chr(174),
        "'&(deg|#176);'i"                => chr(176),
        "'&(#39|#039|#x27);'"            => chr(39),
        "'&(euro|#8364);'i"              => chr(128), // europe
        "'&a(uml|UML);'"                 => "ä",      // german
        "'&o(uml|UML);'"                 => "ö",
        "'&u(uml|UML);'"                 => "ü",
        "'&A(uml|UML);'"                 => "Ä",
        "'&O(uml|UML);'"                 => "Ö",
        "'&U(uml|UML);'"                 => "Ü",
        "'&szlig;'i"                     => "ß",
    );

    return preg_replace(array_keys($rules), array_values($rules), $document);
}
674:
675: /*======================================================================*\
676: Function: _expandlinks
677: Purpose: expand each link into a fully qualified URL
678: Input: $links the links to qualify
679: $URI the full URI to get the base from
680: Output: $expandedLinks the expanded links
681: \*======================================================================*/
682:
/**
 * Expand links into fully qualified URLs.
 *
 * The base is $URI without its query string and without a trailing
 * "name.ext" file component. Links starting with "/" are resolved against
 * scheme://host of the base, other links that are not absolute http(s) or
 * mailto links are resolved against the base directory, and "/./" and
 * "/dir/../" sequences are collapsed.
 *
 * @param string|array $links the link(s) to qualify
 * @param string       $URI   the full URI to take the base from
 *
 * @return string|array the expanded link(s), in the shape preg_replace()
 *                      returns for the given $links
 */
function _expandlinks($links, $URI)
{
    // drop the query string: only scheme, host and path define the base
    $base = preg_match("/^[^\?]+/", (string)$URI, $found) ? $found[0] : '';

    // Split off "scheme://authority" so the file-name pattern below can only
    // touch the path. Applied to the whole URI it mistakes a bare two-label
    // host ("http://example.com") for a "/example.com" file and destroys
    // the base.
    $authority = '';
    $path = $base;
    if (preg_match('~^([a-z][a-z0-9+.\-]*://[^/]*)(.*)$~is', $base, $split)) {
        $authority = $split[1];
        $path = $split[2];
    }
    $path = preg_replace("|/[^\/\.]+\.[^\/\.]+$|", "", $path);
    $match = preg_replace("|/$|", "", $authority . $path);

    // scheme://host of the base, used for links that start with "/"
    $match_part = parse_url($match);
    if (!is_array($match_part)) {
        $match_part = array();
    }
    $match_root =
        (isset($match_part["scheme"]) ? $match_part["scheme"] : '') . "://" .
        (isset($match_part["host"]) ? $match_part["host"] : '');

    $search = array(
        "|^http://" . preg_quote($this->host, "|") . "|i",
        "|^(\/)|i",
        // absolute https links must stay untouched as well, otherwise they
        // are glued behind the base like a relative path
        "|^(?!https?://)(?!mailto:)|i",
        "|/\./|",
        "|/[^\/]+/\.\./|",
    );

    $replace = array(
        "",
        $match_root . "/",
        $match . "/",
        "/",
        "/",
    );

    return preg_replace($search, $replace, $links);
}
712:
713: /*======================================================================*\
714: Function: _httprequest
715: Purpose: go get the http data from the server
716: Input: $url the url to fetch
717: $fp the current open file pointer
718: $URI the full URI
719: $body body contents to send if any (POST)
720: Output:
721: \*======================================================================*/
722:
/**
 * Send an HTTP request over an open connection and read the response.
 *
 * Fills $this->headers, $this->status, $this->response_code,
 * $this->_redirectaddr (Location/URI header or meta refresh),
 * $this->_frameurls and $this->results.
 *
 * @param string   $url          request target (path, or full URI via proxy)
 * @param resource $fp           the current open file pointer
 * @param string   $URI          the full URI
 * @param string   $http_method  request method, e.g. "GET" or "POST"
 * @param string   $content_type content type of $body, if any
 * @param string   $body         body contents to send, if any (POST)
 *
 * @return bool false when a read timed out ($this->status is set to -100),
 *              true otherwise
 */
function _httprequest($url, $fp, $URI, $http_method, $content_type = "", $body = "")
{
    $cookie_headers = '';
    if ($this->passcookies && $this->_redirectaddr) {
        $this->setcookies();
    }

    $URI_PARTS = parse_url($URI);
    // scheme of the request, needed to complete host-less redirects/frames
    $scheme = (is_array($URI_PARTS) && isset($URI_PARTS["scheme"])) ? $URI_PARTS["scheme"] : '';

    if (empty($url)) {
        $url = "/";
    }
    $headers = $http_method . " " . $url . " " . $this->_httpversion . "\r\n";
    if (!empty($this->agent)) {
        $headers .= "User-Agent: " . $this->agent . "\r\n";
    }
    if (!empty($this->host) && !isset($this->rawheaders['Host'])) {
        $headers .= "Host: " . $this->host;
        if (!empty($this->port) && $this->port != '80') {
            $headers .= ":" . $this->port;
        }
        $headers .= "\r\n";
    }
    if (!empty($this->accept)) {
        $headers .= "Accept: " . $this->accept . "\r\n";
    }
    if ($this->use_gzip) {
        // make sure PHP was built with --with-zlib
        // and we can handle gzipp'ed data
        if (function_exists('gzinflate')) {
            $headers .= "Accept-encoding: gzip\r\n";
        } else {
            trigger_error(
                "use_gzip is on, but PHP was built without zlib support." .
                " Requesting file(s) without gzip encoding.",
                E_USER_NOTICE);
        }
    }
    if (!empty($this->referer)) {
        $headers .= "Referer: " . $this->referer . "\r\n";
    }
    if (!empty($this->cookies)) {
        if (!is_array($this->cookies)) {
            $this->cookies = (array)$this->cookies;
        }

        reset($this->cookies);
        if (count($this->cookies) > 0) {
            $cookie_headers .= 'Cookie: ';
            foreach ($this->cookies as $cookieKey => $cookieVal) {
                $cookie_headers .= $cookieKey . "=" . urlencode($cookieVal) . "; ";
            }
            $headers .= substr($cookie_headers, 0, -2) . "\r\n";
        }
    }
    if (!empty($this->rawheaders)) {
        if (!is_array($this->rawheaders)) {
            $this->rawheaders = (array)$this->rawheaders;
        }
        // foreach instead of each(): each() continued from the array's
        // internal pointer, which was never reset, so the raw headers were
        // only sent with the first request and missing on every later one
        foreach ($this->rawheaders as $headerKey => $headerVal) {
            $headers .= $headerKey . ": " . $headerVal . "\r\n";
        }
    }
    if (!empty($content_type)) {
        $headers .= "Content-type: $content_type";
        if ($content_type == "multipart/form-data") {
            $headers .= "; boundary=" . $this->_mime_boundary;
        }
        $headers .= "\r\n";
    }
    if (!empty($body)) {
        $headers .= "Content-length: " . strlen($body) . "\r\n";
    }
    if (!empty($this->user) || !empty($this->pass)) {
        $headers .= "Authorization: Basic " . base64_encode($this->user . ":" . $this->pass) . "\r\n";
    }

    // add proxy auth headers
    if (!empty($this->proxy_user)) {
        $headers .= 'Proxy-Authorization: ' . 'Basic ' . base64_encode($this->proxy_user . ':' . $this->proxy_pass) . "\r\n";
    }

    $headers .= "\r\n";

    // set the read timeout if needed
    if ($this->read_timeout > 0) {
        stream_set_timeout($fp, $this->read_timeout);
    }
    $this->timed_out = false;

    fwrite($fp, $headers . $body, strlen($headers . $body));

    $this->_redirectaddr = false;
    // start from an empty list so the property stays defined even when the
    // server sends no header lines
    $this->headers = array();

    // content was returned gzip encoded?
    $is_gzipped = false;

    while ($currentHeader = fgets($fp, $this->_maxlinelen)) {
        if ($this->read_timeout > 0 && $this->_check_timeout($fp)) {
            $this->status = -100;
            return false;
        }

        if ($currentHeader == "\r\n") {
            break;
        }

        // if a header begins with Location: or URI:, set the redirect;
        // whitespace after the colon is optional
        if (preg_match("/^(?:Location|URI):[ \t]*(.*)/i", chop($currentHeader), $matches)) {
            $target = $matches[1];
            // look for :// in the Location header to see if hostname is included
            if (!preg_match("|\:\/\/|", $target)) {
                // no host in the path, so prepend
                $this->_redirectaddr = $scheme . "://" . $this->host . ":" . $this->port;
                // eliminate double slash
                if (!preg_match("|^/|", $target)) {
                    $this->_redirectaddr .= "/" . $target;
                } else {
                    $this->_redirectaddr .= $target;
                }
            } else {
                $this->_redirectaddr = $target;
            }
        }

        if (preg_match("|^HTTP/|", $currentHeader)) {
            if (preg_match("|^HTTP/[^\s]*\s(.*?)\s|", $currentHeader, $status)) {
                $this->status = $status[1];
            }
            $this->response_code = $currentHeader;
        }

        // header names are case-insensitive
        if (preg_match("/^Content-Encoding:\s*gzip/i", $currentHeader)) {
            $is_gzipped = true;
        }

        $this->headers[] = $currentHeader;
    }

    $results = '';
    do {
        $_data = fread($fp, $this->maxlength);
        if (strlen($_data) == 0) {
            break;
        }
        $results .= $_data;
    } while (true);

    // gunzip
    if ($is_gzipped && $results !== '') {
        if (function_exists('gzdecode')) {
            // parses the real gzip header, including optional fields such
            // as an embedded file name
            $results = gzdecode($results);
        } else {
            // fallback: assume the minimal 10 byte gzip header
            $results = gzinflate(substr($results, 10));
        }
    }

    if ($this->read_timeout > 0 && $this->_check_timeout($fp)) {
        $this->status = -100;
        return false;
    }

    // check if there is a redirect meta tag
    if (preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i", $results, $match)) {
        $this->_redirectaddr = $this->_expandlinks($match[1], $URI);
    }

    // have we hit our frame depth and is there frame src to fetch?
    if (($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i", $results, $match)) {
        // $this->results starts out as a string; "[] =" on a string is a
        // fatal error since PHP 7.1, so switch to a list first
        if (!is_array($this->results)) {
            $this->results = array();
        }
        $this->results[] = $results;
        for ($x = 0; $x < count($match[1]); $x++) {
            $this->_frameurls[] = $this->_expandlinks($match[1][$x], $scheme . "://" . $this->host);
        }
    } elseif (is_array($this->results)) {
        // have we already fetched framed content?
        $this->results[] = $results;
    } else {
        // no framed content
        $this->results = $results;
    }

    return true;
}
888:
889: /*======================================================================*\
890: Function: _httpsrequest
891: Purpose: go get the https data from the server using curl
892: Input: $url the url to fetch
893: $URI the full URI
894: $body body contents to send if any (POST)
895: Output:
896: \*======================================================================*/
897:
/**
 * Fetch an https URI by running the external curl binary ($this->curl_path)
 * and parse the response headers and body.
 *
 * Side effects: sets $this->headers, $this->response_code, $this->status,
 * $this->_redirectaddr, $this->_frameurls and $this->results; sets
 * $this->error on failure.
 *
 * NOTE(review): curl is invoked with -k, i.e. without certificate
 * verification -- confirm this is still intended.
 *
 * @param string $url          request path; not needed because curl receives the full URI
 * @param string $URI          the full URI to fetch
 * @param string $http_method  HTTP method; not passed to curl (curl picks it from -d)
 * @param string $content_type content type of $body, if any
 * @param string $body         body contents to send, if any (POST)
 *
 * @return bool true on success, false when the temp file cannot be created
 *              or curl exits with a non-zero status
 */
function _httpsrequest($url, $URI, $http_method, $content_type = "", $body = "")
{
    if ($this->passcookies && $this->_redirectaddr) {
        $this->setcookies();
    }

    $headers = array();

    $URI_PARTS = parse_url($URI);
    // parse_url() can return false or omit the scheme; this is the https code path
    $scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : "https";

    // The request line ("GET /path HTTP/x") is not needed: curl builds it itself.
    if (!empty($this->agent)) {
        $headers[] = "User-Agent: " . $this->agent;
    }
    if (!empty($this->host)) {
        if (!empty($this->port)) {
            $headers[] = "Host: " . $this->host . ":" . $this->port;
        } else {
            $headers[] = "Host: " . $this->host;
        }
    }
    if (!empty($this->accept)) {
        $headers[] = "Accept: " . $this->accept;
    }
    if (!empty($this->referer)) {
        $headers[] = "Referer: " . $this->referer;
    }
    if (!empty($this->cookies)) {
        if (!is_array($this->cookies)) {
            $this->cookies = (array)$this->cookies;
        }

        $cookiePairs = array();
        foreach ($this->cookies as $cookieKey => $cookieVal) {
            $cookiePairs[] = $cookieKey . "=" . urlencode($cookieVal);
        }
        $headers[] = "Cookie: " . implode("; ", $cookiePairs);
    }
    if (!empty($this->rawheaders)) {
        if (!is_array($this->rawheaders)) {
            $this->rawheaders = (array)$this->rawheaders;
        }
        // foreach always starts at the first element, unlike each() without reset()
        foreach ($this->rawheaders as $headerKey => $headerVal) {
            $headers[] = $headerKey . ": " . $headerVal;
        }
    }
    if (!empty($content_type)) {
        if ($content_type == "multipart/form-data") {
            $headers[] = "Content-type: $content_type; boundary=" . $this->_mime_boundary;
        } else {
            $headers[] = "Content-type: $content_type";
        }
    }
    if (!empty($body)) {
        $headers[] = "Content-length: " . strlen($body);
    }
    if (!empty($this->user) || !empty($this->pass)) {
        $headers[] = "Authorization: BASIC " . base64_encode($this->user . ":" . $this->pass);
    }

    // Build the curl argument list. Every value is passed through
    // escapeshellarg() because headers (cookies, referer) and the body can
    // carry data that did not originate in this process.
    $cmdline_params = "";
    foreach ($headers as $header) {
        // a header must stay on one line; embedded CR/LF would split it
        $oneLineHeader = str_replace(array("\r", "\n"), " ", $header);
        $cmdline_params .= " -H " . escapeshellarg($oneLineHeader);
    }

    if (!empty($body)) {
        $cmdline_params .= " -d " . escapeshellarg($body);
    }

    if ($this->read_timeout > 0) {
        $cmdline_params .= " -m " . escapeshellarg((string)$this->read_timeout);
    }

    // Use the configured temp dir when the object has one, else the system default.
    $tempDir = !empty($this->temp_dir) ? $this->temp_dir : sys_get_temp_dir();
    $headerfile = tempnam($tempDir, "sno");
    if ($headerfile === false) {
        $this->error = "Error: could not create a temporary file for the cURL response headers.";
        return false;
    }

    $results = array();
    $return = 0;
    exec($this->curl_path . " -k -D " . escapeshellarg($headerfile) . $cmdline_params . " " . escapeshellarg($URI), $results, $return);

    if ($return) {
        // do not leave the header dump behind on failure
        unlink($headerfile);
        $this->error = "Error: cURL could not retrieve the document, error $return.";
        return false;
    }

    $results = implode("\r\n", $results);

    $result_headers = file($headerfile);
    // contents are in memory now, so the temp file can go on every path
    unlink($headerfile);
    if ($result_headers === false) {
        $result_headers = array();
    }

    $this->_redirectaddr = false;
    // keep the property defined (as an empty list) even when no header was read
    $this->headers = array();

    foreach ($result_headers as $headerLine) {
        // if a header begins with Location: or URI:, set the redirect
        if (preg_match('/^(?:Location|URI):\s*(.*)$/i', rtrim($headerLine), $matches) && $matches[1] !== "") {
            $target = $matches[1];
            // look for :// in the target to see if a hostname is included
            if (!preg_match("|\:\/\/|", $target)) {
                // no host in the path, so prepend scheme, host and (if set) port
                $this->_redirectaddr = $scheme . "://" . $this->host;
                if (!empty($this->port)) {
                    $this->_redirectaddr .= ":" . $this->port;
                }
                // make sure exactly one slash separates host and path
                if (!preg_match("|^/|", $target)) {
                    $this->_redirectaddr .= "/";
                }
                $this->_redirectaddr .= $target;
            } else {
                $this->_redirectaddr = $target;
            }
        }

        if (preg_match("|^HTTP/|", $headerLine)) {
            $this->response_code = $headerLine;
            if (preg_match("|^HTTP/[^\s]*\s(.*?)\s|", $this->response_code, $match)) {
                $this->status = $match[1];
            }
        }

        $this->headers[] = $headerLine;
    }

    // check if there is a redirect meta tag
    if (preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i", $results, $match)) {
        $this->_redirectaddr = $this->_expandlinks($match[1], $URI);
    }

    // have we hit our frame depth and is there frame src to fetch?
    if (($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i", $results, $match)) {
        $this->results[] = $results;
        foreach ($match[1] as $frameSrc) {
            $this->_frameurls[] = $this->_expandlinks($frameSrc, $scheme . "://" . $this->host);
        }
    } elseif (is_array($this->results)) {
        // framed content was fetched earlier: keep collecting documents
        $this->results[] = $results;
    } else {
        // no framed content
        $this->results = $results;
    }

    return true;
}
1030:
1031: /*======================================================================*\
1032: Function: setcookies()
1033: Purpose: set cookies for a redirection
1034: \*======================================================================*/
1035:
/**
 * Copy cookies from the stored response headers into $this->cookies so they
 * are sent with the next (redirected) request.
 *
 * Does nothing when no response headers are available.
 *
 * @return void
 */
function setcookies()
{
    // headers can be unset or empty before the first response has been parsed
    if (empty($this->headers) || !is_array($this->headers)) {
        return;
    }

    foreach ($this->headers as $header) {
        // Stored header lines may still end in CR/LF; excluding them from the
        // value keeps the line ending out of attribute-less cookies.
        if (preg_match('/^set-cookie:[\s]+([^=]+)=([^;\r\n]+)/i', $header, $match)) {
            $this->cookies[$match[1]] = urldecode($match[2]);
        }
    }
}
1043:
1044:
1045: /*======================================================================*\
1046: Function: _check_timeout
1047: Purpose: checks whether timeout has occurred
1048: Input: $fp file pointer
1049: \*======================================================================*/
1050:
/**
 * Report whether the stream has timed out, recording it on the object.
 *
 * The check only runs when $this->read_timeout is greater than zero.
 *
 * @param resource $fp file pointer to inspect
 *
 * @return bool true if the stream reports a timeout (and $this->timed_out
 *              was set), false otherwise
 */
function _check_timeout($fp)
{
    // no positive read timeout configured: nothing to check
    if (!($this->read_timeout > 0)) {
        return false;
    }

    $streamInfo = socket_get_status($fp);
    if (!$streamInfo["timed_out"]) {
        return false;
    }

    $this->timed_out = true;

    return true;
}
1062:
1063: /*======================================================================*\
1064: Function: _connect
1065: Purpose: make a socket connection
1066: Input: $fp file pointer
1067: \*======================================================================*/
1068:
/**
 * Open a socket connection to the target host, or to the proxy when both
 * proxy_host and proxy_port are configured.
 *
 * On failure $this->status holds the error number reported by fsockopen()
 * and $this->error a matching message.
 *
 * @param resource $fp receives the opened file pointer (by reference)
 *
 * @return bool true when the connection succeeded, false otherwise
 */
function _connect(&$fp)
{
    if (!empty($this->proxy_host) && !empty($this->proxy_port)) {
        $this->_isproxy = true;

        $host = $this->proxy_host;
        $port = $this->proxy_port;
    } else {
        $host = $this->host;
        $port = $this->port;
    }

    $this->status = 0;

    $fp = fsockopen($host, $port, $errno, $errstr, $this->_fp_timeout);
    if ($fp) {
        // socket connection succeeded
        return true;
    }

    // socket connection failed
    $this->status = $errno;
    switch ($errno) {
        // each case needs its own break, otherwise execution falls through
        // to default and the specific message is overwritten
        case -3:
            $this->error = "socket creation failed (-3)";
            break;
        case -4:
            $this->error = "dns lookup failure (-4)";
            break;
        case -5:
            $this->error = "connection refused or timed out (-5)";
            break;
        default:
            $this->error = "connection failed (" . $errno . ")";
            break;
    }

    return false;
}
1110:
1111: /*======================================================================*\
1112: Function: _disconnect
1113: Purpose: disconnect a socket connection
1114: Input: $fp file pointer
1115: \*======================================================================*/
1116:
/**
 * Close a connection previously opened for a request.
 *
 * @param resource $fp file pointer to close
 *
 * @return bool the result of fclose()
 */
function _disconnect($fp)
{
    $closed = fclose($fp);

    return $closed;
}
1121:
1122:
1123: /*======================================================================*\
1124: Function: _prepare_post_body
1125: Purpose: Prepare post body according to encoding type
1126: Input: $formvars - form variables
1127: $formfiles - form upload files
1128: Output: post body
1129: \*======================================================================*/
1130:
/**
 * Build the POST body for the configured submit type ($this->_submit_type).
 *
 * Supports "application/x-www-form-urlencoded" and "multipart/form-data";
 * for the latter a fresh boundary is stored in $this->_mime_boundary.
 * Any other submit type yields an empty body.
 *
 * @param array|mixed $formvars  form variables (name => value or list of values)
 * @param array|mixed $formfiles upload files (field name => file name or list of file names)
 *
 * @return string the encoded post body; empty string when there is nothing to send
 */
function _prepare_post_body($formvars, $formfiles)
{
    settype($formvars, "array");
    settype($formfiles, "array");
    $postdata = '';

    // nothing to send: hand back an empty string rather than null
    if (count($formvars) == 0 && count($formfiles) == 0) {
        return $postdata;
    }

    switch ($this->_submit_type) {
        case "application/x-www-form-urlencoded":
            foreach ($formvars as $key => $val) {
                if (is_array($val) || is_object($val)) {
                    // foreach walks arrays and objects alike and always
                    // starts at the first element
                    foreach ($val as $cur_val) {
                        $postdata .= urlencode($key) . "[]=" . urlencode($cur_val) . "&";
                    }
                } else {
                    $postdata .= urlencode($key) . "=" . urlencode($val) . "&";
                }
            }
            break;

        case "multipart/form-data":
            $this->_mime_boundary = "Snoopy" . md5(uniqid(microtime()));

            foreach ($formvars as $key => $val) {
                if (is_array($val) || is_object($val)) {
                    foreach ($val as $cur_val) {
                        $postdata .= "--" . $this->_mime_boundary . "\r\n";
                        // {$key}[] yields name="key[]"; escaping the brackets
                        // with backslashes would put the backslashes in the name
                        $postdata .= "Content-Disposition: form-data; name=\"{$key}[]\"\r\n\r\n";
                        $postdata .= "$cur_val\r\n";
                    }
                } else {
                    $postdata .= "--" . $this->_mime_boundary . "\r\n";
                    $postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
                    $postdata .= "$val\r\n";
                }
            }

            foreach ($formfiles as $field_name => $file_names) {
                settype($file_names, "array");
                foreach ($file_names as $file_name) {
                    if (!is_file($file_name) || !is_readable($file_name)) {
                        continue;
                    }

                    // binary-safe read that also copes with empty files
                    $file_content = file_get_contents($file_name);
                    if ($file_content === false) {
                        continue;
                    }
                    $base_name = basename($file_name);

                    $postdata .= "--" . $this->_mime_boundary . "\r\n";
                    $postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
                    $postdata .= "$file_content\r\n";
                }
            }
            // closing boundary
            $postdata .= "--" . $this->_mime_boundary . "--\r\n";
            break;
    }

    return $postdata;
}
1193: }
1194:
1195: ?>
1196: