1: <?php
2:
3: /*************************************************
4: *
5: * Snoopy - the PHP net client
6: * Author: Monte Ohrt <monte@ohrt.com>
7: * Copyright (c): 1999-2014, all rights reserved
8: * Version: 2.0.0
9: * This library is free software; you can redistribute it and/or
10: * modify it under the terms of the GNU Lesser General Public
11: * License as published by the Free Software Foundation; either
12: * version 2.1 of the License, or (at your option) any later version.
13: *
14: * This library is distributed in the hope that it will be useful,
15: * but WITHOUT ANY WARRANTY; without even the implied warranty of
16: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17: * Lesser General Public License for more details.
18: *
19: * You should have received a copy of the GNU Lesser General Public
20: * License along with this library; if not, write to the Free Software
21: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22: *
23: * You may contact the author of Snoopy by e-mail at:
24: * monte@ohrt.com
25: *
26: * The latest version of Snoopy can be obtained from:
27: * http://snoopy.sourceforge.net/
28: *************************************************/
class Snoopy
{
    /**** Public variables ****/

    /* user definable vars */

    var $scheme = 'http'; // http or https
    var $host = "www.php.net"; // host name we are connecting to
    var $port = 80; // port we are connecting to
    var $proxy_host = ""; // proxy host to use
    var $proxy_port = ""; // proxy port to use
    var $proxy_user = ""; // proxy user to use
    var $proxy_pass = ""; // proxy password to use

    var $agent = "Snoopy v2.0.0"; // agent we masquerade as
    var $referer = ""; // referer info to pass
    var $cookies = array(); // array of cookies to pass
    // $cookies["username"]="joe";
    var $rawheaders = array(); // array of raw headers to send
    // $rawheaders["Content-type"]="text/html";

    var $maxredirs = 5; // http redirection depth maximum. 0 = disallow
    var $lastredirectaddr = ""; // contains address of last redirected address
    var $offsiteok = true; // allows redirection off-site
    var $maxframes = 0; // frame content depth maximum. 0 = disallow
    var $expandlinks = true; // expand links to fully qualified URLs.
    // this only applies to fetchlinks()
    // submitlinks(), and submittext()
    var $passcookies = true; // pass set cookies back through redirects
    // NOTE: this currently does not respect
    // dates, domains or paths.

    var $user = ""; // user for http authentication
    var $pass = ""; // password for http authentication

    // http accept types
    var $accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";

    var $results = ""; // where the content is put

    var $error = ""; // error messages sent here
    var $response_code = ""; // response code returned from server
    var $headers = array(); // headers returned from server sent here
    var $maxlength = 500000; // max return data length (body)
    // NOTE: _httprequest() passes this to fread() as the
    // per-read chunk size and keeps reading until EOF.
    var $read_timeout = 0; // timeout on read operations, in seconds
    // supported only since PHP 4 Beta 4
    // set to 0 to disallow timeouts
    var $timed_out = false; // if a read operation timed out
    var $status = 0; // http request status

    var $temp_dir = "/tmp"; // temporary directory that the webserver
    // has permission to write to.
    // under Windows, this should be C:\temp

    var $curl_path = false;
    // deprecated, snoopy no longer uses curl for https requests,
    // but instead requires the openssl extension.

    // send Accept-encoding: gzip?
    var $use_gzip = true;

    // file or directory with CA certificates to verify remote host with
    var $cafile;
    var $capath;

    /**** Private variables ****/

    var $_maxlinelen = 4096; // max line length (headers)

    var $_httpmethod = "GET"; // default http request method
    var $_httpversion = "HTTP/1.0"; // default http request version
    var $_submit_method = "POST"; // default submit method
    var $_submit_type = "application/x-www-form-urlencoded"; // default submit type
    var $_mime_boundary = ""; // MIME boundary for multipart/form-data submit type
    var $_redirectaddr = false; // will be set if page fetched is a redirect
    var $_redirectdepth = 0; // increments on an http redirect
    var $_frameurls = array(); // frame src urls
    var $_framedepth = 0; // increments on frame depth

    var $_isproxy = false; // set if using a proxy server
    var $_fp_timeout = 30; // timeout for socket connection

    /*======================================================================*\
        Function:   fetch
        Purpose:    fetch the contents of a web page over http or https,
                    following redirects (up to $maxredirs) and frames
                    (up to $maxframes)
        Input:      $URI    the location of the page to fetch
        Output:     $this->results  the output text from the fetch
        Returns:    $this on success; false when the connection fails or the
                    URI scheme is not http/https ($this->error is set)
    \*======================================================================*/

    function fetch($URI)
    {
        $URI_PARTS = $this->_parse_uri($URI);
        $scheme = strtolower($URI_PARTS["scheme"]);

        $fp = null;

        switch ($scheme) {
            case "https":
                if (!extension_loaded('openssl')) {
                    trigger_error("openssl extension required for HTTPS", E_USER_ERROR);
                    exit;
                }
                $this->port = 443;
                // intentional fall-through: https shares the http logic below
            case "http":
                $this->scheme = $scheme;
                $this->host = $URI_PARTS["host"];
                if (!empty($URI_PARTS["port"]))
                    $this->port = $URI_PARTS["port"];

                if (!$this->_connect($fp))
                    return false;

                if ($this->_isproxy) {
                    // using proxy, send entire URI
                    $this->_httprequest($URI, $fp, $URI, $this->_httpmethod);
                } else {
                    $path = $URI_PARTS["path"] . ($URI_PARTS["query"] ? "?" . $URI_PARTS["query"] : "");
                    // no proxy, send only the path
                    $this->_httprequest($path, $fp, $URI, $this->_httpmethod);
                }

                $this->_disconnect($fp);

                /* url was redirected, check if we've hit the max depth */
                if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth) {
                    // only follow redirect if it's on this site, or offsiteok is true
                    if ($this->offsiteok || $this->_is_onsite($this->_redirectaddr)) {
                        /* follow the redirect */
                        $this->_redirectdepth++;
                        $this->lastredirectaddr = $this->_redirectaddr;
                        $this->fetch($this->_redirectaddr);
                    }
                }

                $this->_fetch_frames();

                return $this;

            default:
                // not a valid protocol
                $this->error = 'Invalid protocol "' . $URI_PARTS["scheme"] . '"' . "\n";
                return false;
        }
    }

    /*======================================================================*\
        Function:   submit
        Purpose:    submit an http(s) form
        Input:      $URI    the location to post the data
                    $formvars   the formvars to use.
                        format: $formvars["var"] = "val";
                    $formfiles  an array of files to submit
                        format: $formfiles["var"] = "/dir/filename.ext";
        Output:     $this->results  the text output from the post
        Returns:    $this on success; false when the connection fails or the
                    URI scheme is not http/https ($this->error is set)
    \*======================================================================*/

    function submit($URI, $formvars = "", $formfiles = "")
    {
        $postdata = $this->_prepare_post_body($formvars, $formfiles);

        $URI_PARTS = $this->_parse_uri($URI);
        $scheme = strtolower($URI_PARTS["scheme"]);

        $fp = null;

        switch ($scheme) {
            case "https":
                if (!extension_loaded('openssl')) {
                    trigger_error("openssl extension required for HTTPS", E_USER_ERROR);
                    exit;
                }
                $this->port = 443;
                // intentional fall-through: https shares the http logic below
            case "http":
                $this->scheme = $scheme;
                $this->host = $URI_PARTS["host"];
                if (!empty($URI_PARTS["port"]))
                    $this->port = $URI_PARTS["port"];

                if (!$this->_connect($fp))
                    return false;

                if ($this->_isproxy) {
                    // using proxy, send entire URI
                    $this->_httprequest($URI, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata);
                } else {
                    $path = $URI_PARTS["path"] . ($URI_PARTS["query"] ? "?" . $URI_PARTS["query"] : "");
                    // no proxy, send only the path
                    $this->_httprequest($path, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata);
                }

                $this->_disconnect($fp);

                /* url was redirected, check if we've hit the max depth */
                if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth) {
                    if (!preg_match("|^" . $URI_PARTS["scheme"] . "://|", $this->_redirectaddr))
                        $this->_redirectaddr = $this->_expandlinks($this->_redirectaddr, $URI_PARTS["scheme"] . "://" . $URI_PARTS["host"]);

                    // only follow redirect if it's on this site, or offsiteok is true
                    if ($this->offsiteok || $this->_is_onsite($this->_redirectaddr)) {
                        /* follow the redirect */
                        $this->_redirectdepth++;
                        $this->lastredirectaddr = $this->_redirectaddr;
                        if (strpos($this->_redirectaddr, "?") > 0)
                            $this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get
                        else
                            $this->submit($this->_redirectaddr, $formvars, $formfiles);
                    }
                }

                $this->_fetch_frames();

                return $this;

            default:
                // not a valid protocol
                $this->error = 'Invalid protocol "' . $URI_PARTS["scheme"] . '"' . "\n";
                return false;
        }
    }

    /*======================================================================*\
        Function:   fetchlinks
        Purpose:    fetch the links from a web page
        Input:      $URI    where you are fetching from
        Output:     $this->results  an array of the URLs
        Returns:    $this, or false if the fetch failed
    \*======================================================================*/

    function fetchlinks($URI)
    {
        if ($this->fetch($URI) === false)
            return false;

        if ($this->lastredirectaddr)
            $URI = $this->lastredirectaddr;

        $this->_transform_results('_striplinks', $URI);
        return $this;
    }

    /*======================================================================*\
        Function:   fetchform
        Purpose:    fetch the form elements from a web page
        Input:      $URI    where you are fetching from
        Output:     $this->results  the resulting html form
        Returns:    $this, or false if the fetch failed
    \*======================================================================*/

    function fetchform($URI)
    {
        if ($this->fetch($URI) === false)
            return false;

        $this->_transform_results('_stripform');
        return $this;
    }


    /*======================================================================*\
        Function:   fetchtext
        Purpose:    fetch the text from a web page, stripping the links
        Input:      $URI    where you are fetching from
        Output:     $this->results  the text from the web page
        Returns:    $this, or false if the fetch failed
    \*======================================================================*/

    function fetchtext($URI)
    {
        if ($this->fetch($URI) === false)
            return false;

        $this->_transform_results('_striptext');
        return $this;
    }

    /*======================================================================*\
        Function:   submitlinks
        Purpose:    grab links from a form submission
        Input:      $URI    where you are submitting from
                    $formvars, $formfiles   see submit()
        Output:     $this->results  an array of the links from the post
        Returns:    $this, or false if the submit failed
    \*======================================================================*/

    function submitlinks($URI, $formvars = "", $formfiles = "")
    {
        if ($this->submit($URI, $formvars, $formfiles) === false)
            return false;

        if ($this->lastredirectaddr)
            $URI = $this->lastredirectaddr;

        $this->_transform_results('_striplinks', $URI);
        return $this;
    }

    /*======================================================================*\
        Function:   submittext
        Purpose:    grab text from a form submission
        Input:      $URI    where you are submitting from
                    $formvars, $formfiles   see submit()
        Output:     $this->results  the text from the web page
        Returns:    $this, or false if the submit failed
    \*======================================================================*/

    function submittext($URI, $formvars = "", $formfiles = "")
    {
        if ($this->submit($URI, $formvars, $formfiles) === false)
            return false;

        if ($this->lastredirectaddr)
            $URI = $this->lastredirectaddr;

        $this->_transform_results('_striptext', $URI);
        return $this;
    }


    /*======================================================================*\
        Function:   set_submit_multipart
        Purpose:    Set the form submission content type to
                    multipart/form-data
    \*======================================================================*/
    function set_submit_multipart()
    {
        $this->_submit_type = "multipart/form-data";
        return $this;
    }


    /*======================================================================*\
        Function:   set_submit_normal
        Purpose:    Set the form submission content type to
                    application/x-www-form-urlencoded
    \*======================================================================*/
    function set_submit_normal()
    {
        $this->_submit_type = "application/x-www-form-urlencoded";
        return $this;
    }


    /*======================================================================*\
        Private functions
    \*======================================================================*/


    /*======================================================================*\
        Function:   _parse_uri
        Purpose:    split a URI with parse_url(), copy any embedded
                    user/password into $this->user / $this->pass, and make
                    sure the scheme, host, path and query keys always exist
        Input:      $URI    the URI to split
        Output:     array of URI parts (missing parts are empty strings)
    \*======================================================================*/

    function _parse_uri($URI)
    {
        $parts = parse_url($URI);
        // parse_url() returns false for seriously malformed URLs
        if (!is_array($parts))
            $parts = array();

        if (!empty($parts["user"]))
            $this->user = $parts["user"];
        if (!empty($parts["pass"]))
            $this->pass = $parts["pass"];

        foreach (array("scheme", "host", "path", "query") as $name) {
            if (empty($parts[$name]))
                $parts[$name] = '';
        }

        return $parts;
    }

    /*======================================================================*\
        Function:   _is_onsite
        Purpose:    tell whether a redirect target points at $this->host
        Input:      $url    absolute URL to test
        Output:     true if $url is http(s)://<$this->host> followed by a
                    port, path, query, fragment or the end of the string
    \*======================================================================*/

    function _is_onsite($url)
    {
        // the lookahead stops "host.evil.com" from passing as "host"
        $pattern = "~^https?://" . preg_quote($this->host, "~") . '(?=[:/?#]|$)~i';
        return (bool)preg_match($pattern, $url);
    }

    /*======================================================================*\
        Function:   _fetch_frames
        Purpose:    fetch the frame URLs collected by _httprequest(), as long
                    as $this->_framedepth is below $this->maxframes
    \*======================================================================*/

    function _fetch_frames()
    {
        if ($this->_framedepth >= $this->maxframes || count($this->_frameurls) == 0)
            return;

        // work on a copy: nested fetches collect their own frame urls
        $frameurls = $this->_frameurls;
        $this->_frameurls = array();

        foreach ($frameurls as $frameurl) {
            if ($this->_framedepth >= $this->maxframes)
                break;
            $this->fetch($frameurl);
            $this->_framedepth++;
        }
    }

    /*======================================================================*\
        Function:   _transform_results
        Purpose:    run one of the _strip* filters over $this->results (a
                    string, or an array of documents when frames were
                    fetched) and optionally expand the outcome with
                    _expandlinks()
        Input:      $filter         name of the filter method to call
                    $expand_base    base URI for _expandlinks(); null to
                                    skip expansion. Expansion also requires
                                    $this->expandlinks to be true.
    \*======================================================================*/

    function _transform_results($filter, $expand_base = null)
    {
        $expand = ($expand_base !== null && $this->expandlinks);

        if (is_array($this->results)) {
            foreach (array_keys($this->results) as $x) {
                $this->results[$x] = $this->$filter($this->results[$x]);
                if ($expand)
                    $this->results[$x] = $this->_expandlinks($this->results[$x], $expand_base);
            }
        } else {
            $this->results = $this->$filter($this->results);
            if ($expand)
                $this->results = $this->_expandlinks($this->results, $expand_base);
        }
    }

    /*======================================================================*\
        Function:   _striplinks
        Purpose:    strip the hyperlinks from an html document
        Input:      $document   document to strip.
        Output:     $match      an array of the links: quoted href values
                                first, then unquoted ones (empty array if
                                the document has no links)
    \*======================================================================*/

    function _striplinks($document)
    {
        preg_match_all("'<\s*a\s.*?href\s*=\s*          # find <a href=
                        ([\"\'])?                       # find single or double quote
                        (?(1) (.*?)\\1 | ([^\s\>]+))    # if quote found, match up to next matching
                                                        # quote, otherwise match up to next space
                        'isx", $document, $links);

        // catenate the non-empty matches from the conditional subpattern
        $match = array();

        foreach ($links[2] as $val) {
            if (!empty($val))
                $match[] = $val;
        }

        foreach ($links[3] as $val) {
            if (!empty($val))
                $match[] = $val;
        }

        // return the links
        return $match;
    }

    /*======================================================================*\
        Function:   _stripform
        Purpose:    strip the form elements from an html document
        Input:      $document   document to strip.
        Output:     $match      the matched form-related tags, joined
                                with CRLF
    \*======================================================================*/

    function _stripform($document)
    {
        preg_match_all("'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi", $document, $elements);

        // catenate the matches
        $match = implode("\r\n", $elements[0]);

        // return the form elements
        return $match;
    }


    /*======================================================================*\
        Function:   _striptext
        Purpose:    strip the text from an html document
        Input:      $document   document to strip.
        Output:     $text       the resulting text
    \*======================================================================*/

    function _striptext($document)
    {

        // I didn't use preg eval (//e) since that is only available in PHP 4.0.
        // so, list your entities one by one here. I included some of the
        // more common ones.

        $search = array("'<script[^>]*?>.*?</script>'si", // strip out javascript
            "'<[\/\!]*?[^<>]*?>'si", // strip out html tags
            "'([\r\n])[\s]+'", // strip out white space
            "'&(quot|#34|#034|#x22);'i", // replace html entities
            "'&(amp|#38|#038|#x26);'i", // added hexadecimal values
            "'&(lt|#60|#060|#x3c);'i",
            "'&(gt|#62|#062|#x3e);'i",
            "'&(nbsp|#160|#xa0);'i",
            "'&(iexcl|#161);'i",
            "'&(cent|#162);'i",
            "'&(pound|#163);'i",
            "'&(copy|#169);'i",
            "'&(reg|#174);'i",
            "'&(deg|#176);'i",
            "'&(#39|#039|#x27);'",
            "'&(euro|#8364);'i", // europe
            "'&a(uml|UML);'", // german
            "'&o(uml|UML);'",
            "'&u(uml|UML);'",
            "'&A(uml|UML);'",
            "'&O(uml|UML);'",
            "'&U(uml|UML);'",
            "'&szlig;'i", // match the entity, not the already-decoded character
        );
        $replace = array("",
            "",
            "\\1",
            "\"",
            "&",
            "<",
            ">",
            " ",
            chr(161),
            chr(162),
            chr(163),
            chr(169),
            chr(174),
            chr(176),
            chr(39),
            chr(128),
            "ä",
            "ö",
            "ü",
            "Ä",
            "Ö",
            "Ü",
            "ß",
        );

        $text = preg_replace($search, $replace, $document);

        return $text;
    }

    /*======================================================================*\
        Function:   _expandlinks
        Purpose:    expand each link into a fully qualified URL
        Input:      $links          the links to qualify (string or array
                                    of strings)
                    $URI            the full URI to get the base from
        Output:     $expandedLinks  the expanded links
    \*======================================================================*/

    function _expandlinks($links, $URI)
    {
        // base = URI without its query string
        preg_match("/^[^\?]+/", $URI, $match);
        $match = isset($match[0]) ? $match[0] : '';

        // drop a trailing "/file.ext"; the lookbehind keeps the "//host.tld"
        // of a path-less URI such as http://example.com from being eaten
        $match = preg_replace("|(?<!/)/[^\/\.]+\.[^\/\.]+$|", "", $match);
        $match = preg_replace("|/$|", "", $match);
        $match_part = parse_url($match);
        if (!is_array($match_part))
            $match_part = array();
        $match_root =
            (isset($match_part["scheme"]) ? $match_part["scheme"] : '') . "://" .
            (isset($match_part["host"]) ? $match_part["host"] : '');

        $search = array("|^http://" . preg_quote($this->host, "|") . "|i",
            "|^(\/)|i",
            // leave absolute http AND https links (and mailto:) alone
            "|^(?!https?://)(?!mailto:)|i",
            "|/\./|",
            "|/[^\/]+/\.\./|"
        );

        $replace = array("",
            $match_root . "/",
            $match . "/",
            "/",
            "/"
        );

        $expandedLinks = preg_replace($search, $replace, $links);

        return $expandedLinks;
    }

    /*======================================================================*\
        Function:   _httprequest
        Purpose:    go get the http(s) data from the server
        Input:      $url            the request target to put on the request
                                    line (path, or full URI through a proxy)
                    $fp             the current open file pointer
                    $URI            the full URI
                    $http_method    the request method (GET, POST, ...)
                    $content_type   Content-type header value, if any
                    $body           body contents to send if any (POST)
        Output:     $this->results, $this->headers, $this->status,
                    $this->response_code, $this->_redirectaddr and
                    $this->_frameurls are updated from the response
        Returns:    $this, or false if a read timed out (status = -100)
    \*======================================================================*/

    function _httprequest($url, $fp, $URI, $http_method, $content_type = "", $body = "")
    {
        $cookie_headers = '';
        // pick up cookies set by the response that redirected us here
        if ($this->passcookies && $this->_redirectaddr)
            $this->setcookies();

        $URI_PARTS = parse_url($URI);
        $scheme = (is_array($URI_PARTS) && !empty($URI_PARTS["scheme"])) ? $URI_PARTS["scheme"] : $this->scheme;

        if (empty($url))
            $url = "/";
        $headers = $http_method . " " . $url . " " . $this->_httpversion . "\r\n";
        if (!empty($this->host) && !isset($this->rawheaders['Host'])) {
            $headers .= "Host: " . $this->host;
            if (!empty($this->port) && $this->port != '80')
                $headers .= ":" . $this->port;
            $headers .= "\r\n";
        }
        if (!empty($this->agent))
            $headers .= "User-Agent: " . $this->agent . "\r\n";
        if (!empty($this->accept))
            $headers .= "Accept: " . $this->accept . "\r\n";
        if ($this->use_gzip) {
            // make sure PHP was built with --with-zlib
            // and we can handle gzipp'ed data
            if (function_exists('gzinflate')) {
                $headers .= "Accept-encoding: gzip\r\n";
            } else {
                trigger_error(
                    "use_gzip is on, but PHP was built without zlib support." .
                    "  Requesting file(s) without gzip encoding.",
                    E_USER_NOTICE);
            }
        }
        if (!empty($this->referer))
            $headers .= "Referer: " . $this->referer . "\r\n";
        if (!empty($this->cookies)) {
            if (!is_array($this->cookies))
                $this->cookies = (array)$this->cookies;

            if (count($this->cookies) > 0) {
                $cookie_headers .= 'Cookie: ';
                foreach ($this->cookies as $cookieKey => $cookieVal) {
                    $cookie_headers .= $cookieKey . "=" . urlencode($cookieVal) . "; ";
                }
                // drop the trailing "; "
                $headers .= substr($cookie_headers, 0, -2) . "\r\n";
            }
        }
        if (!empty($this->rawheaders)) {
            if (!is_array($this->rawheaders))
                $this->rawheaders = (array)$this->rawheaders;
            // foreach always starts at the first header, so raw headers
            // are also sent on redirected / repeated requests
            foreach ($this->rawheaders as $headerKey => $headerVal)
                $headers .= $headerKey . ": " . $headerVal . "\r\n";
        }
        if (!empty($content_type)) {
            $headers .= "Content-type: $content_type";
            if ($content_type == "multipart/form-data")
                $headers .= "; boundary=" . $this->_mime_boundary;
            $headers .= "\r\n";
        }
        if (!empty($body))
            $headers .= "Content-length: " . strlen($body) . "\r\n";
        if (!empty($this->user) || !empty($this->pass))
            $headers .= "Authorization: Basic " . base64_encode($this->user . ":" . $this->pass) . "\r\n";

        //add proxy auth headers
        if (!empty($this->proxy_user))
            $headers .= 'Proxy-Authorization: ' . 'Basic ' . base64_encode($this->proxy_user . ':' . $this->proxy_pass) . "\r\n";


        $headers .= "\r\n";

        // set the read timeout if needed
        if ($this->read_timeout > 0)
            stream_set_timeout($fp, $this->read_timeout);
        $this->timed_out = false;

        fwrite($fp, $headers . $body, strlen($headers . $body));

        $this->_redirectaddr = false;
        // keep the property defined (as an empty list) even if the server
        // sends no header lines at all
        $this->headers = array();

        // content was returned gzip encoded?
        $is_gzipped = false;

        while (($currentHeader = fgets($fp, $this->_maxlinelen)) !== false) {
            if ($this->read_timeout > 0 && $this->_check_timeout($fp)) {
                $this->status = -100;
                return false;
            }

            // a blank line (CRLF or bare LF) ends the header section
            if (rtrim($currentHeader, "\r\n") === '')
                break;

            // if a header begins with Location: or URI:, set the redirect
            if (preg_match("/^(?:Location|URI):[ \t]*(.*)/i", chop($currentHeader), $matches)) {
                // get URL portion of the redirect
                $target = $matches[1];
                // only a leading scheme:// makes the target absolute; a "://"
                // inside the query string of a relative target does not
                if (!preg_match("|^[a-z][a-z0-9+.\-]*://|i", $target)) {
                    // no host in the path, so prepend
                    $this->_redirectaddr = $scheme . "://" . $this->host . ":" . $this->port;
                    // eliminate double slash
                    if (!preg_match("|^/|", $target))
                        $this->_redirectaddr .= "/" . $target;
                    else
                        $this->_redirectaddr .= $target;
                } else
                    $this->_redirectaddr = $target;
            }

            if (preg_match("|^HTTP/|", $currentHeader)) {
                if (preg_match("|^HTTP/[^\s]*\s(.*?)\s|", $currentHeader, $status)) {
                    $this->status = $status[1];
                }
                $this->response_code = $currentHeader;
            }

            // header names are case-insensitive
            if (preg_match("/Content-Encoding:\s*gzip/i", $currentHeader)) {
                $is_gzipped = true;
            }

            $this->headers[] = $currentHeader;
        }

        // read the body in chunks of $this->maxlength until EOF
        $results = '';
        do {
            $_data = fread($fp, $this->maxlength);
            if ($_data === false || strlen($_data) == 0) {
                break;
            }
            $results .= $_data;
        } while (true);

        // gunzip
        if ($is_gzipped) {
            // per http://www.php.net/manual/en/function.gzencode.php
            // (skips the fixed 10-byte gzip header)
            $results = substr($results, 10);
            $results = gzinflate($results);
        }

        if ($this->read_timeout > 0 && $this->_check_timeout($fp)) {
            $this->status = -100;
            return false;
        }

        // check if there is a a redirect meta tag

        if (preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i", $results, $match)) {
            $this->_redirectaddr = $this->_expandlinks($match[1], $URI);
        }

        // have we hit our frame depth and is there frame src to fetch?
        if (($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i", $results, $match)) {
            // "$string[] = ..." is a fatal error since PHP 7.1, so switch
            // the results container to an array explicitly
            if (!is_array($this->results))
                $this->results = array();
            $this->results[] = $results;
            for ($x = 0; $x < count($match[1]); $x++)
                $this->_frameurls[] = $this->_expandlinks($match[1][$x], $scheme . "://" . $this->host);
        } // have we already fetched framed content?
        elseif (is_array($this->results))
            $this->results[] = $results;
        // no framed content
        else
            $this->results = $results;

        return $this;
    }

    /*======================================================================*\
        Function:   setcookies()
        Purpose:    set cookies for a redirection: copy every Set-Cookie
                    name/value found in $this->headers into $this->cookies
    \*======================================================================*/

    function setcookies()
    {
        foreach ((array)$this->headers as $header) {
            if (preg_match('/^set-cookie:[\s]+([^=]+)=([^;]+)/i', $header, $match))
                $this->cookies[$match[1]] = urldecode($match[2]);
        }
        return $this;
    }


    /*======================================================================*\
        Function:   _check_timeout
        Purpose:    checks whether timeout has occurred
        Input:      $fp file pointer
        Output:     true if the stream reports a timeout (also sets
                    $this->timed_out); always false when read_timeout is 0
    \*======================================================================*/

    function _check_timeout($fp)
    {
        if ($this->read_timeout > 0) {
            $fp_status = stream_get_meta_data($fp);
            if ($fp_status["timed_out"]) {
                $this->timed_out = true;
                return true;
            }
        }
        return false;
    }

    /*======================================================================*\
        Function:   _connect
        Purpose:    make a socket connection to $this->host:$this->port, or
                    to the proxy when proxy_host and proxy_port are set
        Input:      $fp file pointer (by reference, receives the stream)
        Output:     true on success; false on failure, with $this->status
                    and $this->error describing the problem
    \*======================================================================*/

    function _connect(&$fp)
    {
        if (!empty($this->proxy_host) && !empty($this->proxy_port)) {
            $this->_isproxy = true;

            $host = $this->proxy_host;
            $port = $this->proxy_port;

            if ($this->scheme == 'https') {
                trigger_error("HTTPS connections over proxy are currently not supported", E_USER_ERROR);
                exit;
            }
        } else {
            $host = $this->host;
            $port = $this->port;
        }

        $this->status = 0;

        $context_opts = array();

        if ($this->scheme == 'https') {
            // if cafile or capath is specified, enable certificate
            // verification (including name checks)
            if (isset($this->cafile) || isset($this->capath)) {
                $context_opts['ssl'] = array(
                    'verify_peer' => true,
                    // name check for PHP >= 5.6 ...
                    'verify_peer_name' => true,
                    'peer_name' => $this->host,
                    // ... and for older versions
                    'CN_match' => $this->host,
                    'disable_compression' => true,
                );

                if (isset($this->cafile))
                    $context_opts['ssl']['cafile'] = $this->cafile;
                if (isset($this->capath))
                    $context_opts['ssl']['capath'] = $this->capath;
            }

            $host = 'ssl://' . $host;
        }

        $context = stream_context_create($context_opts);

        $errno = 0;
        $errmsg = '';

        if (version_compare(PHP_VERSION, '5.0.0', '>')) {
            if ($this->scheme == 'http')
                $host = "tcp://" . $host;
            $fp = stream_socket_client(
                "$host:$port",
                $errno,
                $errmsg,
                $this->_fp_timeout,
                STREAM_CLIENT_CONNECT,
                $context);
        } else {
            // legacy path; fsockopen() takes no stream context argument
            $fp = fsockopen(
                $host,
                $port,
                $errno,
                $errmsg,
                $this->_fp_timeout);
        }

        if ($fp) {
            // socket connection succeeded
            return true;
        }

        // socket connection failed
        $this->status = $errno;
        switch ($errno) {
            case -3:
                $this->error = "socket creation failed (-3)";
                break;
            case -4:
                $this->error = "dns lookup failure (-4)";
                break;
            case -5:
                $this->error = "connection refused or timed out (-5)";
                break;
            default:
                $this->error = "connection failed (" . $errno . ")";
        }
        return false;
    }

    /*======================================================================*\
        Function:   _disconnect
        Purpose:    disconnect a socket connection
        Input:      $fp file pointer
        Output:     the result of fclose()
    \*======================================================================*/

    function _disconnect($fp)
    {
        return (fclose($fp));
    }


    /*======================================================================*\
        Function:   _prepare_post_body
        Purpose:    Prepare post body according to encoding type
                    ($this->_submit_type). For multipart/form-data a fresh
                    $this->_mime_boundary is generated.
        Input:      $formvars - form variables
                    $formfiles - form upload files (only sent as
                                 multipart/form-data; unreadable files are
                                 skipped)
        Output:     post body ('' when there is nothing to send)
    \*======================================================================*/

    function _prepare_post_body($formvars, $formfiles)
    {
        // the public API defaults both arguments to ""; casting "" to an
        // array would yield array(0 => "") and a bogus "0=" field
        if ($formvars === "" || $formvars === null)
            $formvars = array();
        if ($formfiles === "" || $formfiles === null)
            $formfiles = array();

        settype($formvars, "array");
        settype($formfiles, "array");
        $postdata = '';

        if (count($formvars) == 0 && count($formfiles) == 0)
            return $postdata;

        switch ($this->_submit_type) {
            case "application/x-www-form-urlencoded":
                foreach ($formvars as $key => $val) {
                    if (is_array($val) || is_object($val)) {
                        foreach ($val as $cur_val) {
                            $postdata .= urlencode($key) . "[]=" . urlencode($cur_val) . "&";
                        }
                    } else
                        $postdata .= urlencode($key) . "=" . urlencode($val) . "&";
                }
                break;

            case "multipart/form-data":
                $this->_mime_boundary = "Snoopy" . md5(uniqid(microtime()));

                foreach ($formvars as $key => $val) {
                    if (is_array($val) || is_object($val)) {
                        foreach ($val as $cur_val) {
                            $postdata .= "--" . $this->_mime_boundary . "\r\n";
                            // {$key}[] keeps the brackets literal without
                            // emitting backslashes into the field name
                            $postdata .= "Content-Disposition: form-data; name=\"{$key}[]\"\r\n\r\n";
                            $postdata .= "$cur_val\r\n";
                        }
                    } else {
                        $postdata .= "--" . $this->_mime_boundary . "\r\n";
                        $postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
                        $postdata .= "$val\r\n";
                    }
                }

                foreach ($formfiles as $field_name => $file_names) {
                    settype($file_names, "array");
                    foreach ($file_names as $file_name) {
                        if (!is_readable($file_name)) continue;

                        // binary-safe, and copes with empty files
                        $file_content = file_get_contents($file_name);
                        if ($file_content === false) continue;
                        $base_name = basename($file_name);

                        $postdata .= "--" . $this->_mime_boundary . "\r\n";
                        $postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
                        $postdata .= "$file_content\r\n";
                    }
                }
                $postdata .= "--" . $this->_mime_boundary . "--\r\n";
                break;
        }

        return $postdata;
    }

    /*======================================================================*\
        Function:   getResults
        Purpose:    return the results of a request
        Output:     $this->results (a string, or an array once framed
                    content has been collected or links were extracted)
    \*======================================================================*/

    function getResults()
    {
        return $this->results;
    }
}
1000:
1001: ?>
1002: