XOOPS  2.6.0
snoopy.php
Go to the documentation of this file.
1 <?php
2 
3 /*************************************************
4  *
5  * Snoopy - the PHP net client
6  * Author: Monte Ohrt <monte@ohrt.com>
7  * Copyright (c): 1999-2014, all rights reserved
8  * Version: 2.0.0
9  * This library is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * This library is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with this library; if not, write to the Free Software
21  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22  *
23  * You may contact the author of Snoopy by e-mail at:
24  * monte@ohrt.com
25  *
26  * The latest version of Snoopy can be obtained from:
27  * http://snoopy.sourceforge.net/
28  *************************************************/
29 class Snoopy
30 {
/**** Public variables ****/

/* user definable vars */

// connection target
public $scheme = 'http';            // "http" or "https"
public $host = "www.php.net";       // name of the host to connect to
public $port = 80;                  // port to connect to

// proxy settings
public $proxy_host = "";            // proxy host name
public $proxy_port = "";            // proxy port
public $proxy_user = "";            // proxy user name
public $proxy_pass = "";            // proxy password

// request headers
public $agent = "Snoopy v2.0.0";    // value of the User-Agent header
public $referer = "";               // value of the Referer header
public $cookies = array();          // cookies to send, e.g. $cookies["username"]="joe";
public $rawheaders = array();       // raw headers to send, e.g. $rawheaders["Content-type"]="text/html";

// redirect / frame handling
public $maxredirs = 5;              // maximum http redirection depth, 0 = disallow
public $lastredirectaddr = "";      // address of the last redirect that was followed
public $offsiteok = true;           // allow redirects that leave the current host
public $maxframes = 0;              // maximum frame content depth, 0 = disallow
public $expandlinks = true;         // expand links to fully qualified URLs; applies to
                                    // fetchlinks(), submitlinks() and submittext()
public $passcookies = true;         // pass received cookies back through redirects
                                    // NOTE: dates, domains and paths are not respected

// http authentication credentials
public $user = "";
public $pass = "";

// http accept types
public $accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";

// response state
public $results = "";               // fetched content is stored here
public $error = "";                 // error messages are stored here
public $response_code = "";         // status line returned by the server
public $headers = array();          // headers returned by the server
public $maxlength = 500000;         // max return data length (body)
public $read_timeout = 0;           // read timeout in seconds, 0 = no timeout
public $timed_out = false;          // set when a read operation timed out
public $status = 0;                 // http request status

public $temp_dir = "/tmp";          // temporary directory writable by the webserver;
                                    // under Windows this should be C:\temp

// deprecated: https requests no longer go through curl, they
// require the openssl extension instead
public $curl_path = false;

// send Accept-encoding: gzip?
public $use_gzip = true;

// file or directory with CA certificates used to verify the remote host
public $cafile;
public $capath;

/**** Private variables ****/

public $_maxlinelen = 4096;         // max line length (headers)

public $_httpmethod = "GET";        // default http request method
public $_httpversion = "HTTP/1.0";  // default http request version
public $_submit_method = "POST";    // default submit method
public $_submit_type = "application/x-www-form-urlencoded"; // default submit type
public $_mime_boundary = "";        // MIME boundary for multipart/form-data submits
public $_redirectaddr = false;      // set when the fetched page is a redirect
public $_redirectdepth = 0;         // incremented on every followed redirect
public $_frameurls = array();       // frame src urls collected from the last page
public $_framedepth = 0;            // incremented for every fetched frame

public $_isproxy = false;           // set when a proxy server is in use
public $_fp_timeout = 30;           // timeout for the socket connection
110 
111  /*======================================================================*\
112  Function: fetch
113  Purpose: fetch the contents of a web page
114  (and possibly other protocols in the
115  future like ftp, nntp, gopher, etc.)
116  Input: $URI the location of the page to fetch
117  Output: $this->results the output text from the fetch
118  \*======================================================================*/
119 
/**
 * Fetch the contents of a web page over http or https.
 *
 * Follows redirects up to $maxredirs and fetches frame sources up to
 * $maxframes; the content ends up in $this->results.
 *
 * @param string $URI the location of the page to fetch
 * @return Snoopy|false $this on success, false when the protocol is not
 *                      supported or the connection could not be made
 */
function fetch($URI)
{
    $URI_PARTS = parse_url($URI);
    // parse_url() returns false for seriously malformed URLs; normalise so
    // the lookups below never index a non-array.
    if (!is_array($URI_PARTS))
        $URI_PARTS = array();
    if (!empty($URI_PARTS["user"]))
        $this->user = $URI_PARTS["user"];
    if (!empty($URI_PARTS["pass"]))
        $this->pass = $URI_PARTS["pass"];
    if (empty($URI_PARTS["query"]))
        $URI_PARTS["query"] = '';
    if (empty($URI_PARTS["path"]))
        $URI_PARTS["path"] = '';
    // a URI without a scheme falls through to the "invalid protocol" branch
    if (empty($URI_PARTS["scheme"]))
        $URI_PARTS["scheme"] = '';

    $scheme = strtolower($URI_PARTS["scheme"]);
    $fp = null;

    switch ($scheme) {
        case "https":
            if (!extension_loaded('openssl')) {
                trigger_error("openssl extension required for HTTPS", E_USER_ERROR);
                exit;
            }
            $this->port = 443;
            // deliberate fall-through: https shares the http code path
        case "http":
            $this->scheme = $scheme;
            $this->host = isset($URI_PARTS["host"]) ? $URI_PARTS["host"] : '';
            if (!empty($URI_PARTS["port"]))
                $this->port = $URI_PARTS["port"];

            if (!$this->_connect($fp))
                return false;

            if ($this->_isproxy) {
                // using proxy, send entire URI
                $this->_httprequest($URI, $fp, $URI, $this->_httpmethod);
            } else {
                // no proxy, send only the path
                $path = $URI_PARTS["path"] . ($URI_PARTS["query"] ? "?" . $URI_PARTS["query"] : "");
                $this->_httprequest($path, $fp, $URI, $this->_httpmethod);
            }

            $this->_disconnect($fp);

            // url was redirected: follow it unless the max depth is reached
            if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth) {
                // only follow redirect if it's on this site, or offsiteok is true
                if (preg_match("|^https?://" . preg_quote($this->host, "|") . "|i", $this->_redirectaddr) || $this->offsiteok) {
                    $this->_redirectdepth++;
                    $this->lastredirectaddr = $this->_redirectaddr;
                    $this->fetch($this->_redirectaddr);
                }
            }

            if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0) {
                // work on a copy: the nested fetch() calls refill _frameurls
                $frameurls = $this->_frameurls;
                $this->_frameurls = array();

                foreach ($frameurls as $frameurl) {
                    if ($this->_framedepth >= $this->maxframes)
                        break;
                    $this->fetch($frameurl);
                    $this->_framedepth++;
                }
            }
            return $this;

        default:
            // not a valid protocol
            $this->error = 'Invalid protocol "' . $URI_PARTS["scheme"] . "\"\n";
            return false;
    }
}
197 
198  /*======================================================================*\
199  Function: submit
200  Purpose: submit an http(s) form
201  Input: $URI the location to post the data
202  $formvars the formvars to use.
203  format: $formvars["var"] = "val";
204  $formfiles an array of files to submit
205  format: $formfiles["var"] = "/dir/filename.ext";
206  Output: $this->results the text output from the post
207  \*======================================================================*/
208 
/**
 * Submit an http(s) form.
 *
 * @param string $URI       the location to post the data to
 * @param mixed  $formvars  form variables, format: $formvars["var"] = "val";
 * @param mixed  $formfiles files to submit, format: $formfiles["var"] = "/dir/filename.ext";
 * @return Snoopy|false $this on success (response text in $this->results),
 *                      false when the protocol is not supported or the
 *                      connection could not be made
 */
function submit($URI, $formvars = "", $formfiles = "")
{
    $postdata = $this->_prepare_post_body($formvars, $formfiles);

    $URI_PARTS = parse_url($URI);
    // parse_url() returns false for seriously malformed URLs; normalise so
    // the lookups below never index a non-array.
    if (!is_array($URI_PARTS))
        $URI_PARTS = array();
    if (!empty($URI_PARTS["user"]))
        $this->user = $URI_PARTS["user"];
    if (!empty($URI_PARTS["pass"]))
        $this->pass = $URI_PARTS["pass"];
    if (empty($URI_PARTS["query"]))
        $URI_PARTS["query"] = '';
    if (empty($URI_PARTS["path"]))
        $URI_PARTS["path"] = '';
    // a URI without a scheme falls through to the "invalid protocol" branch
    if (empty($URI_PARTS["scheme"]))
        $URI_PARTS["scheme"] = '';

    $scheme = strtolower($URI_PARTS["scheme"]);
    $fp = null;

    switch ($scheme) {
        case "https":
            if (!extension_loaded('openssl')) {
                trigger_error("openssl extension required for HTTPS", E_USER_ERROR);
                exit;
            }
            $this->port = 443;
            // deliberate fall-through: https shares the http code path
        case "http":
            $this->scheme = $scheme;
            $this->host = isset($URI_PARTS["host"]) ? $URI_PARTS["host"] : '';
            if (!empty($URI_PARTS["port"]))
                $this->port = $URI_PARTS["port"];

            if (!$this->_connect($fp))
                return false;

            if ($this->_isproxy) {
                // using proxy, send entire URI
                $this->_httprequest($URI, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata);
            } else {
                // no proxy, send only the path
                $path = $URI_PARTS["path"] . ($URI_PARTS["query"] ? "?" . $URI_PARTS["query"] : "");
                $this->_httprequest($path, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata);
            }

            $this->_disconnect($fp);

            // url was redirected: follow it unless the max depth is reached
            if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth) {
                if (!preg_match("|^" . $URI_PARTS["scheme"] . "://|", $this->_redirectaddr))
                    $this->_redirectaddr = $this->_expandlinks($this->_redirectaddr, $URI_PARTS["scheme"] . "://" . $this->host);

                // only follow redirect if it's on this site, or offsiteok is true
                if (preg_match("|^https?://" . preg_quote($this->host, "|") . "|i", $this->_redirectaddr) || $this->offsiteok) {
                    $this->_redirectdepth++;
                    $this->lastredirectaddr = $this->_redirectaddr;
                    if (strpos($this->_redirectaddr, "?") > 0)
                        $this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get
                    else
                        $this->submit($this->_redirectaddr, $formvars, $formfiles);
                }
            }

            if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0) {
                // work on a copy: the nested fetch() calls refill _frameurls
                $frameurls = $this->_frameurls;
                $this->_frameurls = array();

                foreach ($frameurls as $frameurl) {
                    if ($this->_framedepth >= $this->maxframes)
                        break;
                    $this->fetch($frameurl);
                    $this->_framedepth++;
                }
            }
            return $this;

        default:
            // not a valid protocol
            $this->error = 'Invalid protocol "' . $URI_PARTS["scheme"] . "\"\n";
            return false;
    }
}
294 
295  /*======================================================================*\
296  Function: fetchlinks
297  Purpose: fetch the links from a web page
298  Input: $URI where you are fetching from
299  Output: $this->results an array of the URLs
300  \*======================================================================*/
301 
/**
 * Fetch the links from a web page.
 *
 * @param string $URI where you are fetching from
 * @return Snoopy|false $this on success ($this->results holds the URLs),
 *                      false when the fetch failed
 */
function fetchlinks($URI)
{
    if ($this->fetch($URI) === false)
        return false;

    // after a redirect, relative links must be resolved against the
    // address that was actually fetched
    if ($this->lastredirectaddr)
        $URI = $this->lastredirectaddr;

    if (is_array($this->results)) {
        // framed content: strip and expand each page separately, the same
        // way submitlinks() does, so _expandlinks() never sees nested arrays
        $pages = count($this->results);
        for ($x = 0; $x < $pages; $x++) {
            $this->results[$x] = $this->_striplinks($this->results[$x]);
            if ($this->expandlinks)
                $this->results[$x] = $this->_expandlinks($this->results[$x], $URI);
        }
    } else {
        $this->results = $this->_striplinks($this->results);
        if ($this->expandlinks)
            $this->results = $this->_expandlinks($this->results, $URI);
    }

    return $this;
}
319 
320  /*======================================================================*\
321  Function: fetchform
322  Purpose: fetch the form elements from a web page
323  Input: $URI where you are fetching from
324  Output: $this->results the resulting html form
325  \*======================================================================*/
326 
/**
 * Fetch the form elements from a web page.
 *
 * @param string $URI where you are fetching from
 * @return Snoopy|false $this on success ($this->results holds the form
 *                      markup), false when the fetch failed
 */
function fetchform($URI)
{
    if ($this->fetch($URI) === false)
        return false;

    if (!is_array($this->results)) {
        // single page
        $this->results = $this->_stripform($this->results);
        return $this;
    }

    // framed content: reduce every fetched page to its form elements
    $i = 0;
    while ($i < count($this->results)) {
        $this->results[$i] = $this->_stripform($this->results[$i]);
        $i++;
    }

    return $this;
}
342 
343 
344  /*======================================================================*\
345  Function: fetchtext
346  Purpose: fetch the text from a web page, stripping the links
347  Input: $URI where you are fetching from
348  Output: $this->results the text from the web page
349  \*======================================================================*/
350 
/**
 * Fetch the text from a web page, with the markup stripped.
 *
 * @param string $URI where you are fetching from
 * @return Snoopy|false $this on success ($this->results holds the text),
 *                      false when the fetch failed
 */
function fetchtext($URI)
{
    if ($this->fetch($URI) === false)
        return false;

    if (!is_array($this->results)) {
        // single page
        $this->results = $this->_striptext($this->results);
        return $this;
    }

    // framed content: reduce every fetched page to its text
    $i = 0;
    while ($i < count($this->results)) {
        $this->results[$i] = $this->_striptext($this->results[$i]);
        $i++;
    }

    return $this;
}
363 
364  /*======================================================================*\
365  Function: submitlinks
366  Purpose: grab links from a form submission
367  Input: $URI where you are submitting from
368  Output: $this->results an array of the links from the post
369  \*======================================================================*/
370 
/**
 * Grab the links from the response to a form submission.
 *
 * @param string $URI       where you are submitting to
 * @param mixed  $formvars  form variables, see submit()
 * @param mixed  $formfiles files to upload, see submit()
 * @return Snoopy|false $this on success ($this->results holds the links),
 *                      false when the submission failed
 */
function submitlinks($URI, $formvars = "", $formfiles = "")
{
    if ($this->submit($URI, $formvars, $formfiles) === false)
        return false;

    // after a redirect, relative links must be resolved against the
    // address that was actually fetched
    if ($this->lastredirectaddr)
        $URI = $this->lastredirectaddr;

    if (is_array($this->results)) {
        $pages = count($this->results);
        for ($x = 0; $x < $pages; $x++) {
            $this->results[$x] = $this->_striplinks($this->results[$x]);
            if ($this->expandlinks)
                $this->results[$x] = $this->_expandlinks($this->results[$x], $URI);
        }
    } else {
        $this->results = $this->_striplinks($this->results);
        if ($this->expandlinks)
            $this->results = $this->_expandlinks($this->results, $URI);
    }

    return $this;
}
391 
392  /*======================================================================*\
393  Function: submittext
394  Purpose: grab text from a form submission
395  Input: $URI where you are submitting from
396  Output: $this->results the text from the web page
397  \*======================================================================*/
398 
/**
 * Grab the text from the response to a form submission.
 *
 * @param string $URI       where you are submitting to
 * @param mixed  $formvars  form variables, see submit()
 * @param mixed  $formfiles files to upload, see submit()
 * @return Snoopy|false $this on success ($this->results holds the text),
 *                      false when the submission failed
 */
function submittext($URI, $formvars = "", $formfiles = "")
{
    if ($this->submit($URI, $formvars, $formfiles) === false)
        return false;

    // after a redirect, expansion must use the address that was actually
    // fetched as its base
    if ($this->lastredirectaddr)
        $URI = $this->lastredirectaddr;

    if (is_array($this->results)) {
        $pages = count($this->results);
        for ($x = 0; $x < $pages; $x++) {
            $this->results[$x] = $this->_striptext($this->results[$x]);
            if ($this->expandlinks)
                $this->results[$x] = $this->_expandlinks($this->results[$x], $URI);
        }
    } else {
        $this->results = $this->_striptext($this->results);
        if ($this->expandlinks)
            $this->results = $this->_expandlinks($this->results, $URI);
    }

    return $this;
}
419 
420 
421  /*======================================================================*\
422  Function: set_submit_multipart
423  Purpose: Set the form submission content type to
424  multipart/form-data
425  \*======================================================================*/
/**
 * Set the form submission content type to multipart/form-data.
 *
 * @return Snoopy $this, to allow call chaining
 */
function set_submit_multipart()
{
    $this->_submit_type = "multipart/form-data";
    return $this;
}
431 
432 
433  /*======================================================================*\
434  Function: set_submit_normal
435  Purpose: Set the form submission content type to
436  application/x-www-form-urlencoded
437  \*======================================================================*/
/**
 * Switch form submissions back to the url-encoded content type.
 *
 * @return Snoopy $this, to allow call chaining
 */
function set_submit_normal()
{
    $urlencoded = "application/x-www-form-urlencoded";
    $this->_submit_type = $urlencoded;

    return $this;
}
443 
444 
445 
446 
447  /*======================================================================*\
448  Private functions
449  \*======================================================================*/
450 
451 
452  /*======================================================================*\
453  Function: _striplinks
454  Purpose: strip the hyperlinks from an html document
455  Input: $document document to strip.
456  Output: $match an array of the links
457  \*======================================================================*/
458 
/**
 * Strip the hyperlinks from an html document.
 *
 * @param string $document document to strip
 * @return array the href values found: quoted ones first, then unquoted
 *               ones; an empty array when the document has no links
 */
function _striplinks($document)
{
    preg_match_all("'<\s*a\s.*?href\s*=\s*          # find <a href=
                    ([\"\'])?                       # find single or double quote
                    (?(1) (.*?)\\1 | ([^\s>]+))     # if quote found, match up to next matching
                                                    # quote, otherwise match up to next space
                    'isx", $document, $links);

    // Start from an empty list so a document without links yields array()
    // rather than an undefined variable.
    $match = array();

    // catenate the non-empty matches from the conditional subpattern:
    // group 2 holds quoted hrefs, group 3 unquoted ones
    foreach (array(2, 3) as $group) {
        if (empty($links[$group]))
            continue;
        foreach ($links[$group] as $val) {
            if (!empty($val))
                $match[] = $val;
        }
    }

    // return the links
    return $match;
}
483 
484  /*======================================================================*\
485  Function: _stripform
486  Purpose: strip the form elements from an html document
487  Input: $document document to strip.
488  Output: $match an array of the links
489  \*======================================================================*/
490 
/**
 * Reduce an html document to its form-related elements.
 *
 * @param string $document document to strip
 * @return string the matched form, input, select, textarea and option
 *                elements, joined with CRLF
 */
function _stripform($document)
{
    $pattern = "'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi";

    preg_match_all($pattern, $document, $found);

    // element 0 holds the full text of every match; glue them together
    return implode("\r\n", $found[0]);
}
501 
502 
503  /*======================================================================*\
504  Function: _striptext
505  Purpose: strip the text from an html document
506  Input: $document document to strip.
507  Output: $text the resulting text
508  \*======================================================================*/
509 
/**
 * Strip the markup from an html document and decode common entities.
 *
 * @param string $document document to strip
 * @return string the resulting text
 */
function _striptext($document)
{
    // The entities are listed one by one; $search and $replace are parallel
    // arrays, so entry N of one belongs to entry N of the other.
    // NOTE(review): the chr() replacements are single bytes while the umlaut
    // replacements are literals in this file's encoding - confirm the
    // intended output charset with callers before unifying them.

    $search = array("'<script[^>]*?>.*?</script>'si", // strip out javascript
        "'<[\/\!]*?[^<>]*?>'si", // strip out html tags
        "'([\r\n])[\s]+'", // strip out white space
        "'&(quot|#34|#034|#x22);'i", // replace html entities
        "'&(amp|#38|#038|#x26);'i", // added hexadecimal values
        "'&(lt|#60|#060|#x3c);'i",
        "'&(gt|#62|#062|#x3e);'i",
        "'&(nbsp|#160|#xa0);'i",
        "'&(iexcl|#161);'i",
        "'&(cent|#162);'i",
        "'&(pound|#163);'i",
        "'&(copy|#169);'i",
        "'&(reg|#174);'i",
        "'&(deg|#176);'i",
        "'&(#39|#039|#x27);'",
        "'&(euro|#8364);'i", // europe
        "'&a(uml|UML);'", // german
        "'&o(uml|UML);'",
        "'&u(uml|UML);'",
        "'&A(uml|UML);'",
        "'&O(uml|UML);'",
        "'&U(uml|UML);'",
        "'&szlig;'i",
    );
    $replace = array("",
        "",
        "\\1",
        "\"",
        "&",
        "<",
        ">",
        " ",
        chr(161),
        chr(162),
        chr(163),
        chr(169),
        chr(174),
        chr(176),
        chr(39),
        chr(128),
        "ä",
        "ö",
        "ü",
        "Ä",
        "Ö",
        "Ü", // was a mis-encoded two-character sequence
        "ß",
    );

    return preg_replace($search, $replace, $document);
}
570 
571  /*======================================================================*\
572  Function: _expandlinks
573  Purpose: expand each link into a fully qualified URL
574  Input: $links the links to qualify
575  $URI the full URI to get the base from
576  Output: $expandedLinks the expanded links
577  \*======================================================================*/
578 
/**
 * Expand each link into a fully qualified URL.
 *
 * @param string|array $links the link(s) to qualify
 * @param string       $URI   the full URI to take the base from
 * @return string|array the expanded link(s), same shape as $links
 */
function _expandlinks($links, $URI)
{
    // base = $URI without query string, trailing file name and trailing slash
    preg_match("/^[^\?]+/", $URI, $match);

    $match = preg_replace("|/[^\/\.]+\.[^\/\.]+$|", "", $match[0]);
    $match = preg_replace("|/$|", "", $match);
    $match_part = parse_url($match);
    $match_root =
        $match_part["scheme"] . "://" . $match_part["host"];

    // The rules run in order on every link:
    //  1. drop "http://<current host>" so the link becomes root-relative
    //  2. root-relative links get the scheme://host of $URI
    //  3. anything that is not already absolute (http, https, mailto)
    //     is treated as relative to the base directory; https must be
    //     excluded here or absolute https links get the base prepended
    //  4./5. collapse "/./" and "/dir/../" segments
    $search = array("|^http://" . preg_quote($this->host, "|") . "|i",
        "|^(\/)|i",
        "|^(?!https?://)(?!mailto:)|i",
        "|/\./|",
        "|/[^\/]+/\.\./|"
    );

    $replace = array("",
        $match_root . "/",
        $match . "/",
        "/",
        "/"
    );

    return preg_replace($search, $replace, $links);
}
608 
609  /*======================================================================*\
610  Function: _httprequest
611  Purpose: go get the http(s) data from the server
612  Input: $url the url to fetch
613  $fp the current open file pointer
614  $URI the full URI
615  $body body contents to send if any (POST)
616  Output:
617  \*======================================================================*/
618 
/**
 * Send an http(s) request over an open connection and read the response.
 *
 * Fills $this->headers, $this->status, $this->response_code,
 * $this->_redirectaddr, $this->_frameurls and $this->results.
 *
 * @param string   $url          the url to request (path, or full URI via a proxy)
 * @param resource $fp           the open connection
 * @param string   $URI          the full URI
 * @param string   $http_method  request method
 * @param string   $content_type value for the Content-type header, if any
 * @param string   $body         body contents to send, if any (POST)
 * @return Snoopy|false $this, or false when a read timed out
 */
function _httprequest($url, $fp, $URI, $http_method, $content_type = "", $body = "")
{
    $cookie_headers = '';
    if ($this->passcookies && $this->_redirectaddr)
        $this->setcookies();

    $URI_PARTS = parse_url($URI);
    if (empty($url))
        $url = "/";

    // ---- build the request headers ----
    $headers = $http_method . " " . $url . " " . $this->_httpversion . "\r\n";
    if (!empty($this->host) && !isset($this->rawheaders['Host'])) {
        $headers .= "Host: " . $this->host;
        if (!empty($this->port) && $this->port != '80')
            $headers .= ":" . $this->port;
        $headers .= "\r\n";
    }
    if (!empty($this->agent))
        $headers .= "User-Agent: " . $this->agent . "\r\n";
    if (!empty($this->accept))
        $headers .= "Accept: " . $this->accept . "\r\n";
    if ($this->use_gzip) {
        // make sure PHP was built with --with-zlib
        // and we can handle gzipp'ed data
        if (function_exists('gzinflate')) {
            $headers .= "Accept-encoding: gzip\r\n";
        } else {
            trigger_error(
                "use_gzip is on, but PHP was built without zlib support." .
                " Requesting file(s) without gzip encoding.",
                E_USER_NOTICE);
        }
    }
    if (!empty($this->referer))
        $headers .= "Referer: " . $this->referer . "\r\n";
    if (!empty($this->cookies)) {
        if (!is_array($this->cookies))
            $this->cookies = (array)$this->cookies;

        if (count($this->cookies) > 0) {
            $cookie_headers .= 'Cookie: ';
            foreach ($this->cookies as $cookieKey => $cookieVal) {
                $cookie_headers .= $cookieKey . "=" . urlencode($cookieVal) . "; ";
            }
            // drop the trailing "; "
            $headers .= substr($cookie_headers, 0, -2) . "\r\n";
        }
    }
    if (!empty($this->rawheaders)) {
        if (!is_array($this->rawheaders))
            $this->rawheaders = (array)$this->rawheaders;
        // foreach always starts at the first element, so the raw headers
        // are sent on every request, including followed redirects
        foreach ($this->rawheaders as $headerKey => $headerVal)
            $headers .= $headerKey . ": " . $headerVal . "\r\n";
    }
    if (!empty($content_type)) {
        $headers .= "Content-type: $content_type";
        if ($content_type == "multipart/form-data")
            $headers .= "; boundary=" . $this->_mime_boundary;
        $headers .= "\r\n";
    }
    if (!empty($body))
        $headers .= "Content-length: " . strlen($body) . "\r\n";
    if (!empty($this->user) || !empty($this->pass))
        $headers .= "Authorization: Basic " . base64_encode($this->user . ":" . $this->pass) . "\r\n";

    //add proxy auth headers
    if (!empty($this->proxy_user))
        $headers .= 'Proxy-Authorization: ' . 'Basic ' . base64_encode($this->proxy_user . ':' . $this->proxy_pass) . "\r\n";

    $headers .= "\r\n";

    // set the read timeout if needed
    if ($this->read_timeout > 0)
        stream_set_timeout($fp, $this->read_timeout);
    $this->timed_out = false;

    // ---- send the request ----
    $request = $headers . $body;
    fwrite($fp, $request, strlen($request));

    $this->_redirectaddr = false;
    // keep the property defined even when the response carries no headers
    $this->headers = array();

    // content was returned gzip encoded?
    $is_gzipped = false;

    // ---- read the response headers ----
    while ($currentHeader = fgets($fp, $this->_maxlinelen)) {
        if ($this->read_timeout > 0 && $this->_check_timeout($fp)) {
            $this->status = -100;
            return false;
        }

        if ($currentHeader == "\r\n")
            break;

        // if a header begins with Location: or URI:, set the redirect;
        // the whitespace after the colon is optional
        if (preg_match("/^(Location:|URI:)[ \t]*(.*)/i", chop($currentHeader), $matches)) {
            // look for :// in the Location header to see if hostname is included
            if (!preg_match("|\:\/\/|", $matches[2])) {
                // no host in the path, so prepend
                $this->_redirectaddr = $URI_PARTS["scheme"] . "://" . $this->host . ":" . $this->port;
                // eliminate double slash
                if (!preg_match("|^/|", $matches[2]))
                    $this->_redirectaddr .= "/" . $matches[2];
                else
                    $this->_redirectaddr .= $matches[2];
            } else
                $this->_redirectaddr = $matches[2];
        }

        if (preg_match("|^HTTP/|", $currentHeader)) {
            if (preg_match("|^HTTP/[^\s]*\s(.*?)\s|", $currentHeader, $status)) {
                $this->status = $status[1];
            }
            $this->response_code = $currentHeader;
        }

        // header names are case-insensitive
        if (preg_match("/^Content-Encoding:\s*gzip/i", $currentHeader)) {
            $is_gzipped = true;
        }

        $this->headers[] = $currentHeader;
    }

    // ---- read the body; $maxlength is the size of each read ----
    $results = '';
    while (true) {
        $_data = fread($fp, $this->maxlength);
        if ($_data === false || strlen($_data) == 0)
            break;
        $results .= $_data;
    }

    // gunzip
    if ($is_gzipped) {
        // per http://www.php.net/manual/en/function.gzencode.php
        // skip the 10 byte gzip header and inflate the raw deflate stream
        $results = substr($results, 10);
        $results = gzinflate($results);
    }

    if ($this->read_timeout > 0 && $this->_check_timeout($fp)) {
        $this->status = -100;
        return false;
    }

    // check if there is a redirect meta tag
    if (preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i", $results, $match)) {
        $this->_redirectaddr = $this->_expandlinks($match[1], $URI);
    }

    // have we hit our frame depth and is there frame src to fetch?
    if (($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\">]+)'i", $results, $match)) {
        // $this->results defaults to a string and [] cannot be applied to
        // a string, so switch to an array before collecting pages
        if (!is_array($this->results))
            $this->results = array();
        $this->results[] = $results;
        $frames = count($match[1]);
        for ($x = 0; $x < $frames; $x++)
            $this->_frameurls[] = $this->_expandlinks($match[1][$x], $URI_PARTS["scheme"] . "://" . $this->host);
    } // have we already fetched framed content?
    elseif (is_array($this->results))
        $this->results[] = $results;
    // no framed content
    else
        $this->results = $results;

    return $this;
}
784 
785  /*======================================================================*\
786  Function: setcookies()
787  Purpose: set cookies for a redirection
788  \*======================================================================*/
789 
/**
 * Copy the cookies from the last response headers into $this->cookies
 * so they are passed along on a redirect.
 *
 * @return Snoopy $this, to allow call chaining
 */
function setcookies()
{
    $pattern = '/^set-cookie:[\s]+([^=]+)=([^;]+)/i';

    $idx = 0;
    while ($idx < count($this->headers)) {
        $line = $this->headers[$idx];
        $idx++;

        if (!preg_match($pattern, $line, $parts))
            continue;

        // group 1 is the cookie name, group 2 its url-encoded value
        $this->cookies[$parts[1]] = urldecode($parts[2]);
    }

    return $this;
}
798 
799 
800  /*======================================================================*\
801  Function: _check_timeout
802  Purpose: checks whether timeout has occurred
803  Input: $fp file pointer
804  \*======================================================================*/
805 
/**
 * Check whether a read timeout has occurred on the connection.
 *
 * @param resource $fp file pointer
 * @return bool true when the stream reports a timeout; $this->timed_out
 *              is set as well in that case
 */
function _check_timeout($fp)
{
    // timeouts are disabled: nothing to check
    if (!($this->read_timeout > 0))
        return false;

    $meta = socket_get_status($fp);
    if (!$meta["timed_out"])
        return false;

    $this->timed_out = true;
    return true;
}
817 
818  /*======================================================================*\
819  Function: _connect
820  Purpose: make a socket connection
821  Input: $fp file pointer
822  \*======================================================================*/
823 
/**
 * Make a socket connection to the target host or the configured proxy.
 *
 * @param resource|null $fp receives the connection handle
 * @return bool true when the connection succeeded; on failure
 *              $this->status and $this->error describe the problem
 */
function _connect(&$fp)
{
    if (!empty($this->proxy_host) && !empty($this->proxy_port)) {
        $this->_isproxy = true;

        $host = $this->proxy_host;
        $port = $this->proxy_port;

        if ($this->scheme == 'https') {
            trigger_error("HTTPS connections over proxy are currently not supported", E_USER_ERROR);
            exit;
        }
    } else {
        $host = $this->host;
        $port = $this->port;
    }

    $this->status = 0;

    $context_opts = array();

    if ($this->scheme == 'https') {
        // if cafile or capath is specified, enable certificate
        // verification (including name checks)
        if (isset($this->cafile) || isset($this->capath)) {
            $context_opts['ssl'] = array(
                'verify_peer' => true,
                'CN_match' => $this->host,
                'disable_compression' => true,
            );

            if (isset($this->cafile))
                $context_opts['ssl']['cafile'] = $this->cafile;
            if (isset($this->capath))
                $context_opts['ssl']['capath'] = $this->capath;
        }

        $host = 'ssl://' . $host;
    }

    $context = stream_context_create($context_opts);

    $errno = 0;
    $errstr = '';

    if (version_compare(PHP_VERSION, '5.0.0', '>')) {
        if ($this->scheme == 'http')
            $host = "tcp://" . $host;
        $fp = stream_socket_client(
            "$host:$port",
            $errno,
            $errstr,
            $this->_fp_timeout,
            STREAM_CLIENT_CONNECT,
            $context);
    } else {
        $fp = fsockopen(
            $host,
            $port,
            $errno,
            $errstr,
            $this->_fp_timeout,
            $context);
    }

    if ($fp) {
        // socket connection succeeded
        return true;
    }

    // socket connection failed
    $this->status = $errno;
    // every case needs its own break, otherwise control falls through to
    // the default and the specific message is overwritten
    switch ($errno) {
        case -3:
            $this->error = "socket creation failed (-3)";
            break;
        case -4:
            $this->error = "dns lookup failure (-4)";
            break;
        case -5:
            $this->error = "connection refused or timed out (-5)";
            break;
        default:
            $this->error = "connection failed (" . $errno . ")";
            break;
    }
    return false;
}
905 
906  /*======================================================================*\
907  Function: _disconnect
908  Purpose: disconnect a socket connection
909  Input: $fp file pointer
910  \*======================================================================*/
911 
/**
 * Close a socket connection.
 *
 * @param resource $fp file pointer
 * @return bool the result of fclose()
 */
function _disconnect($fp)
{
    $closed = fclose($fp);

    return $closed;
}
916 
917 
918  /*======================================================================*\
919  Function: _prepare_post_body
920  Purpose: Prepare post body according to encoding type
921  Input: $formvars - form variables
922  $formfiles - form upload files
923  Output: post body
924  \*======================================================================*/
925 
/**
 * Prepare the post body according to the current submit type.
 *
 * @param mixed $formvars  form variables, name => value (or name => list of values)
 * @param mixed $formfiles form upload files, field name => file name(s)
 * @return string|null the post body, or null when there is nothing to send
 */
function _prepare_post_body($formvars, $formfiles)
{
    // The public API uses "" as the "nothing given" default. Casting "" to
    // an array yields array(""), which would be posted as a bogus "0="
    // field, so map the empty defaults to real empty arrays first.
    if ($formvars === "" || $formvars === null)
        $formvars = array();
    if ($formfiles === "" || $formfiles === null)
        $formfiles = array();

    settype($formvars, "array");
    settype($formfiles, "array");
    $postdata = '';

    if (count($formvars) == 0 && count($formfiles) == 0)
        return;

    switch ($this->_submit_type) {
        case "application/x-www-form-urlencoded":
            foreach ($formvars as $key => $val) {
                if (is_array($val) || is_object($val)) {
                    foreach ($val as $cur_val) {
                        $postdata .= urlencode($key) . "[]=" . urlencode($cur_val) . "&";
                    }
                } else
                    $postdata .= urlencode($key) . "=" . urlencode($val) . "&";
            }
            break;

        case "multipart/form-data":
            $this->_mime_boundary = "Snoopy" . md5(uniqid(microtime()));

            foreach ($formvars as $key => $val) {
                if (is_array($val) || is_object($val)) {
                    foreach ($val as $cur_val) {
                        $postdata .= "--" . $this->_mime_boundary . "\r\n";
                        // braces keep "[" from being parsed as an array
                        // offset without emitting literal backslashes
                        $postdata .= "Content-Disposition: form-data; name=\"{$key}[]\"\r\n\r\n";
                        $postdata .= "$cur_val\r\n";
                    }
                } else {
                    $postdata .= "--" . $this->_mime_boundary . "\r\n";
                    $postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
                    $postdata .= "$val\r\n";
                }
            }

            foreach ($formfiles as $field_name => $file_names) {
                settype($file_names, "array");
                foreach ($file_names as $file_name) {
                    if (!is_readable($file_name)) continue;

                    // file_get_contents() also copes with empty files,
                    // where fread() with a zero length would fail
                    $file_content = file_get_contents($file_name);
                    if ($file_content === false) continue;
                    $base_name = basename($file_name);

                    $postdata .= "--" . $this->_mime_boundary . "\r\n";
                    $postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
                    $postdata .= "$file_content\r\n";
                }
            }
            $postdata .= "--" . $this->_mime_boundary . "--\r\n";
            break;
    }

    return $postdata;
}
988 
/*======================================================================*\
    Function:   getResults
    Purpose:    accessor for the results of a request
    Output:     the current contents of $this->results
\*======================================================================*/

function getResults()
{
    $results = $this->results;

    return $results;
}
999 }
1000 
1001 ?>
_httprequest($url, $fp, $URI, $http_method, $content_type="", $body="")
Definition: snoopy.php:619
$status
Definition: snoopy.php:77
$passcookies
Definition: snoopy.php:57
$path
Definition: execute.php:31
$maxredirs
Definition: snoopy.php:50
$maxframes
Definition: snoopy.php:53
$maxlength
Definition: snoopy.php:72
$_mime_boundary
Definition: snoopy.php:102
$host
Definition: snoopy.php:36
fetchform($URI)
Definition: snoopy.php:327
_expandlinks($links, $URI)
Definition: snoopy.php:579
$_redirectaddr
Definition: snoopy.php:103
$error
Definition: snoopy.php:69
_striptext($document)
Definition: snoopy.php:510
$text
Definition: qrrender.php:27
$accept
Definition: snoopy.php:65
$referer
Definition: snoopy.php:44
$port
Definition: snoopy.php:37
$temp_dir
Definition: snoopy.php:79
_striplinks($document)
Definition: snoopy.php:459
$headers
Definition: snoopy.php:71
$capath
Definition: snoopy.php:92
$search
Definition: index.php:28
$timed_out
Definition: snoopy.php:76
getResults()
Definition: snoopy.php:995
exit
Definition: browse.php:104
setcookies()
Definition: snoopy.php:790
$proxy_host
Definition: snoopy.php:38
$proxy_port
Definition: snoopy.php:39
$_redirectdepth
Definition: snoopy.php:104
$proxy_pass
Definition: snoopy.php:41
$pass
Definition: snoopy.php:62
$rawheaders
Definition: snoopy.php:47
$proxy_user
Definition: snoopy.php:40
fetch($URI)
Definition: snoopy.php:120
$read_timeout
Definition: snoopy.php:73
set_submit_multipart()
Definition: snoopy.php:426
$_maxlinelen
Definition: snoopy.php:96
submitlinks($URI, $formvars="", $formfiles="")
Definition: snoopy.php:371
$lastredirectaddr
Definition: snoopy.php:51
_disconnect($fp)
Definition: snoopy.php:912
$scheme
Definition: snoopy.php:35
$_httpmethod
Definition: snoopy.php:98
submittext($URI, $formvars="", $formfiles="")
Definition: snoopy.php:399
$cookies
Definition: snoopy.php:45
fetchtext($URI)
Definition: snoopy.php:351
submit($URI, $formvars="", $formfiles="")
Definition: snoopy.php:209
$offsiteok
Definition: snoopy.php:52
$_submit_type
Definition: snoopy.php:101
$use_gzip
Definition: snoopy.php:88
$curl_path
Definition: snoopy.php:83
$cafile
Definition: snoopy.php:91
$user
Definition: snoopy.php:61
$_frameurls
Definition: snoopy.php:105
$url
Definition: register.php:72
$results
Definition: snoopy.php:67
$response_code
Definition: snoopy.php:70
fetchlinks($URI)
Definition: snoopy.php:302
$expandlinks
Definition: snoopy.php:54
$_fp_timeout
Definition: snoopy.php:109
$_framedepth
Definition: snoopy.php:106
$agent
Definition: snoopy.php:43
_check_timeout($fp)
Definition: snoopy.php:806
_connect(&$fp)
Definition: snoopy.php:824
_prepare_post_body($formvars, $formfiles)
Definition: snoopy.php:926
_stripform($document)
Definition: snoopy.php:491
$_submit_method
Definition: snoopy.php:100
$_httpversion
Definition: snoopy.php:99
$_isproxy
Definition: snoopy.php:108
set_submit_normal()
Definition: snoopy.php:438