XOOPS 2.5.6  Final
 All Classes Namespaces Files Functions Variables Pages
snoopy.php
Go to the documentation of this file.
1 <?php
2 
3 /*************************************************
4 
5 Snoopy - the PHP net client
6 Author: Monte Ohrt <monte@ispi.net>
7 Copyright (c): 1999-2008 New Digital Group, all rights reserved
8 Version: 1.2.4
9 
10  * This library is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * This library is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with this library; if not, write to the Free Software
22  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 
24 You may contact the author of Snoopy by e-mail at:
25 monte@ispi.net
26 
27 Or, write to:
28 Monte Ohrt
29 CTO, ispi
30 237 S. 70th suite 220
31 Lincoln, NE 68510
32 
33 The latest version of Snoopy can be obtained from:
34 http://snoopy.sourceforge.net/
35 
36 *************************************************/
37 
38 class Snoopy
39 {
40  /**** Public variables ****/
41 
42  /* user definable vars */
43 
// ---- connection target -------------------------------------------------
var $host = 'www.php.net';      // host name we are connecting to
var $port = 80;                 // port we are connecting to

// ---- proxy settings ----------------------------------------------------
var $proxy_host = '';           // proxy host to use
var $proxy_port = '';           // proxy port to use
var $proxy_user = '';           // proxy user to use
var $proxy_pass = '';           // proxy password to use

// ---- request decoration ------------------------------------------------
var $agent = 'Snoopy v1.2.4';   // agent we masquerade as
var $referer = '';              // referer info to pass
var $cookies = array();         // cookies to pass, e.g. $cookies["username"]="joe";
var $rawheaders = array();      // raw headers to send, e.g. $rawheaders["Content-type"]="text/html";

// ---- redirect / frame behaviour ----------------------------------------
var $maxredirs = 5;             // http redirection depth maximum. 0 = disallow
var $lastredirectaddr = '';     // address of the last redirect that was followed
var $offsiteok = true;          // allows redirection off-site
var $maxframes = 0;             // frame content depth maximum. 0 = disallow
var $expandlinks = true;        // expand links to fully qualified URLs; only applies
                                // to fetchlinks(), submitlinks() and submittext()
var $passcookies = true;        // pass set cookies back through redirects
                                // NOTE: this currently does not respect
                                // dates, domains or paths.

// ---- http authentication -----------------------------------------------
var $user = '';                 // user for http authentication
var $pass = '';                 // password for http authentication

// http accept types
var $accept = 'image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*';

// ---- results -----------------------------------------------------------
var $results = '';              // where the content is put

var $error = '';                // error messages sent here
var $response_code = '';        // response code returned from server
var $headers = array();         // headers returned from server sent here
var $maxlength = 500000;        // max return data length (body)
var $read_timeout = 0;          // timeout on read operations, in seconds
                                // supported only since PHP 4 Beta 4
                                // set to 0 to disallow timeouts
var $timed_out = false;         // if a read operation timed out
var $status = 0;                // http request status

// ---- environment -------------------------------------------------------
var $temp_dir = '/tmp';         // temporary directory that the webserver
                                // has permission to write to.
                                // under Windows, this should be C:\temp

// Snoopy will use cURL for fetching SSL content if a full system path to
// the cURL binary is supplied here. Set to false if you do not have cURL
// installed. See http://curl.haxx.se for details on installing cURL.
// Snoopy does *not* use the cURL library functions built into php, as
// these functions are not stable as of this Snoopy release.
var $curl_path = '/usr/local/bin/curl';

/**** Private variables ****/

var $_maxlinelen = 4096;        // max line length (headers)

var $_httpmethod = 'GET';       // default http request method
var $_httpversion = 'HTTP/1.0'; // default http request version
var $_submit_method = 'POST';   // default submit method
var $_submit_type = 'application/x-www-form-urlencoded'; // default submit type
var $_mime_boundary = '';       // MIME boundary for multipart/form-data submit type
var $_redirectaddr = false;     // will be set if page fetched is a redirect
var $_redirectdepth = 0;        // increments on an http redirect
var $_frameurls = array();      // frame src urls
var $_framedepth = 0;           // increments on frame depth

var $_isproxy = false;          // set if using a proxy server
var $_fp_timeout = 30;          // timeout for socket connection
119 
120 /*======================================================================*\
121  Function: fetch
122  Purpose: fetch the contents of a web page
123  (and possibly other protocols in the
124  future like ftp, nntp, gopher, etc.)
125  Input: $URI the location of the page to fetch
126  Output: $this->results the output text from the fetch
127 \*======================================================================*/
128 
/**
 * Fetch the contents of a web page over http or https.
 *
 * The response is stored in $this->results by the request helpers.
 * Redirects are followed while $this->maxredirs allows it, and frame
 * sources are fetched while $this->maxframes allows it.
 *
 * @param string $URI location of the page to fetch
 * @return bool false when the scheme is neither http nor https, when the
 *              http connection cannot be opened, or when https is requested
 *              without an executable curl binary; true otherwise
 */
function fetch($URI)
{
    //preg_match("|^([^:]+)://([^:/]+)(:[\d]+)*(.*)|",$URI,$URI_PARTS);
    $URI_PARTS = parse_url($URI);
    // parse_url() returns false for seriously malformed URLs
    if (!is_array($URI_PARTS))
        $URI_PARTS = array();
    if (!empty($URI_PARTS["user"]))
        $this->user = $URI_PARTS["user"];
    if (!empty($URI_PARTS["pass"]))
        $this->pass = $URI_PARTS["pass"];
    // normalise the optional components so later reads never hit a missing key
    foreach (array("scheme", "host", "query", "path") as $part)
    {
        if (empty($URI_PARTS[$part]))
            $URI_PARTS[$part] = '';
    }

    $scheme = strtolower($URI_PARTS["scheme"]);
    if ($scheme !== "http" && $scheme !== "https")
    {
        // not a valid protocol
        $this->error = 'Invalid protocol "'.$URI_PARTS["scheme"].'"'."\n";
        return false;
    }

    if ($scheme === "https")
    {
        // https is delegated to an external curl binary
        if (!$this->curl_path)
            return false;
        if (function_exists("is_executable") && !is_executable($this->curl_path))
            return false;
    }

    $this->host = $URI_PARTS["host"];
    if (!empty($URI_PARTS["port"]))
        $this->port = $URI_PARTS["port"];

    if ($this->_isproxy)
    {
        // using proxy, send entire URI
        $target = $URI;
    }
    else
    {
        // no proxy, send only the path
        $target = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
    }

    if ($scheme === "http")
    {
        if (!$this->_connect($fp))
            return false;
        $this->_httprequest($target, $fp, $URI, $this->_httpmethod);
        $this->_disconnect($fp);
    }
    else
    {
        $this->_httpsrequest($target, $URI, $this->_httpmethod);
    }

    /* url was redirected, check if we've hit the max depth */
    if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
    {
        // only follow redirect if it's on this site, or offsiteok is true.
        // The host must be followed by a delimiter, otherwise
        // "example.com.evil.net" would count as on-site for "example.com".
        $onsite = preg_match('~^https?://'.preg_quote($this->host, '~').'(?:[:/?#]|$)~i', $this->_redirectaddr);
        if ($onsite || $this->offsiteok)
        {
            /* follow the redirect */
            $this->_redirectdepth++;
            $this->lastredirectaddr = $this->_redirectaddr;
            $this->fetch($this->_redirectaddr);
        }
    }

    if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
    {
        // take a private copy: the nested fetch() calls refill $this->_frameurls
        $frameurls = $this->_frameurls;
        $this->_frameurls = array();

        foreach ($frameurls as $frameurl)
        {
            if ($this->_framedepth >= $this->maxframes)
                break;
            $this->fetch($frameurl);
            $this->_framedepth++;
        }
    }

    return true;
}
267 
268 /*======================================================================*\
269  Function: submit
270  Purpose: submit an http form
271  Input: $URI the location to post the data
272  $formvars the formvars to use.
273  format: $formvars["var"] = "val";
274  $formfiles an array of files to submit
275  format: $formfiles["var"] = "/dir/filename.ext";
276  Output: $this->results the text output from the post
277 \*======================================================================*/
278 
/**
 * Submit an http form.
 *
 * The response is stored in $this->results by the request helpers.
 * A followed redirect whose address contains a "?" is re-requested with
 * fetch(); any other redirect is re-submitted with the same form data.
 *
 * @param string $URI       the location to post the data
 * @param mixed  $formvars  the form variables, format: $formvars["var"] = "val";
 * @param mixed  $formfiles files to submit, format: $formfiles["var"] = "/dir/filename.ext";
 * @return bool false when the scheme is neither http nor https, when the
 *              http connection cannot be opened, or when https is requested
 *              without an executable curl binary; true otherwise
 */
function submit($URI, $formvars="", $formfiles="")
{
    $postdata = $this->_prepare_post_body($formvars, $formfiles);

    $URI_PARTS = parse_url($URI);
    // parse_url() returns false for seriously malformed URLs
    if (!is_array($URI_PARTS))
        $URI_PARTS = array();
    if (!empty($URI_PARTS["user"]))
        $this->user = $URI_PARTS["user"];
    if (!empty($URI_PARTS["pass"]))
        $this->pass = $URI_PARTS["pass"];
    // normalise the optional components so later reads never hit a missing key
    foreach (array("scheme", "host", "query", "path") as $part)
    {
        if (empty($URI_PARTS[$part]))
            $URI_PARTS[$part] = '';
    }

    $scheme = strtolower($URI_PARTS["scheme"]);
    if ($scheme !== "http" && $scheme !== "https")
    {
        // not a valid protocol
        $this->error = 'Invalid protocol "'.$URI_PARTS["scheme"].'"'."\n";
        return false;
    }

    if ($scheme === "https")
    {
        // https is delegated to an external curl binary
        if (!$this->curl_path)
            return false;
        if (function_exists("is_executable") && !is_executable($this->curl_path))
            return false;
    }

    $this->host = $URI_PARTS["host"];
    if (!empty($URI_PARTS["port"]))
        $this->port = $URI_PARTS["port"];

    if ($this->_isproxy)
    {
        // using proxy, send entire URI
        $target = $URI;
    }
    else
    {
        // no proxy, send only the path
        $target = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
    }

    if ($scheme === "http")
    {
        if (!$this->_connect($fp))
            return false;
        $this->_httprequest($target, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata);
        $this->_disconnect($fp);
    }
    else
    {
        $this->_httpsrequest($target, $URI, $this->_submit_method, $this->_submit_type, $postdata);
    }

    /* url was redirected, check if we've hit the max depth */
    if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
    {
        // Only addresses without any scheme need expanding; an absolute
        // redirect to another scheme (http -> https) must be left alone.
        if (!preg_match('~^[a-z][a-z0-9+.-]*://~i', $this->_redirectaddr))
            $this->_redirectaddr = $this->_expandlinks($this->_redirectaddr, $URI_PARTS["scheme"]."://".$URI_PARTS["host"]);

        // only follow redirect if it's on this site, or offsiteok is true.
        // The host must be followed by a delimiter, otherwise
        // "example.com.evil.net" would count as on-site for "example.com".
        $onsite = preg_match('~^https?://'.preg_quote($this->host, '~').'(?:[:/?#]|$)~i', $this->_redirectaddr);
        if ($onsite || $this->offsiteok)
        {
            /* follow the redirect */
            $this->_redirectdepth++;
            $this->lastredirectaddr = $this->_redirectaddr;
            if (strpos($this->_redirectaddr, "?") > 0)
                $this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get
            else
                $this->submit($this->_redirectaddr, $formvars, $formfiles);
        }
    }

    if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
    {
        // take a private copy: the nested fetch() calls refill $this->_frameurls
        $frameurls = $this->_frameurls;
        $this->_frameurls = array();

        foreach ($frameurls as $frameurl)
        {
            if ($this->_framedepth >= $this->maxframes)
                break;
            $this->fetch($frameurl);
            $this->_framedepth++;
        }
    }

    return true;
}
433 
434 /*======================================================================*\
435  Function: fetchlinks
436  Purpose: fetch the links from a web page
437  Input: $URI where you are fetching from
438  Output: $this->results an array of the URLs
439 \*======================================================================*/
440 
/**
 * Fetch a page and reduce $this->results to the links it contains.
 *
 * When the fetch was redirected, the last redirect address is used as the
 * base for expanding links, since that is where the content came from.
 *
 * @param string $URI where you are fetching from
 * @return bool true when the fetch succeeded, false otherwise
 */
function fetchlinks($URI)
{
    if (!$this->fetch($URI))
        return false;

    if ($this->lastredirectaddr)
        $URI = $this->lastredirectaddr;

    if (is_array($this->results))
    {
        // framed content: one document per entry, so strip and expand
        // each entry on its own (same approach as submitlinks())
        for ($x = 0; $x < count($this->results); $x++)
        {
            $this->results[$x] = $this->_striplinks($this->results[$x]);
            if ($this->expandlinks)
                $this->results[$x] = $this->_expandlinks($this->results[$x], $URI);
        }
    }
    else
    {
        $this->results = $this->_striplinks($this->results);
        if ($this->expandlinks)
            $this->results = $this->_expandlinks($this->results, $URI);
    }

    return true;
}
462 
463 /*======================================================================*\
464  Function: fetchform
465  Purpose: fetch the form elements from a web page
466  Input: $URI where you are fetching from
467  Output: $this->results the resulting html form
468 \*======================================================================*/
469 
/**
 * Fetch a page and reduce $this->results to its form markup.
 *
 * @param string $URI where you are fetching from
 * @return bool true when the fetch succeeded, false otherwise
 */
function fetchform($URI)
{
    if (!$this->fetch($URI))
    {
        return false;
    }

    if (!is_array($this->results))
    {
        // single document
        $this->results = $this->_stripform($this->results);
        return true;
    }

    // framed content: one document per entry
    $total = count($this->results);
    for ($i = 0; $i < $total; ++$i)
    {
        $this->results[$i] = $this->_stripform($this->results[$i]);
    }

    return true;
}
489 
490 
491 /*======================================================================*\
492  Function: fetchtext
493  Purpose: fetch the text from a web page, stripping the links
494  Input: $URI where you are fetching from
495  Output: $this->results the text from the web page
496 \*======================================================================*/
497 
/**
 * Fetch a page and reduce $this->results to its text content.
 *
 * @param string $URI where you are fetching from
 * @return bool true when the fetch succeeded, false otherwise
 */
function fetchtext($URI)
{
    if (!$this->fetch($URI))
    {
        return false;
    }

    if (!is_array($this->results))
    {
        // single document
        $this->results = $this->_striptext($this->results);
        return true;
    }

    // framed content: one document per entry
    $total = count($this->results);
    for ($i = 0; $i < $total; ++$i)
    {
        $this->results[$i] = $this->_striptext($this->results[$i]);
    }

    return true;
}
514 
515 /*======================================================================*\
516  Function: submitlinks
517  Purpose: grab links from a form submission
518  Input: $URI where you are submitting from
519  Output: $this->results an array of the links from the post
520 \*======================================================================*/
521 
/**
 * Submit a form and reduce $this->results to the links in the response.
 *
 * When the submission was redirected, the last redirect address is used as
 * the base for expanding links, since that is where the content came from.
 *
 * @param string $URI       where you are submitting from
 * @param mixed  $formvars  form variables, passed through to submit()
 * @param mixed  $formfiles form files, passed through to submit()
 * @return bool true when the submit succeeded, false otherwise
 */
function submitlinks($URI, $formvars="", $formfiles="")
{
    if (!$this->submit($URI, $formvars, $formfiles))
        return false;

    if ($this->lastredirectaddr)
        $URI = $this->lastredirectaddr;

    if (is_array($this->results))
    {
        // framed content: one document per entry
        for ($x = 0; $x < count($this->results); $x++)
        {
            $this->results[$x] = $this->_striplinks($this->results[$x]);
            if ($this->expandlinks)
                $this->results[$x] = $this->_expandlinks($this->results[$x], $URI);
        }
    }
    else
    {
        $this->results = $this->_striplinks($this->results);
        if ($this->expandlinks)
            $this->results = $this->_expandlinks($this->results, $URI);
    }

    return true;
}
548 
549 /*======================================================================*\
550  Function: submittext
551  Purpose: grab text from a form submission
552  Input: $URI where you are submitting from
553  Output: $this->results the text from the web page
554 \*======================================================================*/
555 
/**
 * Submit a form and reduce $this->results to the text of the response.
 *
 * When the submission was redirected, the last redirect address is used as
 * the base passed to _expandlinks(), since that is where the content came
 * from.
 *
 * @param string $URI       where you are submitting from
 * @param mixed  $formvars  form variables, passed through to submit()
 * @param mixed  $formfiles form files, passed through to submit()
 * @return bool true when the submit succeeded, false otherwise
 */
function submittext($URI, $formvars = "", $formfiles = "")
{
    if (!$this->submit($URI, $formvars, $formfiles))
        return false;

    if ($this->lastredirectaddr)
        $URI = $this->lastredirectaddr;

    if (is_array($this->results))
    {
        // framed content: one document per entry
        for ($x = 0; $x < count($this->results); $x++)
        {
            $this->results[$x] = $this->_striptext($this->results[$x]);
            if ($this->expandlinks)
                $this->results[$x] = $this->_expandlinks($this->results[$x], $URI);
        }
    }
    else
    {
        $this->results = $this->_striptext($this->results);
        if ($this->expandlinks)
            $this->results = $this->_expandlinks($this->results, $URI);
    }

    return true;
}
582 
583 
584 
585 /*======================================================================*\
586  Function: set_submit_multipart
587  Purpose: Set the form submission content type to
588  multipart/form-data
589 \*======================================================================*/
/**
 * Set the form submission content type to multipart/form-data.
 */
function set_submit_multipart()
{
    $this->_submit_type = "multipart/form-data";
}
594 
595 
596 /*======================================================================*\
597  Function: set_submit_normal
598  Purpose: Set the form submission content type to
599  application/x-www-form-urlencoded
600 \*======================================================================*/
/**
 * Use application/x-www-form-urlencoded as the content type for
 * subsequent submissions.
 */
function set_submit_normal() {
    $type = "application/x-www-form-urlencoded";
    $this->_submit_type = $type;
}
605 
606 
607 // XOOPS2 Hack begin
608 // Added on March 4, 2003 by onokazu@xoops.org
609 /*======================================================================*\
610  Function: set_submit_xml
611  Purpose: Set the submission content type to
612  text/xml
613 \*======================================================================*/
/**
 * Use text/xml as the content type for subsequent submissions.
 */
function set_submit_xml() {
    $type = "text/xml";
    $this->_submit_type = $type;
}
618 // XOOPS2 Hack end
619 
620 
621 /*======================================================================*\
622  Private functions
623 \*======================================================================*/
624 
625 
626 /*======================================================================*\
627  Function: _striplinks
628  Purpose: strip the hyperlinks from an html document
629  Input: $document document to strip.
630  Output: $match an array of the links
631 \*======================================================================*/
632 
/**
 * Extract the href targets of the anchor tags in an html document.
 *
 * Quoted href values are returned first, followed by unquoted ones.
 * Values that PHP's empty() treats as empty (including "0") are dropped.
 *
 * @param string $document document to strip
 * @return array the link targets; an empty array when none were found
 */
function _striplinks($document)
{
    preg_match_all("'<\s*a\s.*?href\s*=\s* # find <a href=
 ([\"\'])? # find single or double quote
 (?(1) (.*?)\\1 | ([^\s>]+)) # if quote found, match up to next matching
 # quote, otherwise match up to next space
 'isx",$document,$links);

    // always hand back an array, even when the document has no links
    $match = array();

    // catenate the non-empty matches from the conditional subpattern:
    // group 2 holds quoted targets, group 3 unquoted ones
    foreach (array(2, 3) as $group)
    {
        if (empty($links[$group]))
            continue;
        foreach ($links[$group] as $val)
        {
            if (!empty($val))
                $match[] = $val;
        }
    }

    // return the links
    return $match;
}
659 
660 /*======================================================================*\
661  Function: _stripform
662  Purpose: strip the form elements from an html document
663  Input: $document document to strip.
664  Output: $match the matched form elements, joined with CRLF
665 \*======================================================================*/
666 
/**
 * Reduce an html document to its form-related tags.
 *
 * @param string $document document to strip
 * @return string every full match of the form pattern, joined with CRLF
 */
function _stripform($document)
{
    $formPattern = "'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi";

    $found = array();
    preg_match_all($formPattern, $document, $found);

    // index 0 holds the full matches; glue them together
    return implode("\r\n", $found[0]);
}
677 
678 
679 
680 /*======================================================================*\
681  Function: _striptext
682  Purpose: strip the text from an html document
683  Input: $document document to strip.
684  Output: $text the resulting text
685 \*======================================================================*/
686 
/**
 * Reduce an html document to plain text.
 *
 * Script blocks and tags are removed, whitespace following a line break is
 * collapsed, and a fixed list of html entities is replaced. The rules are
 * applied in the order listed below.
 *
 * @param string $document document to strip
 * @return string the resulting text
 */
function _striptext($document)
{
    // pattern => replacement, applied top to bottom. Entities are listed
    // one by one; these are some of the more common ones.
    $rules = array(
        "'<script[^>]*?>.*?</script>'si" => "",        // strip out javascript
        "'<[\/\!]*?[^<>]*?>'si"          => "",        // strip out html tags
        "'([\r\n])[\s]+'"                => "\\1",     // strip out white space
        "'&(quot|#34|#034|#x22);'i"      => "\"",      // replace html entities
        "'&(amp|#38|#038|#x26);'i"       => "&",       // (including hexadecimal values)
        "'&(lt|#60|#060|#x3c);'i"        => "<",
        "'&(gt|#62|#062|#x3e);'i"        => ">",
        "'&(nbsp|#160|#xa0);'i"          => " ",
        "'&(iexcl|#161);'i"              => chr(161),
        "'&(cent|#162);'i"               => chr(162),
        "'&(pound|#163);'i"              => chr(163),
        "'&(copy|#169);'i"               => chr(169),
        "'&(reg|#174);'i"                => chr(174),
        "'&(deg|#176);'i"                => chr(176),
        "'&(#39|#039|#x27);'"            => chr(39),
        "'&(euro|#8364);'i"              => chr(128),  // europe
        "'&a(uml|UML);'"                 => chr(228),  // german
        "'&o(uml|UML);'"                 => chr(246),
        "'&u(uml|UML);'"                 => chr(252),
        "'&A(uml|UML);'"                 => chr(196),
        "'&O(uml|UML);'"                 => chr(214),
        "'&U(uml|UML);'"                 => chr(220),
        "'&szlig;'i"                     => chr(223),
    );

    return preg_replace(array_keys($rules), array_values($rules), $document);
}
747 
748 /*======================================================================*\
749  Function: _expandlinks
750  Purpose: expand each link into a fully qualified URL
751  Input: $links the links to qualify
752  $URI the full URI to get the base from
753  Output: $expandedLinks the expanded links
754 \*======================================================================*/
755 
/**
 * Expand each link into a fully qualified URL.
 *
 * Links starting with "/" are resolved against the root (scheme, host and
 * port) of $URI. Links that are not absolute http(s) or mailto links are
 * resolved against the directory of $URI. "/./" and "/segment/../" path
 * parts are then collapsed once.
 *
 * @param mixed  $links the link, or array of links, to qualify
 * @param string $URI   the full URI to get the base from
 * @return mixed the expanded link(s), as returned by preg_replace()
 */
function _expandlinks($links,$URI)
{
    // the base is the URI without its query string ...
    preg_match("/^[^\?]+/",$URI,$match);
    $match = isset($match[0]) ? $match[0] : "";

    // ... without a trailing "/file.ext" segment. The lookbehind keeps the
    // second slash of "://" from matching, so the host of a path-less URI
    // such as "http://example.com" is not mistaken for a file name.
    $match = preg_replace("|(?<!/)/[^\/\.]+\.[^\/\.]+$|","",$match);
    // ... and without a trailing slash
    $match = preg_replace("|/$|","",$match);

    $match_part = parse_url($match);
    if (!is_array($match_part))
        $match_part = array();
    $match_root =
        (isset($match_part["scheme"]) ? $match_part["scheme"] : "")."://".
        (isset($match_part["host"]) ? $match_part["host"] : "");
    // keep a non-default port, otherwise root-relative links lose it
    if (!empty($match_part["port"]))
        $match_root .= ":".$match_part["port"];

    $search = array(
        // strip our own http origin; the lookahead stops "host.evil.net"
        // from being treated as "host"
        '~^http://'.preg_quote($this->host, '~').'(?=[/?#]|$)~i',
        // root-relative link
        "|^(\/)|i",
        // anything that is not an absolute http(s) or mailto link
        "|^(?!https?://)(?!mailto:)|i",
        "|/\./|",
        "|/[^\/]+/\.\./|"
    );

    $replace = array(
        "",
        $match_root."/",
        $match."/",
        "/",
        "/"
    );

    $expandedLinks = preg_replace($search,$replace,$links);

    return $expandedLinks;
}
785 
786 /*======================================================================*\
787  Function: _httprequest
788  Purpose: go get the http data from the server
789  Input: $url the url to fetch
790  $fp the current open file pointer
791  $URI the full URI
792  $body body contents to send if any (POST)
793  Output:
794 \*======================================================================*/
795 
/**
 * Send an http request over an open connection and read the response.
 *
 * Sets $this->headers, $this->status, $this->response_code and
 * $this->_redirectaddr from the response, queues frame sources in
 * $this->_frameurls, and stores the body in $this->results (appended as an
 * array entry when framed content is being collected).
 *
 * @param string   $url          the request target written on the request line
 * @param resource $fp           the current open file pointer
 * @param string   $URI          the full URI
 * @param string   $http_method  the request method
 * @param string   $content_type Content-type header value, if any
 * @param string   $body         body contents to send if any (POST)
 * @return bool false when a read timed out (status is set to -100),
 *              true otherwise
 */
function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="")
{
    if($this->passcookies && $this->_redirectaddr)
        $this->setcookies();

    $URI_PARTS = parse_url($URI);
    $scheme = (is_array($URI_PARTS) && isset($URI_PARTS["scheme"])) ? $URI_PARTS["scheme"] : "";

    if(empty($url))
        $url = "/";
    $headers = $http_method." ".$url." ".$this->_httpversion."\r\n";
    if(!empty($this->agent))
        $headers .= "User-Agent: ".$this->agent."\r\n";
    // a caller-supplied raw Host header takes precedence
    if(!empty($this->host) && !isset($this->rawheaders['Host'])) {
        $headers .= "Host: ".$this->host;
        if(!empty($this->port))
            $headers .= ":".$this->port;
        $headers .= "\r\n";
    }
    if(!empty($this->accept))
        $headers .= "Accept: ".$this->accept."\r\n";
    if(!empty($this->referer))
        $headers .= "Referer: ".$this->referer."\r\n";
    if(!empty($this->cookies))
    {
        if(!is_array($this->cookies))
            $this->cookies = (array)$this->cookies;

        $cookie_headers = 'Cookie: ';
        foreach ( $this->cookies as $cookieKey => $cookieVal ) {
            $cookie_headers .= $cookieKey."=".urlencode($cookieVal)."; ";
        }
        // drop the trailing "; "
        $headers .= substr($cookie_headers,0,-2) . "\r\n";
    }
    if(!empty($this->rawheaders))
    {
        if(!is_array($this->rawheaders))
            $this->rawheaders = (array)$this->rawheaders;
        // foreach always starts from the first element, so the raw headers
        // are also sent on follow-up requests (redirects, frames)
        foreach ($this->rawheaders as $headerKey => $headerVal)
            $headers .= $headerKey.": ".$headerVal."\r\n";
    }
    if(!empty($content_type)) {
        $headers .= "Content-type: $content_type";
        if ($content_type == "multipart/form-data")
            $headers .= "; boundary=".$this->_mime_boundary;
        $headers .= "\r\n";
    }
    if(!empty($body))
        $headers .= "Content-length: ".strlen($body)."\r\n";
    if(!empty($this->user) || !empty($this->pass))
        $headers .= "Authorization: Basic ".base64_encode($this->user.":".$this->pass)."\r\n";

    //add proxy auth headers
    if(!empty($this->proxy_user))
        $headers .= 'Proxy-Authorization: ' . 'Basic ' . base64_encode($this->proxy_user . ':' . $this->proxy_pass)."\r\n";

    $headers .= "\r\n";

    // set the read timeout if needed
    if ($this->read_timeout > 0)
        socket_set_timeout($fp, $this->read_timeout);
    $this->timed_out = false;

    fwrite($fp,$headers.$body,strlen($headers.$body));

    $this->_redirectaddr = false;
    // reset rather than unset, so the property stays defined even when
    // the server sends no headers at all
    $this->headers = array();

    while($currentHeader = fgets($fp,$this->_maxlinelen))
    {
        if ($this->read_timeout > 0 && $this->_check_timeout($fp))
        {
            $this->status=-100;
            return false;
        }

        // a blank line ends the header section
        if($currentHeader == "\r\n")
            break;

        // if a header begins with Location: or URI:, set the redirect.
        // Whitespace after the colon is optional; empty targets are ignored.
        if(preg_match("/^(Location:|URI:)\s*(.*)/i",chop($currentHeader),$matches) && $matches[2] != "")
        {
            // look for :// in the Location header to see if hostname is included
            if(!preg_match("|\:\/\/|",$matches[2]))
            {
                // no host in the path, so prepend
                $this->_redirectaddr = $scheme."://".$this->host.":".$this->port;
                // eliminate double slash
                if(!preg_match("|^/|",$matches[2]))
                    $this->_redirectaddr .= "/".$matches[2];
                else
                    $this->_redirectaddr .= $matches[2];
            }
            else
                $this->_redirectaddr = $matches[2];
        }

        if(preg_match("|^HTTP/|",$currentHeader))
        {
            if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$currentHeader, $status))
            {
                $this->status= $status[1];
            }
            $this->response_code = $currentHeader;
        }

        $this->headers[] = $currentHeader;
    }

    // read the body in chunks of up to $this->maxlength bytes until the
    // stream returns no more data
    $results = '';
    while(true)
    {
        $_data = fread($fp, $this->maxlength);
        if ($_data === false || strlen($_data) == 0)
            break;
        $results .= $_data;
    }

    if ($this->read_timeout > 0 && $this->_check_timeout($fp))
    {
        $this->status=-100;
        return false;
    }

    // check if there is a redirect meta tag
    if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
    {
        $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
    }

    // have we hit our frame depth and is there frame src to fetch?
    if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\">]+)'i",$results,$match))
    {
        $this->results[] = $results;
        for($x=0; $x<count($match[1]); $x++)
            $this->_frameurls[] = $this->_expandlinks($match[1][$x],$scheme."://".$this->host);
    }
    // have we already fetched framed content?
    elseif(is_array($this->results))
        $this->results[] = $results;
    // no framed content
    else
        $this->results = $results;

    return true;
}
949 
950 /*======================================================================*\
951  Function: _httpsrequest
952  Purpose: go get the https data from the server using curl
953  Input: $url the url to fetch
954  $URI the full URI
955  $body body contents to send if any (POST)
956  Output:
957 \*======================================================================*/
958 
959  function _httpsrequest($url,$URI,$http_method,$content_type="",$body="")
960  {
961  if($this->passcookies && $this->_redirectaddr)
962  $this->setcookies();
963 
964  $headers = array();
965 
966  $URI_PARTS = parse_url($URI);
967  if(empty($url))
968  $url = "/";
969  // GET ... header not needed for curl
970  //$headers[] = $http_method." ".$url." ".$this->_httpversion;
971  if(!empty($this->agent))
972  $headers[] = "User-Agent: ".$this->agent;
973  if(!empty($this->host))
974  if(!empty($this->port))
975  $headers[] = "Host: ".$this->host.":".$this->port;
976  else
977  $headers[] = "Host: ".$this->host;
978  if(!empty($this->accept))
979  $headers[] = "Accept: ".$this->accept;
980  if(!empty($this->referer))
981  $headers[] = "Referer: ".$this->referer;
982  if(!empty($this->cookies))
983  {
984  if(!is_array($this->cookies))
985  $this->cookies = (array)$this->cookies;
986 
987  reset($this->cookies);
988  if ( count($this->cookies) > 0 ) {
989  $cookie_str = 'Cookie: ';
990  foreach ( $this->cookies as $cookieKey => $cookieVal ) {
991  $cookie_str .= $cookieKey."=".urlencode($cookieVal)."; ";
992  }
993  $headers[] = substr($cookie_str,0,-2);
994  }
995  }
996  if(!empty($this->rawheaders))
997  {
998  if(!is_array($this->rawheaders))
999  $this->rawheaders = (array)$this->rawheaders;
1000  while(list($headerKey,$headerVal) = each($this->rawheaders))
1001  $headers[] = $headerKey.": ".$headerVal;
1002  }
1003  if(!empty($content_type)) {
1004  if ($content_type == "multipart/form-data")
1005  $headers[] = "Content-type: $content_type; boundary=".$this->_mime_boundary;
1006  else
1007  $headers[] = "Content-type: $content_type";
1008  }
1009  if(!empty($body))
1010  $headers[] = "Content-length: ".strlen($body);
1011  if(!empty($this->user) || !empty($this->pass))
1012  $headers[] = "Authorization: BASIC ".base64_encode($this->user.":".$this->pass);
1013 
1014  for($curr_header = 0; $curr_header < count($headers); $curr_header++) {
1015  $safer_header = strtr( $headers[$curr_header], "\"", " " );
1016  $cmdline_params .= " -H \"".$safer_header."\"";
1017  }
1018 
1019  if(!empty($body))
1020  $cmdline_params .= " -d \"$body\"";
1021 
1022  if($this->read_timeout > 0)
1023  $cmdline_params .= " -m ".$this->read_timeout;
1024 
1025  $headerfile = tempnam($temp_dir, "sno");
1026 
1027  exec($this->curl_path." -k -D \"$headerfile\"".$cmdline_params." \"".escapeshellcmd($URI)."\"",$results,$return);
1028 
1029  if($return)
1030  {
1031  $this->error = "Error: cURL could not retrieve the document, error $return.";
1032  return false;
1033  }
1034 
1035 
1036  $results = implode("\r\n",$results);
1037 
1038  $result_headers = file("$headerfile");
1039 
1040  $this->_redirectaddr = false;
1041  unset($this->headers);
1042 
1043  for($currentHeader = 0; $currentHeader < count($result_headers); $currentHeader++)
1044  {
1045 
1046  // if a header begins with Location: or URI:, set the redirect
1047  if(preg_match("/^(Location: |URI: )/i",$result_headers[$currentHeader]))
1048  {
1049  // get URL portion of the redirect
1050  preg_match("/^(Location: |URI:)\s+(.*)/",chop($result_headers[$currentHeader]),$matches);
1051  // look for :// in the Location header to see if hostname is included
1052  if(!preg_match("|\:\/\/|",$matches[2]))
1053  {
1054  // no host in the path, so prepend
1055  $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
1056  // eliminate double slash
1057  if(!preg_match("|^/|",$matches[2]))
1058  $this->_redirectaddr .= "/".$matches[2];
1059  else
1060  $this->_redirectaddr .= $matches[2];
1061  }
1062  else
1063  $this->_redirectaddr = $matches[2];
1064  }
1065 
1066  if(preg_match("|^HTTP/|",$result_headers[$currentHeader]))
1067  $this->response_code = $result_headers[$currentHeader];
1068 
1069  $this->headers[] = $result_headers[$currentHeader];
1070  }
1071 
1072  // check if there is a a redirect meta tag
1073 
1074  if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
1075  {
1076  $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
1077  }
1078 
1079  // have we hit our frame depth and is there frame src to fetch?
1080  if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\">]+)'i",$results,$match))
1081  {
1082  $this->results[] = $results;
1083  for($x=0; $x<count($match[1]); $x++)
1084  $this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
1085  }
1086  // have we already fetched framed content?
1087  elseif(is_array($this->results))
1088  $this->results[] = $results;
1089  // no framed content
1090  else
1091  $this->results = $results;
1092 
1093  unlink("$headerfile");
1094 
1095  return true;
1096  }
1097 
/*======================================================================*\
    Function:   setcookies()
    Purpose:    set cookies for a redirection: copy every name/value
                pair announced by a Set-Cookie response header in
                $this->headers into $this->cookies (values are
                url-decoded; attributes after the first ";" are ignored)
\*======================================================================*/

    function setcookies()
    {
        // $this->headers can be unset or empty when the previous response
        // produced no header lines; count()/foreach on it would then fail
        if(empty($this->headers) || !is_array($this->headers))
            return;

        foreach($this->headers as $header_line)
        {
            if(preg_match('/^set-cookie:[\s]+([^=]+)=([^;]+)/i', $header_line,$match))
                $this->cookies[$match[1]] = urldecode($match[2]);
        }
    }
1111 
1112 
/*======================================================================*\
    Function:   _check_timeout
    Purpose:    checks whether timeout has occurred; sets
                $this->timed_out when it has
    Input:      $fp file pointer
    Output:     true if the stream reports a timeout, false otherwise
                (always false when no read timeout is configured)
\*======================================================================*/

    function _check_timeout($fp)
    {
        // without a positive read timeout there is nothing to check
        if(!($this->read_timeout > 0))
            return false;

        $stream_info = socket_get_status($fp);
        if(!$stream_info["timed_out"])
            return false;

        // remember the timeout on the object for the caller
        $this->timed_out = true;
        return true;
    }
1130 
/*======================================================================*\
    Function:   _connect
    Purpose:    make a socket connection, either to the configured
                proxy (when both proxy_host and proxy_port are set)
                or directly to $this->host:$this->port
    Input:      $fp file pointer (by reference; receives the socket)
    Output:     true on success; false on failure, with
                $this->status set to the socket errno and
                $this->error set to a description
\*======================================================================*/

    function _connect(&$fp)
    {
        if(!empty($this->proxy_host) && !empty($this->proxy_port))
        {
            $this->_isproxy = true;

            // talk to the proxy instead of the target host
            $host = $this->proxy_host;
            $port = $this->proxy_port;
        }
        else
        {
            $host = $this->host;
            $port = $this->port;
        }

        $this->status = 0;

        $fp = fsockopen(
                    $host,
                    $port,
                    $errno,
                    $errstr,
                    $this->_fp_timeout
                    );
        if($fp)
        {
            // socket connection succeeded
            return true;
        }

        // socket connection failed
        $this->status = $errno;
        switch($errno)
        {
            // each case needs its own break, otherwise every message is
            // overwritten by the generic default one
            case -3:
                $this->error="socket creation failed (-3)";
                break;
            case -4:
                $this->error="dns lookup failure (-4)";
                break;
            case -5:
                $this->error="connection refused or timed out (-5)";
                break;
            default:
                $this->error="connection failed (".$errno.")";
                break;
        }
        return false;
    }
/*======================================================================*\
    Function:   _disconnect
    Purpose:    disconnect a socket connection
    Input:      $fp file pointer
    Output:     the result of fclose() on $fp
\*======================================================================*/

    function _disconnect($fp)
    {
        $closed = fclose($fp);

        return $closed;
    }
1194 
1195 
/*======================================================================*\
    Function:   _prepare_post_body
    Purpose:    Prepare post body according to encoding type
                ($this->_submit_type):
                  - application/x-www-form-urlencoded: key=value pairs,
                    array/object values are sent as key[]=value
                  - multipart/form-data: one MIME part per variable and
                    per readable file; a fresh boundary is stored in
                    $this->_mime_boundary
                  - anything else (e.g. text/xml): the first element of
                    $formvars is sent unchanged
    Input:      $formvars - form variables
                $formfiles - form upload files (field name => file
                             name or list of file names)
    Output:     post body; nothing (null) when both inputs are empty
\*======================================================================*/

    function _prepare_post_body($formvars, $formfiles)
    {
        settype($formvars, "array");
        settype($formfiles, "array");
        $postdata = '';

        if (count($formvars) == 0 && count($formfiles) == 0)
            return;

        switch ($this->_submit_type) {
            case "application/x-www-form-urlencoded":
                foreach ($formvars as $key => $val) {
                    if (is_array($val) || is_object($val)) {
                        foreach ($val as $cur_val) {
                            $postdata .= urlencode($key)."[]=".urlencode($cur_val)."&";
                        }
                    } else
                        $postdata .= urlencode($key)."=".urlencode($val)."&";
                }
                break;

            case "multipart/form-data":
                $this->_mime_boundary = "Snoopy".md5(uniqid(microtime()));

                foreach ($formvars as $key => $val) {
                    if (is_array($val) || is_object($val)) {
                        foreach ($val as $cur_val) {
                            $postdata .= "--".$this->_mime_boundary."\r\n";
                            // concatenate the brackets: "\[" inside a double-quoted
                            // string would put literal backslashes into the field name
                            $postdata .= "Content-Disposition: form-data; name=\"".$key."[]\"\r\n\r\n";
                            $postdata .= "$cur_val\r\n";
                        }
                    } else {
                        $postdata .= "--".$this->_mime_boundary."\r\n";
                        $postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
                        $postdata .= "$val\r\n";
                    }
                }

                foreach ($formfiles as $field_name => $file_names) {
                    settype($file_names, "array");
                    foreach ($file_names as $file_name) {
                        if (!is_readable($file_name)) continue;

                        // binary safe, and also works for empty files
                        $file_content = file_get_contents($file_name);
                        if ($file_content === false) continue;
                        $base_name = basename($file_name);

                        $postdata .= "--".$this->_mime_boundary."\r\n";
                        $postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
                        $postdata .= "$file_content\r\n";
                    }
                }
                $postdata .= "--".$this->_mime_boundary."--\r\n";
                break;
            // XOOPS2 Hack begin
            // Added on March 4, 2003 by onokazu@xoops.org
            case "text/xml":
            default:
                // raw body: only the first form variable is used, if present
                $postdata = isset($formvars[0]) ? $formvars[0] : '';
                break;
            // XOOPS2 Hack end
        }

        return $postdata;
    }
1273 }
1274 
1275 ?>