1 <?php 2 3 /************************************************* 4 5 Snoopy - the PHP net client 6 Author: Monte Ohrt <monte@ispi.net> 7 Copyright (c): 1999-2008 New Digital Group, all rights reserved 8 Version: 1.2.4 9 10 * This library is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU Lesser General Public 12 * License as published by the Free Software Foundation; either 13 * version 2.1 of the License, or (at your option) any later version. 14 * 15 * This library is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 * Lesser General Public License for more details. 19 * 20 * You should have received a copy of the GNU Lesser General Public 21 * License along with this library; if not, write to the Free Software 22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 23 24 You may contact the author of Snoopy by e-mail at: 25 monte@ohrt.com 26 27 The latest version of Snoopy can be obtained from: 28 http://snoopy.sourceforge.net/ 29 30 *************************************************/ 31 32 class Snoopy 33 { 34 /**** Public variables ****/ 35 36 /* user definable vars */ 37 38 var $host = "www.php.net"; // host name we are connecting to 39 var $port = 80; // port we are connecting to 40 var $proxy_host = ""; // proxy host to use 41 var $proxy_port = ""; // proxy port to use 42 var $proxy_user = ""; // proxy user to use 43 var $proxy_pass = ""; // proxy password to use 44 45 var $agent = "Snoopy v1.2.4"; // agent we masquerade as 46 var $referer = ""; // referer info to pass 47 var $cookies = array(); // array of cookies to pass 48 // $cookies["username"]="joe"; 49 var $rawheaders = array(); // array of raw headers to send 50 // $rawheaders["Content-type"]="text/html"; 51 52 var $maxredirs = 5; // http redirection depth maximum. 
0 = disallow 53 var $lastredirectaddr = ""; // contains address of last redirected address 54 var $offsiteok = true; // allows redirection off-site 55 var $maxframes = 0; // frame content depth maximum. 0 = disallow 56 var $expandlinks = true; // expand links to fully qualified URLs. 57 // this only applies to fetchlinks() 58 // submitlinks(), and submittext() 59 var $passcookies = true; // pass set cookies back through redirects 60 // NOTE: this currently does not respect 61 // dates, domains or paths. 62 63 var $user = ""; // user for http authentication 64 var $pass = ""; // password for http authentication 65 66 // http accept types 67 var $accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*"; 68 69 var $results = ""; // where the content is put 70 71 var $error = ""; // error messages sent here 72 var $response_code = ""; // response code returned from server 73 var $headers = array(); // headers returned from server sent here 74 var $maxlength = 500000; // max return data length (body) 75 var $read_timeout = 0; // timeout on read operations, in seconds 76 // supported only since PHP 4 Beta 4 77 // set to 0 to disallow timeouts 78 var $timed_out = false; // if a read operation timed out 79 var $status = 0; // http request status 80 81 var $temp_dir = "/tmp"; // temporary directory that the webserver 82 // has permission to write to. 83 // under Windows, this should be C:\temp 84 85 var $curl_path = "/usr/local/bin/curl"; 86 // Snoopy will use cURL for fetching 87 // SSL content if a full system path to 88 // the cURL binary is supplied here. 89 // set to false if you do not have 90 // cURL installed. See http://curl.haxx.se 91 // for details on installing cURL. 92 // Snoopy does *not* use the cURL 93 // library functions built into php, 94 // as these functions are not stable 95 // as of this Snoopy release. 
96 97 /**** Private variables ****/ 98 99 var $_maxlinelen = 4096; // max line length (headers) 100 101 var $_httpmethod = "GET"; // default http request method 102 var $_httpversion = "HTTP/1.0"; // default http request version 103 var $_submit_method = "POST"; // default submit method 104 var $_submit_type = "application/x-www-form-urlencoded"; // default submit type 105 var $_mime_boundary = ""; // MIME boundary for multipart/form-data submit type 106 var $_redirectaddr = false; // will be set if page fetched is a redirect 107 var $_redirectdepth = 0; // increments on an http redirect 108 var $_frameurls = array(); // frame src urls 109 var $_framedepth = 0; // increments on frame depth 110 111 var $_isproxy = false; // set if using a proxy server 112 var $_fp_timeout = 30; // timeout for socket connection 113 114 /*======================================================================*\ 115 Function: fetch 116 Purpose: fetch the contents of a web page 117 (and possibly other protocols in the 118 future like ftp, nntp, gopher, etc.) 
119 Input: $URI the location of the page to fetch 120 Output: $this->results the output text from the fetch 121 \*======================================================================*/ 122 123 function fetch($URI) 124 { 125 126 //preg_match("|^([^:]+)://([^:/]+)(:[\d]+)*(.*)|",$URI,$URI_PARTS); 127 $URI_PARTS = parse_url($URI); 128 if (!empty($URI_PARTS["user"])) 129 $this->user = $URI_PARTS["user"]; 130 if (!empty($URI_PARTS["pass"])) 131 $this->pass = $URI_PARTS["pass"]; 132 if (empty($URI_PARTS["query"])) 133 $URI_PARTS["query"] = ''; 134 if (empty($URI_PARTS["path"])) 135 $URI_PARTS["path"] = ''; 136 137 switch(strtolower($URI_PARTS["scheme"])) 138 { 139 case "http": 140 $this->host = $URI_PARTS["host"]; 141 if(!empty($URI_PARTS["port"])) 142 $this->port = $URI_PARTS["port"]; 143 if($this->_connect($fp)) 144 { 145 if($this->_isproxy) 146 { 147 // using proxy, send entire URI 148 $this->_httprequest($URI,$fp,$URI,$this->_httpmethod); 149 } 150 else 151 { 152 $path = $URI_PARTS["path"].($URI_PARTS["query"] ? 
"?".$URI_PARTS["query"] : ""); 153 // no proxy, send only the path 154 $this->_httprequest($path, $fp, $URI, $this->_httpmethod); 155 } 156 157 $this->_disconnect($fp); 158 159 if($this->_redirectaddr) 160 { 161 /* url was redirected, check if we've hit the max depth */ 162 if($this->maxredirs > $this->_redirectdepth) 163 { 164 // only follow redirect if it's on this site, or offsiteok is true 165 if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok) 166 { 167 /* follow the redirect */ 168 $this->_redirectdepth++; 169 $this->lastredirectaddr=$this->_redirectaddr; 170 $this->fetch($this->_redirectaddr); 171 } 172 } 173 } 174 175 if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0) 176 { 177 $frameurls = $this->_frameurls; 178 $this->_frameurls = array(); 179 180 while(list(,$frameurl) = each($frameurls)) 181 { 182 if($this->_framedepth < $this->maxframes) 183 { 184 $this->fetch($frameurl); 185 $this->_framedepth++; 186 } 187 else 188 break; 189 } 190 } 191 } 192 else 193 { 194 return false; 195 } 196 return true; 197 break; 198 case "https": 199 if(!$this->curl_path) 200 return false; 201 if(function_exists("is_executable")) 202 if (!is_executable($this->curl_path)) 203 return false; 204 $this->host = $URI_PARTS["host"]; 205 if(!empty($URI_PARTS["port"])) 206 $this->port = $URI_PARTS["port"]; 207 if($this->_isproxy) 208 { 209 // using proxy, send entire URI 210 $this->_httpsrequest($URI,$URI,$this->_httpmethod); 211 } 212 else 213 { 214 $path = $URI_PARTS["path"].($URI_PARTS["query"] ? 
"?".$URI_PARTS["query"] : ""); 215 // no proxy, send only the path 216 $this->_httpsrequest($path, $URI, $this->_httpmethod); 217 } 218 219 if($this->_redirectaddr) 220 { 221 /* url was redirected, check if we've hit the max depth */ 222 if($this->maxredirs > $this->_redirectdepth) 223 { 224 // only follow redirect if it's on this site, or offsiteok is true 225 if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok) 226 { 227 /* follow the redirect */ 228 $this->_redirectdepth++; 229 $this->lastredirectaddr=$this->_redirectaddr; 230 $this->fetch($this->_redirectaddr); 231 } 232 } 233 } 234 235 if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0) 236 { 237 $frameurls = $this->_frameurls; 238 $this->_frameurls = array(); 239 240 while(list(,$frameurl) = each($frameurls)) 241 { 242 if($this->_framedepth < $this->maxframes) 243 { 244 $this->fetch($frameurl); 245 $this->_framedepth++; 246 } 247 else 248 break; 249 } 250 } 251 return true; 252 break; 253 default: 254 // not a valid protocol 255 $this->error = 'Invalid protocol "'.$URI_PARTS["scheme"].'"\n'; 256 return false; 257 break; 258 } 259 return true; 260 } 261 262 /*======================================================================*\ 263 Function: submit 264 Purpose: submit an http form 265 Input: $URI the location to post the data 266 $formvars the formvars to use. 
267 format: $formvars["var"] = "val"; 268 $formfiles an array of files to submit 269 format: $formfiles["var"] = "/dir/filename.ext"; 270 Output: $this->results the text output from the post 271 \*======================================================================*/ 272 273 function submit($URI, $formvars="", $formfiles="") 274 { 275 unset($postdata); 276 277 $postdata = $this->_prepare_post_body($formvars, $formfiles); 278 279 $URI_PARTS = parse_url($URI); 280 if (!empty($URI_PARTS["user"])) 281 $this->user = $URI_PARTS["user"]; 282 if (!empty($URI_PARTS["pass"])) 283 $this->pass = $URI_PARTS["pass"]; 284 if (empty($URI_PARTS["query"])) 285 $URI_PARTS["query"] = ''; 286 if (empty($URI_PARTS["path"])) 287 $URI_PARTS["path"] = ''; 288 289 switch(strtolower($URI_PARTS["scheme"])) 290 { 291 case "http": 292 $this->host = $URI_PARTS["host"]; 293 if(!empty($URI_PARTS["port"])) 294 $this->port = $URI_PARTS["port"]; 295 if($this->_connect($fp)) 296 { 297 if($this->_isproxy) 298 { 299 // using proxy, send entire URI 300 $this->_httprequest($URI,$fp,$URI,$this->_submit_method,$this->_submit_type,$postdata); 301 } 302 else 303 { 304 $path = $URI_PARTS["path"].($URI_PARTS["query"] ? 
"?".$URI_PARTS["query"] : ""); 305 // no proxy, send only the path 306 $this->_httprequest($path, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata); 307 } 308 309 $this->_disconnect($fp); 310 311 if($this->_redirectaddr) 312 { 313 /* url was redirected, check if we've hit the max depth */ 314 if($this->maxredirs > $this->_redirectdepth) 315 { 316 if(!preg_match("|^".$URI_PARTS["scheme"]."://|", $this->_redirectaddr)) 317 $this->_redirectaddr = $this->_expandlinks($this->_redirectaddr,$URI_PARTS["scheme"]."://".$URI_PARTS["host"]); 318 319 // only follow redirect if it's on this site, or offsiteok is true 320 if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok) 321 { 322 /* follow the redirect */ 323 $this->_redirectdepth++; 324 $this->lastredirectaddr=$this->_redirectaddr; 325 if( strpos( $this->_redirectaddr, "?" ) > 0 ) 326 $this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get 327 else 328 $this->submit($this->_redirectaddr,$formvars, $formfiles); 329 } 330 } 331 } 332 333 if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0) 334 { 335 $frameurls = $this->_frameurls; 336 $this->_frameurls = array(); 337 338 while(list(,$frameurl) = each($frameurls)) 339 { 340 if($this->_framedepth < $this->maxframes) 341 { 342 $this->fetch($frameurl); 343 $this->_framedepth++; 344 } 345 else 346 break; 347 } 348 } 349 350 } 351 else 352 { 353 return false; 354 } 355 return true; 356 break; 357 case "https": 358 if(!$this->curl_path) 359 return false; 360 if(function_exists("is_executable")) 361 if (!is_executable($this->curl_path)) 362 return false; 363 $this->host = $URI_PARTS["host"]; 364 if(!empty($URI_PARTS["port"])) 365 $this->port = $URI_PARTS["port"]; 366 if($this->_isproxy) 367 { 368 // using proxy, send entire URI 369 $this->_httpsrequest($URI, $URI, $this->_submit_method, $this->_submit_type, $postdata); 370 } 371 else 372 { 373 $path = 
$URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : ""); 374 // no proxy, send only the path 375 $this->_httpsrequest($path, $URI, $this->_submit_method, $this->_submit_type, $postdata); 376 } 377 378 if($this->_redirectaddr) 379 { 380 /* url was redirected, check if we've hit the max depth */ 381 if($this->maxredirs > $this->_redirectdepth) 382 { 383 if(!preg_match("|^".$URI_PARTS["scheme"]."://|", $this->_redirectaddr)) 384 $this->_redirectaddr = $this->_expandlinks($this->_redirectaddr,$URI_PARTS["scheme"]."://".$URI_PARTS["host"]); 385 386 // only follow redirect if it's on this site, or offsiteok is true 387 if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok) 388 { 389 /* follow the redirect */ 390 $this->_redirectdepth++; 391 $this->lastredirectaddr=$this->_redirectaddr; 392 if( strpos( $this->_redirectaddr, "?" ) > 0 ) 393 $this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get 394 else 395 $this->submit($this->_redirectaddr,$formvars, $formfiles); 396 } 397 } 398 } 399 400 if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0) 401 { 402 $frameurls = $this->_frameurls; 403 $this->_frameurls = array(); 404 405 while(list(,$frameurl) = each($frameurls)) 406 { 407 if($this->_framedepth < $this->maxframes) 408 { 409 $this->fetch($frameurl); 410 $this->_framedepth++; 411 } 412 else 413 break; 414 } 415 } 416 return true; 417 break; 418 419 default: 420 // not a valid protocol 421 $this->error = 'Invalid protocol "'.$URI_PARTS["scheme"].'"\n'; 422 return false; 423 break; 424 } 425 return true; 426 } 427 428 /*======================================================================*\ 429 Function: fetchlinks 430 Purpose: fetch the links from a web page 431 Input: $URI where you are fetching from 432 Output: $this->results an array of the URLs 433 \*======================================================================*/ 434 435 function 
fetchlinks($URI) 436 { 437 if ($this->fetch($URI)) 438 { 439 if($this->lastredirectaddr) 440 $URI = $this->lastredirectaddr; 441 if(is_array($this->results)) 442 { 443 for($x=0;$x<count($this->results);$x++) 444 $this->results[$x] = $this->_striplinks($this->results[$x]); 445 } 446 else 447 $this->results = $this->_striplinks($this->results); 448 449 if($this->expandlinks) 450 $this->results = $this->_expandlinks($this->results, $URI); 451 return true; 452 } 453 else 454 return false; 455 } 456 457 /*======================================================================*\ 458 Function: fetchform 459 Purpose: fetch the form elements from a web page 460 Input: $URI where you are fetching from 461 Output: $this->results the resulting html form 462 \*======================================================================*/ 463 464 function fetchform($URI) 465 { 466 467 if ($this->fetch($URI)) 468 { 469 470 if(is_array($this->results)) 471 { 472 for($x=0;$x<count($this->results);$x++) 473 $this->results[$x] = $this->_stripform($this->results[$x]); 474 } 475 else 476 $this->results = $this->_stripform($this->results); 477 478 return true; 479 } 480 else 481 return false; 482 } 483 484 485 /*======================================================================*\ 486 Function: fetchtext 487 Purpose: fetch the text from a web page, stripping the links 488 Input: $URI where you are fetching from 489 Output: $this->results the text from the web page 490 \*======================================================================*/ 491 492 function fetchtext($URI) 493 { 494 if($this->fetch($URI)) 495 { 496 if(is_array($this->results)) 497 { 498 for($x=0;$x<count($this->results);$x++) 499 $this->results[$x] = $this->_striptext($this->results[$x]); 500 } 501 else 502 $this->results = $this->_striptext($this->results); 503 return true; 504 } 505 else 506 return false; 507 } 508 509 /*======================================================================*\ 510 Function: submitlinks 
511 Purpose: grab links from a form submission 512 Input: $URI where you are submitting from 513 Output: $this->results an array of the links from the post 514 \*======================================================================*/ 515 516 function submitlinks($URI, $formvars="", $formfiles="") 517 { 518 if($this->submit($URI,$formvars, $formfiles)) 519 { 520 if($this->lastredirectaddr) 521 $URI = $this->lastredirectaddr; 522 if(is_array($this->results)) 523 { 524 for($x=0;$x<count($this->results);$x++) 525 { 526 $this->results[$x] = $this->_striplinks($this->results[$x]); 527 if($this->expandlinks) 528 $this->results[$x] = $this->_expandlinks($this->results[$x],$URI); 529 } 530 } 531 else 532 { 533 $this->results = $this->_striplinks($this->results); 534 if($this->expandlinks) 535 $this->results = $this->_expandlinks($this->results,$URI); 536 } 537 return true; 538 } 539 else 540 return false; 541 } 542 543 /*======================================================================*\ 544 Function: submittext 545 Purpose: grab text from a form submission 546 Input: $URI where you are submitting from 547 Output: $this->results the text from the web page 548 \*======================================================================*/ 549 550 function submittext($URI, $formvars = "", $formfiles = "") 551 { 552 if($this->submit($URI,$formvars, $formfiles)) 553 { 554 if($this->lastredirectaddr) 555 $URI = $this->lastredirectaddr; 556 if(is_array($this->results)) 557 { 558 for($x=0;$x<count($this->results);$x++) 559 { 560 $this->results[$x] = $this->_striptext($this->results[$x]); 561 if($this->expandlinks) 562 $this->results[$x] = $this->_expandlinks($this->results[$x],$URI); 563 } 564 } 565 else 566 { 567 $this->results = $this->_striptext($this->results); 568 if($this->expandlinks) 569 $this->results = $this->_expandlinks($this->results,$URI); 570 } 571 return true; 572 } 573 else 574 return false; 575 } 576 577 578 579 
/*======================================================================*\
    Function:   set_submit_multipart
    Purpose:    Set the form submission content type to
                multipart/form-data
\*======================================================================*/
    function set_submit_multipart()
    {
        $this->_submit_type = "multipart/form-data";
    }


/*======================================================================*\
    Function:   set_submit_normal
    Purpose:    Set the form submission content type to
                application/x-www-form-urlencoded
\*======================================================================*/
    function set_submit_normal()
    {
        $this->_submit_type = "application/x-www-form-urlencoded";
    }




/*======================================================================*\
    Private functions
\*======================================================================*/


/*======================================================================*\
    Function:   _striplinks
    Purpose:    strip the hyperlinks from an html document
    Input:      $document   document to strip.
    Output:     $match      an array of the links: quoted href values
                            first, then unquoted ones; an empty array
                            when the document contains no links
\*======================================================================*/

    function _striplinks($document)
    {
        preg_match_all("'<\s*a\s.*?href\s*=\s*          # find <a href=
                        ([\"\'])?                       # find single or double quote
                        (?(1) (.*?)\\1 | ([^\s\>]+))    # if quote found, match up to next matching
                                                        # quote, otherwise match up to next space
                        'isx",$document,$links);

        // always hand back an array, even when nothing matched
        $match = array();

        // catenate the non-empty matches from the conditional subpattern
        // (group 2 = quoted values, group 3 = unquoted values)
        foreach(array(2, 3) as $group)
        {
            foreach($links[$group] as $val)
            {
                if(!empty($val))
                    $match[] = $val;
            }
        }

        // return the links
        return $match;
    }

/*======================================================================*\
    Function:   _stripform
    Purpose:    strip the form elements from an html document
    Input:      $document   document to strip.
    Output:     $match      the matched form tags as one string, joined
                            with "\r\n"
\*======================================================================*/

    function _stripform($document)
    {
        preg_match_all("'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi",$document,$elements);

        // catenate the matches
        $match = implode("\r\n",$elements[0]);

        // return the form elements
        return $match;
    }



/*======================================================================*\
    Function:   _striptext
    Purpose:    strip the text from an html document
    Input:      $document   document to strip.
    Output:     $text       the resulting text
    Notes:      entities are replaced with single-byte characters
                produced by chr(), so the result does not depend on
                the encoding this source file is saved in
\*======================================================================*/

    function _striptext($document)
    {

        // I didn't use preg eval (//e) since that is only available in PHP 4.0.
        // so, list your entities one by one here. I included some of the
        // more common ones.

        // $search and $replace are parallel arrays: entry N of one belongs
        // to entry N of the other

        $search = array("'<script[^>]*?>.*?</script>'si",   // strip out javascript
                        "'<[\/\!]*?[^<>]*?>'si",            // strip out html tags
                        "'([\r\n])[\s]+'",                  // strip out white space
                        "'&(quot|#34|#034|#x22);'i",        // replace html entities
                        "'&(amp|#38|#038|#x26);'i",         // added hexadecimal values
                        "'&(lt|#60|#060|#x3c);'i",
                        "'&(gt|#62|#062|#x3e);'i",
                        "'&(nbsp|#160|#xa0);'i",
                        "'&(iexcl|#161);'i",
                        "'&(cent|#162);'i",
                        "'&(pound|#163);'i",
                        "'&(copy|#169);'i",
                        "'&(reg|#174);'i",
                        "'&(deg|#176);'i",
                        "'&(#39|#039|#x27);'",
                        "'&(euro|#8364);'i",                // europe
                        "'&a(uml|UML);'",                   // german
                        "'&o(uml|UML);'",
                        "'&u(uml|UML);'",
                        "'&A(uml|UML);'",
                        "'&O(uml|UML);'",
                        "'&U(uml|UML);'",
                        // NOTE(review): this pattern arrived as a bare sharp-s
                        // character; restored to the entity form used by every
                        // neighbouring pattern - confirm against upstream
                        "'&szlig;'i",
                        );
        $replace = array(   "",
                            "",
                            "\\1",
                            "\"",
                            "&",
                            "<",
                            ">",
                            " ",
                            chr(161),
                            chr(162),
                            chr(163),
                            chr(169),
                            chr(174),
                            chr(176),
                            chr(39),
                            chr(128),
                            // the umlaut literals were lost to an encoding
                            // round-trip (and with them the closing quotes);
                            // chr() keeps them in the same single-byte
                            // character set as the entries above
                            chr(228),   // a umlaut
                            chr(246),   // o umlaut
                            chr(252),   // u umlaut
                            chr(196),   // A umlaut
                            chr(214),   // O umlaut
                            chr(220),   // U umlaut
                            chr(223),   // sharp s
                        );

        $text = preg_replace($search,$replace,$document);

        return $text;
    }

/*======================================================================*\
    Function:   _expandlinks
    Purpose:    expand each link into a fully qualified URL
    Input:      $links          the links to qualify (string or array)
                $URI            the full URI to get the base from
    Output:     $expandedLinks  the expanded links
\*======================================================================*/

    function _expandlinks($links,$URI)
    {
        // base = $URI without its query string, without a trailing
        // "name.ext" segment and without a trailing slash
        preg_match("/^[^\?]+/",$URI,$match);
        $match = isset($match[0]) ? $match[0] : "";

        $match = preg_replace("|/[^\/\.]+\.[^\/\.]+$|","",$match);
        $match = preg_replace("|/$|","",$match);
        $match_part = parse_url($match);
        if(!is_array($match_part))
            $match_part = array();
        $match_root =
            (isset($match_part["scheme"]) ? $match_part["scheme"] : "")."://".
            (isset($match_part["host"]) ? $match_part["host"] : "");

        // the rules are applied in this order to every link
        $search = array(    "|^http://".preg_quote($this->host, "|")."|i",  // drop "http://<current host>"
                            "|^(\/)|i",                                     // root-relative link
                            // anything still not absolute; https links are
                            // absolute too and must not get the base prepended
                            "|^(?!https?://)(?!mailto:)|i",
                            "|/\./|",                                       // collapse "/./"
                            "|/[^\/]+/\.\./|"                               // collapse "/dir/../"
                        );

        $replace = array(   "",
                            $match_root."/",
                            $match."/",
                            "/",
                            "/"
                        );

        $expandedLinks = preg_replace($search,$replace,$links);

        return $expandedLinks;
    }

/*======================================================================*\
    Function:   _httprequest
    Purpose:    go get the http data from the server
    Input:      $url            the url to fetch
                $fp             the current open file pointer
                $URI            the full URI
                $http_method    the request method for the request line
                $content_type   value of the Content-type header, if any
                $body           body contents to send if any (POST)
    Output:     $this->results, $this->headers, $this->response_code,
                $this->status, $this->_redirectaddr, $this->_frameurls
    Returns:    true when a response was read, false on a read timeout
                ($this->status is then -100)
\*======================================================================*/

    function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="")
    {
        $cookie_headers = '';
        if($this->passcookies && $this->_redirectaddr)
            $this->setcookies();

        $URI_PARTS = parse_url($URI);
        if(empty($url))
            $url = "/";
        $headers = $http_method." ".$url." ".$this->_httpversion."\r\n";
        if(!empty($this->agent))
            $headers .= "User-Agent: ".$this->agent."\r\n";
        if(!empty($this->host) && !isset($this->rawheaders['Host'])) {
            $headers .= "Host: ".$this->host;
            if(!empty($this->port))
                $headers .= ":".$this->port;
            $headers .= "\r\n";
        }
        if(!empty($this->accept))
            $headers .= "Accept: ".$this->accept."\r\n";
        if(!empty($this->referer))
            $headers .= "Referer: ".$this->referer."\r\n";
        if(!empty($this->cookies))
        {
            if(!is_array($this->cookies))
                $this->cookies = (array)$this->cookies;

            if ( count($this->cookies) > 0 ) {
                $cookie_headers .= 'Cookie: ';
                foreach ( $this->cookies as $cookieKey => $cookieVal ) {
                    $cookie_headers .= $cookieKey."=".urlencode($cookieVal)."; ";
                }
                // drop the trailing "; "
                $headers .= substr($cookie_headers,0,-2) . "\r\n";
            }
        }
        if(!empty($this->rawheaders))
        {
            if(!is_array($this->rawheaders))
                $this->rawheaders = (array)$this->rawheaders;
            // foreach replaces the each() idiom, which no longer exists in PHP 8
            foreach($this->rawheaders as $headerKey => $headerVal)
                $headers .= $headerKey.": ".$headerVal."\r\n";
        }
        if(!empty($content_type)) {
            $headers .= "Content-type: $content_type";
            if ($content_type == "multipart/form-data")
                $headers .= "; boundary=".$this->_mime_boundary;
            $headers .= "\r\n";
        }
        if(!empty($body))
            $headers .= "Content-length: ".strlen($body)."\r\n";
        if(!empty($this->user) || !empty($this->pass))
            $headers .= "Authorization: Basic ".base64_encode($this->user.":".$this->pass)."\r\n";

        //add proxy auth headers
        if(!empty($this->proxy_user))
            $headers .= 'Proxy-Authorization: ' . 'Basic ' . base64_encode($this->proxy_user . ':' . $this->proxy_pass)."\r\n";


        $headers .= "\r\n";

        // set the read timeout if needed
        if ($this->read_timeout > 0)
            socket_set_timeout($fp, $this->read_timeout);
        $this->timed_out = false;

        fwrite($fp,$headers.$body,strlen($headers.$body));

        $this->_redirectaddr = false;
        // reset to an empty array rather than unset(), so that a later
        // count() or foreach over the headers is always valid
        $this->headers = array();

        while($currentHeader = fgets($fp,$this->_maxlinelen))
        {
            if ($this->read_timeout > 0 && $this->_check_timeout($fp))
            {
                $this->status=-100;
                return false;
            }

            // an empty line ends the header section
            if($currentHeader == "\r\n")
                break;

            // if a header begins with Location: or URI:, set the redirect.
            // The address is only used when the pattern really matched.
            if(preg_match("/^(?:Location|URI):\s*(.+)/i",chop($currentHeader),$matches))
            {
                $target = $matches[1];
                // look for :// in the Location header to see if hostname is included
                if(!preg_match("|\:\/\/|",$target))
                {
                    // no host in the path, so prepend
                    $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
                    // eliminate double slash
                    if(!preg_match("|^/|",$target))
                        $this->_redirectaddr .= "/".$target;
                    else
                        $this->_redirectaddr .= $target;
                }
                else
                    $this->_redirectaddr = $target;
            }

            if(preg_match("|^HTTP/|",$currentHeader))
            {
                if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$currentHeader, $status))
                {
                    $this->status= $status[1];
                }
                $this->response_code = $currentHeader;
            }

            $this->headers[] = $currentHeader;
        }

        // read the body until fread() returns nothing more
        $results = '';
        do {
            $_data = fread($fp, $this->maxlength);
            if (strlen($_data) == 0) {
                break;
            }
            $results .= $_data;
        } while(true);

        if ($this->read_timeout > 0 && $this->_check_timeout($fp))
        {
            $this->status=-100;
            return false;
        }

        // check if there is a redirect meta tag

        if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
        {
            $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
        }

        // have we hit our frame depth and is there frame src to fetch?
        if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
        {
            // $this->results starts out as a string; appending with [] to a
            // string is a fatal error, so switch it to an array first
            if(!is_array($this->results))
                $this->results = array();
            $this->results[] = $results;
            for($x=0; $x<count($match[1]); $x++)
                $this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
        }
        // have we already fetched framed content?
        elseif(is_array($this->results))
            $this->results[] = $results;
        // no framed content
        else
            $this->results = $results;

        return true;
    }

/*======================================================================*\
    Function:   _httpsrequest
    Purpose:    go get the https data from the server using curl
    Input:      $url            the url to fetch (only checked for
                                emptiness; cURL is given $URI)
                $URI            the full URI
                $http_method    accepted for symmetry with _httprequest;
                                not passed to cURL
                $content_type   value of the Content-type header, if any
                $body           body contents to send if any (POST)
    Output:     $this->results, $this->headers, $this->response_code,
                $this->status, $this->_redirectaddr, $this->_frameurls
    Returns:    true when cURL delivered a response, false otherwise
                (the reason is left in $this->error)
    Notes:      every value placed on the command line goes through
                escapeshellarg(), because headers, body and URI can
                carry caller-supplied text
\*======================================================================*/

    function _httpsrequest($url,$URI,$http_method,$content_type="",$body="")
    {
        if($this->passcookies && $this->_redirectaddr)
            $this->setcookies();

        $headers = array();

        $URI_PARTS = parse_url($URI);
        if(empty($url))
            $url = "/";
        // GET ... header not needed for curl
        //$headers[] = $http_method." ".$url." ".$this->_httpversion;
        if(!empty($this->agent))
            $headers[] = "User-Agent: ".$this->agent;
        if(!empty($this->host))
        {
            if(!empty($this->port))
                $headers[] = "Host: ".$this->host.":".$this->port;
            else
                $headers[] = "Host: ".$this->host;
        }
        if(!empty($this->accept))
            $headers[] = "Accept: ".$this->accept;
        if(!empty($this->referer))
            $headers[] = "Referer: ".$this->referer;
        if(!empty($this->cookies))
        {
            if(!is_array($this->cookies))
                $this->cookies = (array)$this->cookies;

            if ( count($this->cookies) > 0 ) {
                $cookie_str = 'Cookie: ';
                foreach ( $this->cookies as $cookieKey => $cookieVal ) {
                    $cookie_str .= $cookieKey."=".urlencode($cookieVal)."; ";
                }
                // drop the trailing "; "
                $headers[] = substr($cookie_str,0,-2);
            }
        }
        if(!empty($this->rawheaders))
        {
            if(!is_array($this->rawheaders))
                $this->rawheaders = (array)$this->rawheaders;
            // foreach replaces the each() idiom, which no longer exists in PHP 8
            foreach($this->rawheaders as $headerKey => $headerVal)
                $headers[] = $headerKey.": ".$headerVal;
        }
        if(!empty($content_type)) {
            if ($content_type == "multipart/form-data")
                $headers[] = "Content-type: $content_type; boundary=".$this->_mime_boundary;
            else
                $headers[] = "Content-type: $content_type";
        }
        if(!empty($body))
            $headers[] = "Content-length: ".strlen($body);
        if(!empty($this->user) || !empty($this->pass))
            $headers[] = "Authorization: BASIC ".base64_encode($this->user.":".$this->pass);

        // build the cURL options; escapeshellarg() quotes each value so the
        // shell cannot interpret anything inside it
        $cmdline_params = "";
        foreach($headers as $header)
            $cmdline_params .= " -H ".escapeshellarg($header);

        if(!empty($body))
            $cmdline_params .= " -d ".escapeshellarg($body);

        if($this->read_timeout > 0)
            $cmdline_params .= " -m ".escapeshellarg((string)$this->read_timeout);

        // cURL writes the response headers to this file (-D)
        $headerfile = tempnam($this->temp_dir, "sno");
        if($headerfile === false)
        {
            $this->error = "Error: could not create a temporary file for the cURL response headers.";
            return false;
        }

        $results = array();
        exec($this->curl_path." -k -D ".escapeshellarg($headerfile).$cmdline_params." ".escapeshellarg($URI),$results,$return);

        if($return)
        {
            // do not leave the temporary file behind on failure
            unlink($headerfile);
            $this->error = "Error: cURL could not retrieve the document, error $return.";
            return false;
        }


        $results = implode("\r\n",$results);

        // read the headers, then remove the temporary file right away
        $result_headers = file($headerfile);
        unlink($headerfile);
        if(!is_array($result_headers))
            $result_headers = array();

        $this->_redirectaddr = false;
        // reset to an empty array rather than unset(), so that a later
        // count() or foreach over the headers is always valid
        $this->headers = array();

        foreach($result_headers as $currentHeader)
        {
            // if a header begins with Location: or URI:, set the redirect.
            // The address is only used when the pattern really matched.
            if(preg_match("/^(?:Location|URI):\s*(.+)/i",chop($currentHeader),$matches))
            {
                $target = $matches[1];
                // look for :// in the Location header to see if hostname is included
                if(!preg_match("|\:\/\/|",$target))
                {
                    // no host in the path, so prepend
                    $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
                    // eliminate double slash
                    if(!preg_match("|^/|",$target))
                        $this->_redirectaddr .= "/".$target;
                    else
                        $this->_redirectaddr .= $target;
                }
                else
                    $this->_redirectaddr = $target;
            }

            if(preg_match("|^HTTP/|",$currentHeader))
            {
                // keep $this->status in step with _httprequest()
                if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$currentHeader, $status))
                {
                    $this->status= $status[1];
                }
                $this->response_code = $currentHeader;
            }

            $this->headers[] = $currentHeader;
        }

        // check if there is a redirect meta tag

        if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
        {
            $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
        }

        // have we hit our frame depth and is there frame src to fetch?
        if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
        {
            // $this->results starts out as a string; appending with [] to a
            // string is a fatal error, so switch it to an array first
            if(!is_array($this->results))
                $this->results = array();
            $this->results[] = $results;
            for($x=0; $x<count($match[1]); $x++)
                $this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
        }
        // have we already fetched framed content?
        elseif(is_array($this->results))
            $this->results[] = $results;
        // no framed content
        else
            $this->results = $results;

        return true;
    }

/*======================================================================*\
    Function:   setcookies()
    Purpose:    set cookies for a redirection
    Notes:      copies every Set-Cookie name/value pair found in
                $this->headers into $this->cookies; attributes after
                the first ";" are ignored
\*======================================================================*/

    function setcookies()
    {
        if(!is_array($this->headers))
            return;

        foreach($this->headers as $header)
        {
            // the value stops at ";" or at the line ending, so the CRLF that
            // terminates a stored header line never becomes part of the cookie
            if(preg_match('/^set-cookie:[\s]+([^=]+)=([^;\r\n]+)/i', $header,$match))
                $this->cookies[$match[1]] = urldecode($match[2]);
        }
    }


/*======================================================================*\
    Function:   _check_timeout
    Purpose:    checks whether timeout has occurred
    Input:      $fp file pointer
    Returns:    true (and sets $this->timed_out) when a read timeout is
                configured and the stream reports it, false otherwise
\*======================================================================*/

    function _check_timeout($fp)
    {
        if ($this->read_timeout > 0) {
            $fp_status = socket_get_status($fp);
            if ($fp_status["timed_out"]) {
                $this->timed_out = true;
                return true;
            }
        }
        return false;
    }

/*======================================================================*\
    Function:   _connect
    Purpose:    make a socket connection
    Input:      $fp file pointer
\*======================================================================*/

    function _connect(&$fp)
    {
        if(!empty($this->proxy_host) && !empty($this->proxy_port))
        {
            $this->_isproxy = true;

            $host = $this->proxy_host;
            $port = $this->proxy_port;
        }
        else
        {
            $host = $this->host;
            $port = $this->port;
        }

        $this->status = 0;

        if($fp = fsockopen(
                    $host,
                    $port,
                    $errno,
                    $errstr,
                    $this->_fp_timeout
                    ))
        {
            // socket connection succeeded

            return true;
        }
        else
        {
1150 // socket connection failed 1151 $this->status = $errno; 1152 switch($errno) 1153 { 1154 case -3: 1155 $this->error="socket creation failed (-3)"; 1156 case -4: 1157 $this->error="dns lookup failure (-4)"; 1158 case -5: 1159 $this->error="connection refused or timed out (-5)"; 1160 default: 1161 $this->error="connection failed (".$errno.")"; 1162 } 1163 return false; 1164 } 1165 } 1166 /*======================================================================*\ 1167 Function: _disconnect 1168 Purpose: disconnect a socket connection 1169 Input: $fp file pointer 1170 \*======================================================================*/ 1171 1172 function _disconnect($fp) 1173 { 1174 return(fclose($fp)); 1175 } 1176 1177 1178 /*======================================================================*\ 1179 Function: _prepare_post_body 1180 Purpose: Prepare post body according to encoding type 1181 Input: $formvars - form variables 1182 $formfiles - form upload files 1183 Output: post body 1184 \*======================================================================*/ 1185 1186 function _prepare_post_body($formvars, $formfiles) 1187 { 1188 settype($formvars, "array"); 1189 settype($formfiles, "array"); 1190 $postdata = ''; 1191 1192 if (count($formvars) == 0 && count($formfiles) == 0) 1193 return; 1194 1195 switch ($this->_submit_type) { 1196 case "application/x-www-form-urlencoded": 1197 reset($formvars); 1198 while(list($key,$val) = each($formvars)) { 1199 if (is_array($val) || is_object($val)) { 1200 while (list($cur_key, $cur_val) = each($val)) { 1201 $postdata .= urlencode($key)."[]=".urlencode($cur_val)."&"; 1202 } 1203 } else 1204 $postdata .= urlencode($key)."=".urlencode($val)."&"; 1205 } 1206 break; 1207 1208 case "multipart/form-data": 1209 $this->_mime_boundary = "Snoopy".md5(uniqid(microtime())); 1210 1211 reset($formvars); 1212 while(list($key,$val) = each($formvars)) { 1213 if (is_array($val) || is_object($val)) { 1214 while (list($cur_key, 
$cur_val) = each($val)) { 1215 $postdata .= "--".$this->_mime_boundary."\r\n"; 1216 $postdata .= "Content-Disposition: form-data; name=\"$key\[\]\"\r\n\r\n"; 1217 $postdata .= "$cur_val\r\n"; 1218 } 1219 } else { 1220 $postdata .= "--".$this->_mime_boundary."\r\n"; 1221 $postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n"; 1222 $postdata .= "$val\r\n"; 1223 } 1224 } 1225 1226 reset($formfiles); 1227 while (list($field_name, $file_names) = each($formfiles)) { 1228 settype($file_names, "array"); 1229 while (list(, $file_name) = each($file_names)) { 1230 if (!is_readable($file_name)) continue; 1231 1232 $fp = fopen($file_name, "r"); 1233 $file_content = fread($fp, filesize($file_name)); 1234 fclose($fp); 1235 $base_name = basename($file_name); 1236 1237 $postdata .= "--".$this->_mime_boundary."\r\n"; 1238 $postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n"; 1239 $postdata .= "$file_content\r\n"; 1240 } 1241 } 1242 $postdata .= "--".$this->_mime_boundary."--\r\n"; 1243 break; 1244 } 1245 1246 return $postdata; 1247 } 1248 } 1249 1250 ?>