/**
 * Fetch a URL with cURL, as a POST (default) or a plain GET request.
 *
 * The returned string contains the response headers followed by the body,
 * because CURLOPT_HEADER is enabled. Redirects are followed, the transfer is
 * limited to 10 seconds and the Referer is always https://www.ixigua.com/.
 *
 * NOTE(review): TLS peer verification is disabled (CURLOPT_SSL_VERIFYPEER =
 * false), so HTTPS responses are not authenticated. It is left unchanged here
 * because callers may rely on it; confirm before turning verification on.
 *
 * @param string $url     Address to request.
 * @param array  $data    Form fields, sent url-encoded; only used when $isPost is true.
 * @param bool   $isPost  true to POST $data, false to issue a GET.
 * @param int    $coding  1 converts the response from GBK to UTF-8, 2 converts it
 *                        from UTF-8 to GBK, any other value leaves it untouched.
 * @param bool   $isForge true to send a random User-Agent together with random
 *                        CLIENT-IP / X-FORWARDED-FOR headers.
 * @return bool|string Response headers and body, or false when cURL fails.
 * Created by zhangzq at 2021/4/26 17:22
 */
function curlPost($url, $data = array(), $isPost = true, $coding = 0, $isForge = true)
{
    $ch = curl_init();
    if (false === $ch) {
        // No handle could be created, so there is nothing to execute or close.
        return false;
    }

    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($ch, CURLOPT_TIMEOUT, 10);
    // Follow 302 redirects and keep fetching; headers stay in the returned string.
    curl_setopt($ch, CURLOPT_HEADER, true);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($ch, CURLOPT_NOBODY, false);
    curl_setopt($ch, CURLOPT_REFERER, 'https://www.ixigua.com/'); // spoofed referer

    if ($isPost) {
        curl_setopt($ch, CURLOPT_POST, 1);
        curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($data));
    }

    if ($isForge) {
        // The random IP and the User-Agent pool are only needed on this path.
        $ip = mt_rand(11, 191) . "." . mt_rand(0, 240) . "." . mt_rand(1, 240) . "." . mt_rand(1, 240);

        // String keys are labels only; array_rand() picks across every entry.
        $agents = [
            // Search-engine crawlers
            "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)",
            "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
            "Mozilla/5.0 (compatible; JikeSpider; +http://shoulu.jike.com/spider.html)",
            "Mozilla/5.0 (compatible; Yahoo! Slurp China; http://misc.yahoo.com.cn/help.html)",
            "Sogou web spider/4.0(+http://www.sogou.com/docs/help/webmasters.htm#07)",
            "Mozilla/5.0 (compatible; YoudaoBot/1.0; http://www.youdao.com/help/webmaster/spider/;)",
            "Mozilla/5.0 (compatible; Yahoo! Slurp/3.0; http://help.yahoo.com/help/us/ysearch/slurp)",
            // Desktop browsers
            "safari 5.1 – MAC" => "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11",
            "safari 5.1 – Windows" => "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
            "Firefox 38esr" => "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0",
            "IE 11" => "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; .NET4.0C; .NET4.0E; .NET CLR 2.0.50727; .NET CLR 3.0.30729; .NET CLR 3.5.30729; InfoPath.3; rv:11.0) like Gecko",
            // The closing parenthesis was missing in the original string.
            "IE 9.0" => "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)",
            "IE 8.0" => "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)",
            "IE 7.0" => "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)",
            "IE 6.0" => "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)",
            "Firefox 4.0.1 – MAC" => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
            // Kept on one line: a line break inside the literal would end up in the header value.
            "Firefox 4.0.1 – Windows" => "Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
            "Opera 11.11 – MAC" => "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; en) Presto/2.8.131 Version/11.11",
            "Opera 11.11 – Windows" => "Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11",
            "Chrome 17.0 – MAC" => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
            "傲游(Maxthon)" => "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Maxthon 2.0)",
            "腾讯TT" => "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; TencentTraveler 4.0)",
            "世界之窗(The World) 2.x" => "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)",
            "世界之窗(The World) 3.x" => "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; The World)",
            "360浏览器" => "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)",
            "搜狗浏览器 1.x" => "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SE 2.X MetaSr 1.0; SE 2.X MetaSr 1.0; .NET CLR 2.0.50727; SE 2.X MetaSr 1.0)",
            "Avant" => "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Avant Browser)",
            "Green Browser" => "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)",
            // Mobile user agents (iOS, Android, BlackBerry, WebOS, UC, WeChat) were
            // commented out in the original and are not part of the pool.
        ];
        $useragent = $agents[array_rand($agents)];

        // Forged client-address headers.
        $header = array(
            'CLIENT-IP:' . $ip,
            'X-FORWARDED-FOR:' . $ip,
        );
        curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
        curl_setopt($ch, CURLOPT_USERAGENT, $useragent);
    }

    $output = curl_exec($ch);
    // Read the error code before the handle is released.
    $errorCode = curl_errno($ch);
    curl_close($ch);

    if (0 !== $errorCode || false === $output) {
        return false;
    }

    if ($coding == 1) {
        $output = mb_convert_encoding($output, "UTF-8", "GBK");
    } elseif ($coding == 2) {
        $output = mb_convert_encoding($output, "GBK", "UTF-8");
    }

    return $output;
}
http.php
<?php

namespace app\lucky\common;

/**
 * Class Http
 *
 * Static wrapper that forwards each call to the same-named static method of
 * the global `Requests` class. Any \Exception raised while doing so is logged
 * through \Log::error() and replaced by a \Requests_Response whose body is ''.
 */
class Http
{
    /**
     * Transfer info captured by the 'curl.after_request' hook of the most
     * recent request. It is written here; nothing in this class reads it.
     */
    private static $info = [];

    /**
     * @param $url
     * @param array $headers
     * @param array $options
     * @return \Requests_Response
     */
    public static function get($url, array $headers = [], array $options = [])
    {
        return self::call(__FUNCTION__, [$url, $headers, self::setOptions($options)]);
    }

    /**
     * @param $url
     * @param array $headers
     * @param array $options
     * @return \Requests_Response
     */
    public static function head($url, array $headers = [], array $options = [])
    {
        return self::call(__FUNCTION__, [$url, $headers, self::setOptions($options)]);
    }

    /**
     * @param $url
     * @param array $headers
     * @param array $options
     * @return \Requests_Response
     */
    public static function delete($url, array $headers = [], array $options = [])
    {
        return self::call(__FUNCTION__, [$url, $headers, self::setOptions($options)]);
    }

    /**
     * @param $url
     * @param array $headers
     * @param array $options
     * @return \Requests_Response
     */
    public static function trace($url, array $headers = [], array $options = [])
    {
        return self::call(__FUNCTION__, [$url, $headers, self::setOptions($options)]);
    }

    /**
     * @param $url
     * @param array $headers
     * @param array $data not type-hinted, so non-array payloads are passed through as given
     * @param array $options
     * @return \Requests_Response
     */
    public static function post($url, array $headers = [], $data = [], array $options = [])
    {
        return self::call(__FUNCTION__, [$url, $headers, $data, self::setOptions($options)]);
    }

    /**
     * @param $url
     * @param array $headers
     * @param array $data
     * @param array $options
     * @return \Requests_Response
     */
    public static function put($url, array $headers = [], array $data = [], array $options = [])
    {
        return self::call(__FUNCTION__, [$url, $headers, $data, self::setOptions($options)]);
    }

    /**
     * @param $url
     * @param array $headers
     * @param array $data
     * @param array $options
     * @return \Requests_Response
     */
    public static function options($url, array $headers = [], array $data = [], array $options = [])
    {
        return self::call(__FUNCTION__, [$url, $headers, $data, self::setOptions($options)]);
    }

    /**
     * @param $url
     * @param array $headers required here, unlike the other verbs
     * @param array $data
     * @param array $options
     * @return \Requests_Response
     */
    public static function patch($url, array $headers, array $data = [], array $options = [])
    {
        return self::call(__FUNCTION__, [$url, $headers, $data, self::setOptions($options)]);
    }

    /**
     * @param string $url
     * @param array $headers
     * @param array $data
     * @param string $type HTTP method, defaults to \Requests::GET
     * @param array $options
     * @return \Requests_Response
     */
    public static function request($url, array $headers = [], array $data = [], $type = \Requests::GET, array $options = [])
    {
        return self::call(__FUNCTION__, [$url, $headers, $data, $type, self::setOptions($options)]);
    }

    /**
     * Forwards to Requests::request_multiple(). Unlike the single-request
     * methods, the options are passed through without setOptions().
     *
     * @param array $requests
     * @param array $options
     * @return mixed whatever Requests::request_multiple() returns, or an
     *               empty-bodied \Requests_Response when an exception was caught
     */
    public static function request_multiple(array $requests, array $options)
    {
        return self::call(__FUNCTION__, [$requests, $options]);
    }

    /**
     * Build a hooks object whose 'curl.after_request' callback stores the
     * info it receives in self::$info, wrapping non-array values in an array.
     */
    private static function getHooks()
    {
        $hooks = new \Requests_Hooks();
        $hooks->register('curl.after_request', function ($headers, $info) {
            self::$info = is_array($info) ? $info : [$info];
        });

        return $hooks;
    }

    /**
     * Install the default hooks unless the caller supplied its own, and
     * force the 'idn' option to false.
     */
    private static function setOptions($options)
    {
        if (!isset($options['hooks'])) {
            $options['hooks'] = self::getHooks();
        }
        $options['idn'] = false;

        return $options;
    }

    /**
     * Invoke Requests::$name with $args.
     *
     * For everything except request_multiple the first argument (the URL) is
     * wrapped in a RequestsIRI object first, and an object-valued `url` on
     * the response is replaced by its `uri` property afterwards.
     *
     * NOTE(review): RequestsIRI is unqualified, so it resolves inside this
     * namespace; it is not defined in this file - confirm it exists.
     */
    private static function call($name, $args)
    {
        try {
            $kind = $name;
            if ($name === 'request') {
                // For the generic entry point the HTTP method sits at index 3.
                $kind = isset($args[3]) ? $args[3] : $name;
            }

            // Wrap the URL for every call except request_multiple.
            $isMultiple = strtolower($kind) === 'request_multiple';
            if (!$isMultiple) {
                $args[0] = new RequestsIRI($args[0]);
            }

            $target = ['Requests', $name];
            $result = call_user_func_array($target, $args);

            $isResponse = $result instanceof \Requests_Response;
            if ($isResponse && is_object($result->url)) {
                $result->url = $result->url->uri;
            }

            return $result;
        } catch (\Exception $e) {
            \Log::error($e->getMessage());

            $fallback = new \Requests_Response();
            $fallback->body = '';

            return $fallback;
        }
    }
}
// Business code

// Request headers sent to www.cwl.gov.cn
private $cwlHeader = [
    'X-Requested-With'=> 'XMLHttpRequest',
    "Referer" => "http://www.cwl.gov.cn/kjxx/"
];

/**
 * Issue a GET request through \app\lucky\common\Http::request() and return
 * the body of the response.
 *
 * @param string $url
 * @param array $urlParams passed as the data argument of Http::request()
 * @param array $headers
 * @param array $options
 * @return mixed the `body` property of the response
 */
protected function doGet($url, $urlParams = [], $headers = [], $options = [])
{
    $response = \app\lucky\common\Http::request($url, $headers, $urlParams, 'GET', $options);

    return $response->body;
}

/**
 * Crawl one draw by issue number (Fucai 3D) from www.cwl.gov.cn.
 *
 * On success the crawler flag 'fcsdCrawlerByNum' is cleared in Redis and the
 * formatted result is returned. On failure - including a response that is not
 * valid JSON or has no `state` field - the flag is set, a warning is logged
 * and an error array is returned.
 *
 * @param $num issue number, appended url-encoded as the `code` query parameter
 * @return array the output of fcsdDataFormat() on success, otherwise
 *               ["code" => _MSG_SYSTEM_ERROR, "msg" => remote message or null]
 */
public function fcsdCrawlerByNum($num)
{
    // Encode the issue number so it cannot alter the query string.
    $url = "http://www.cwl.gov.cn/cwl_admin/kjxx/findKjxx/forIssue?name=3d&code=" . rawurlencode((string) $num);
    $data = json_decode($this->doGet($url, [], $this->cwlHeader), true);

    // json_decode() yields null for an empty or invalid body. Without the
    // is_array()/isset() checks a null state is compared loosely against
    // _MSG_SUCCESS and can be mistaken for success.
    $succeeded = is_array($data) && isset($data["state"]) && $data["state"] == _MSG_SUCCESS;

    if (!$succeeded) {
        Redis::getInstance()->redisHSetTrue(_REDIS_CRAWLER_STATUS,'fcsdCrawlerByNum');
        \Log::warning("现在已经爬不到 福彩3D 开奖结果了(中国福利彩票发行管理中心官方网站 www.cwl.gov.cn) fcsdCrawlerByNum()", []);

        $message = (is_array($data) && isset($data["message"])) ? $data["message"] : null;

        return ["code"=>_MSG_SYSTEM_ERROR, "msg"=>$message];
    }

    Redis::getInstance()->redisHSetFalse(_REDIS_CRAWLER_STATUS,'fcsdCrawlerByNum');

    return $this->fcsdDataFormat($data["result"]);
}
另一种业务逻辑的 PHP 代码块
/**
 * Crawl the past draws of a regional lottery from m.500.com.
 *
 * The page is fetched with doGet(), converted to UTF-8 and parsed with
 * regular expressions. When the page matches, the crawler flag
 * 'dfCrawlerM500List' is cleared in Redis; when it does not, the flag is set
 * and a warning is logged.
 *
 * An id that has no entry in $this->fu51ToWb is rejected up front: no request
 * is made and the crawler flag is left alone, because a bad argument says
 * nothing about whether the remote page can still be crawled.
 *
 * @param $lotteryId key into $this->fu51ToWb (remote id) and $this->dfName (display name)
 * @return array ["code" => ..., "msg" => ..., "data" => list of rows with the keys
 *               lottery_id, lottery_name, lottery_no, lottery_date, lottery_res]
 */
public function dfCrawlerM500List($lotteryId)
{
    if (!isset($this->fu51ToWb[$lotteryId])) {
        return ["code" => _MSG_SYSTEM_ERROR, "msg" => "查询失败", "data" => []];
    }

    $id500 = $this->fu51ToWb[$lotteryId];
    $url = "https://m.500.com/info/kaijiang/moreexpect/" . $id500 . "/?from=";
    $result = $this->doGet($url, [], $this->m500Header);

    // mb_detect_encoding() is documented to return false when no candidate
    // fits; handing that to mb_convert_encoding() fails, so the text is then
    // left as-is (the fields captured below are plain ASCII).
    $encode = mb_detect_encoding($result, array("ASCII", 'UTF-8', "GB2312", "GBK", 'BIG5'));
    if ($encode !== false) {
        $result = mb_convert_encoding($result, 'UTF-8', $encode);
    }

    // Group 1: issue number, group 2: draw date, group 3: inner markup of the <ul>.
    $pattern = '/<div.*?>.*?([0-9]{5,10}).*?<em>([0-9\-]{10}).*?<\/em>\s*<\/div>\s*<div.*?>\s*<ul.*?>(.*?)<\/ul>\s*<\/div>/ism';

    if (!preg_match_all($pattern, $result, $matches)) {
        Redis::getInstance()->redisHSetTrue(_REDIS_CRAWLER_STATUS, 'dfCrawlerM500List');
        \Log::warning("现在已经爬不到 地方彩 历史开奖结果了(500彩票网 m.500.com) dfCrawlerM500List()", []);

        return ["code" => _MSG_SYSTEM_ERROR, "msg" => "查询失败", "data" => []];
    }

    $lotteryName = isset($this->dfName[$lotteryId]) ? $this->dfName[$lotteryId] : null;

    $data = [];
    foreach ($matches[1] as $key => $issueNo) {
        // Rows without any <li> entries are skipped.
        if (!preg_match_all('/<li.*?>(.*?)<\/li>/ism', $matches[3][$key], $balls)) {
            continue;
        }

        $data[] = [
            "lottery_id" => $lotteryId,
            "lottery_name" => $lotteryName,
            "lottery_no" => $issueNo,
            "lottery_date" => $matches[2][$key],
            "lottery_res" => $balls[1],
        ];
    }
    unset($matches, $result);

    Redis::getInstance()->redisHSetFalse(_REDIS_CRAWLER_STATUS, 'dfCrawlerM500List');

    return ["code" => _MSG_SYSTEM_SUCCESS, "msg" => "查询成功", "data" => $data];
}
// Pattern for one draw entry. Capture group 1 is a run of 5-10 digits (the
// issue number, e.g. 2020009), group 2 is 10 characters of digits/hyphens
// (the date, e.g. 2020-01-20) and group 3 is the inner markup of the <ul>.
// Flags: i = case-insensitive, s = dot matches newlines, m = multiline.
$pattern = '/<div.*?>.*?([0-9]{5,10}).*?<em>([0-9\-]{10}).*?<\/em>\s*<\/div>\s*<div.*?>\s*<ul.*?>(.*?)<\/ul>\s*<\/div>/ism';
// The corresponding HTML fragment this pattern is written against:
/*
<div class="kaij-tit ">2020009期<em>2020-01-20 星期一</em> </div>
<div class="kaij-jg ">
    <ul>
        <li class="red-ball" >3</li>
        <li class="red-ball" >2</li>
        <li class="red-ball" >2</li>
        <li class="red-ball" >5</li>
        <li class="red-ball" >8</li>
    </ul>
</div>
*/
POST方法处理
/**
 * Players transferred into a team.
 *
 * POSTs team_id, page_no and page_count to the 'sports/team/player_shiftTo'
 * endpoint under $this->url and returns the decoded JSON. An empty or
 * undecodable response is logged through \Log::error() and reported as
 * ['code' => -1, 'msg' => <error message>].
 *
 * @param $teamId
 * @param $page_no
 * @param $page_count
 * @return array
 */
public function getPlayerShiftTo($teamId, $page_no, $page_count)
{
    try {
        $endpoint = $this->url . 'sports/team/player_shiftTo';
        $payload = [
            'team_id' => $teamId,
            'page_no' => $page_no,
            'page_count' => $page_count,
        ];

        $response = Http::post($endpoint, [], $payload);
        $decoded = json_decode($response->body, true);

        if (!empty($decoded)) {
            return $decoded;
        }

        // Nothing usable came back; report it through the common error path.
        throw new \Exception('服务接口出现错误 -1');
    } catch (\Exception $e) {
        $reason = $e->getMessage();
        \Log::error($reason);

        return ['code' => -1, 'msg' => $reason];
    }
}