<?php
/**
* 使用PHP获取网页内容的多种方法
*/
// Candidate page URLs. The script fetches a single page, so only the last
// entry is assigned to $url; the earlier entries are kept for reference and
// can be selected by reordering the list.
$candidateUrls = [
    'https://onehaoka.com/979642961.html',
    'https://onehaoka.com/979642962.html',
    'https://onehaoka.com/979642963.html',
    'https://onehaoka.com/979642964.html',
    'https://onehaoka.com/979642965.html',
    'https://onehaoka.com/979642966.html',
    'https://onehaoka.com/979642967.html',
    'https://onehaoka.com/979642968.html',
];
$url = end($candidateUrls);
/**
 * Method 1: fetch a page with file_get_contents (the simplest approach).
 *
 * Issues a GET request with browser-like headers, a 30 second timeout and
 * redirect following enabled. When the read fails, the warning raised by
 * file_get_contents is printed as "file_get_contents Error: ..." so the
 * caller can see why, and false is returned.
 *
 * SECURITY: unless $verifySsl is true, the peer certificate and peer name are
 * not verified, which leaves HTTPS requests open to man-in-the-middle
 * attacks. The default is false only so that existing one-argument calls keep
 * working exactly as before; pass true whenever a CA bundle is available.
 *
 * @param string $url       Address (or path) to read.
 * @param bool   $verifySsl Whether to verify the server certificate and name.
 *
 * @return string|false The content that was read, or false on failure.
 */
function getContentByFileGetContents($url, $verifySsl = false) {
    // Stream context that makes the request look like it comes from a browser.
    $context = stream_context_create([
        'http' => [
            'method' => 'GET',
            'header' => [
                'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
                'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
                'Accept-Language: zh-CN,zh;q=0.9,en;q=0.8',
            ],
            'timeout' => 30,
            'follow_location' => true,
        ],
        'ssl' => [
            'verify_peer' => (bool) $verifySsl,
            'verify_peer_name' => (bool) $verifySsl,
        ],
    ]);

    // Capture the warning text instead of silencing it with "@", so that the
    // reason for a failure can be reported below.
    $errorMessage = null;
    set_error_handler(function ($severity, $message) use (&$errorMessage) {
        $errorMessage = $message;
        return true;
    });
    try {
        $content = file_get_contents($url, false, $context);
    } finally {
        // Always put the previous handler back, even if an exception escapes.
        restore_error_handler();
    }

    if ($content === false) {
        echo 'file_get_contents Error: '
            . ($errorMessage !== null ? $errorMessage : 'unknown error')
            . "\n";
    }

    return $content;
}
/**
 * Method 2: fetch a page with the cURL extension (recommended; more control).
 *
 * Sends a GET request with browser-like headers, follows redirects and
 * applies a 30 second overall timeout. Failures are reported on standard
 * output as "cURL Error: ..." or "HTTP Error: <status>".
 *
 * SECURITY: unless $verifySsl is true, neither the peer certificate nor the
 * host name is verified, which leaves HTTPS requests open to
 * man-in-the-middle attacks. The default is false only so that existing
 * one-argument calls keep working exactly as before; pass true whenever a CA
 * bundle is available.
 *
 * @param string $url       Address of the page to fetch.
 * @param bool   $verifySsl Whether to verify the server certificate and host.
 *
 * @return string|false The response body, or false when the handle cannot be
 *                      created, the transfer fails, or the final HTTP status
 *                      is anything other than 200.
 */
function getContentByCurl($url, $verifySsl = false) {
    $ch = curl_init();
    if ($ch === false) {
        echo "cURL Error: failed to initialise a cURL handle\n";
        return false;
    }

    curl_setopt_array($ch, [
        CURLOPT_URL => $url,
        // Return the body from curl_exec() instead of printing it.
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_TIMEOUT => 30,
        CURLOPT_SSL_VERIFYPEER => (bool) $verifySsl,
        // 2 enables the host name check, 0 disables it.
        CURLOPT_SSL_VERIFYHOST => $verifySsl ? 2 : 0,
        // Request headers that imitate a desktop browser.
        CURLOPT_HTTPHEADER => [
            'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
            'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language: zh-CN,zh;q=0.9,en;q=0.8',
        ],
    ]);

    $content = curl_exec($ch);

    // Read everything needed from the handle first so it is closed exactly
    // once, whichever branch is taken afterwards.
    $errorNumber = curl_errno($ch);
    $errorMessage = curl_error($ch);
    $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    curl_close($ch);

    if ($errorNumber !== 0) {
        echo 'cURL Error: ' . $errorMessage . "\n";
        return false;
    }

    if ($httpCode !== 200) {
        echo "HTTP Error: " . $httpCode . "\n";
        return false;
    }

    return $content;
}
/**
 * Method 3: fetch a page with the Guzzle HTTP client.
 *
 * Requires Guzzle to be installed (composer require guzzlehttp/guzzle). When
 * the client class cannot be found, an installation hint is printed and false
 * is returned without sending any request. Otherwise a GET request is sent
 * with a 30 second timeout and the same browser-like headers as the other
 * fetch helpers in this file.
 *
 * SECURITY: unless $verifySsl is true, certificate verification is disabled,
 * which leaves HTTPS requests open to man-in-the-middle attacks. The default
 * is false only so that existing one-argument calls keep working exactly as
 * before; pass true whenever a CA bundle is available.
 *
 * @param string $url       Address of the page to fetch.
 * @param bool   $verifySsl Whether to verify the server certificate.
 *
 * @return string|false The response body, or false when Guzzle is missing or
 *                      an exception is thrown while sending the request.
 */
function getContentByGuzzle($url, $verifySsl = false) {
    if (!class_exists('GuzzleHttp\Client')) {
        echo "请先安装 Guzzle: composer require guzzlehttp/guzzle\n";
        return false;
    }

    try {
        $client = new \GuzzleHttp\Client([
            'timeout' => 30,
            'verify' => (bool) $verifySsl,
            'headers' => [
                'User-Agent' => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
                'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
                // Sent by the other two fetch helpers as well; kept in sync here.
                'Accept-Language' => 'zh-CN,zh;q=0.9,en;q=0.8',
            ],
        ]);

        $response = $client->get($url);

        return $response->getBody()->getContents();
    } catch (\Exception $e) {
        echo 'Guzzle Error: ' . $e->getMessage() . "\n";
        return false;
    }
}
// ============ Usage example ============
// Fetch the page with cURL, save it to disk and print a short preview.
echo "=== 使用 cURL 获取网页内容 ===\n";
$content = getContentByCurl($url);
if ($content !== false) {
    echo "成功获取内容,长度: " . strlen($content) . " 字节\n\n";

    // Save to a file; only claim success when the write actually succeeded.
    if (file_put_contents('page_content.html', $content) !== false) {
        echo "内容已保存到 page_content.html\n";
    } else {
        echo "保存到 page_content.html 失败\n";
    }

    // Show the first 1000 bytes. substr() is byte-based, so the preview may
    // end in the middle of a multi-byte character.
    echo "\n=== 内容预览 (前1000字符) ===\n";
    $preview = substr($content, 0, 1000);
    echo $preview;
    if (strlen($preview) < strlen($content)) {
        echo "\n... (内容已截断)\n";
    } else {
        // Nothing was cut off, so just terminate the line.
        echo "\n";
    }
} else {
    echo "获取内容失败\n";
}

// Fallback: fetch the same page with file_get_contents.
echo "\n=== 使用 file_get_contents 获取 ===\n";
$content2 = getContentByFileGetContents($url);
if ($content2 !== false) {
    echo "file_get_contents 成功,长度: " . strlen($content2) . " 字节\n";
} else {
    echo "file_get_contents 失败\n";
}