使用PHP获取网页内容的多种方法-阿里云开发者社区

使用PHP获取网页内容的多种方法

2026-02-28 61

版权

本文内容由阿里云实名注册用户自发贡献，版权归原作者所有，阿里云开发者社区不拥有其著作权，亦不承担相应法律责任。具体规则请查看《阿里云开发者社区用户服务协议》和《阿里云开发者社区知识产权保护指引》。如果您发现本社区中有涉嫌抄袭的内容，请填写侵权投诉表单进行举报，一经查实，本社区将立刻删除涉嫌侵权内容。

简介：使用PHP获取网页内容的多种方法案例分享

<?php

/**
 * Several ways to fetch the content of a web page with PHP.
 */

// Target page for the examples below. Each assignment overwrites the previous
// one, so only the last URL is the one actually requested.
$url = 'https://onehaoka.com/979642961.html';
$url = 'https://onehaoka.com/979642962.html';
$url = 'https://onehaoka.com/979642963.html';
$url = 'https://onehaoka.com/979642964.html';
$url = 'https://onehaoka.com/979642965.html';
$url = 'https://onehaoka.com/979642966.html';
$url = 'https://onehaoka.com/979642967.html';
$url = 'https://onehaoka.com/979642968.html';

// 方法1: 使用 file_get_contents (最简单的方法)

/**
 * Fetch a resource with file_get_contents() using a browser-like HTTP context.
 *
 * @param string $url Address (or path) of the resource to read.
 *
 * @return string|false The raw content, or false when the read fails.
 */
function getContentByFileGetContents($url) {
    // Stream context that makes the request look like it comes from a browser.
    $context = stream_context_create([
        'http' => [
            'method' => 'GET',
            'header' => [
                'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
                'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
                'Accept-Language: zh-CN,zh;q=0.9,en;q=0.8',
            ],
            'timeout' => 30,
            'follow_location' => true,
        ],
        // NOTE(review): certificate and host-name verification are switched off,
        // which exposes HTTPS requests to man-in-the-middle attacks. Kept as in
        // the original; enable both outside of throwaway demos.
        'ssl' => [
            'verify_peer' => false,
            'verify_peer_name' => false,
        ],
    ]);

    // "@" hides the warning file_get_contents() raises on failure; callers are
    // expected to test the return value against false instead.
    $content = @file_get_contents($url, false, $context);

    return $content;
}

// 方法2: 使用 cURL (推荐的方法，功能更强大)

/**
 * Fetch a page with the cURL extension, sending browser-like request headers.
 *
 * Prints a diagnostic line and returns false when cURL reports a transfer
 * error or when the final HTTP status code is anything other than 200.
 *
 * @param string $url Address of the page to request.
 *
 * @return string|false The response body, or false on failure.
 */
function getContentByCurl($url) {
    $handle = curl_init();

    // Same options, applied in the same order, as one batch.
    curl_setopt_array($handle, [
        CURLOPT_URL => $url,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_TIMEOUT => 30,
        // NOTE(review): TLS peer and host verification are disabled here.
        CURLOPT_SSL_VERIFYPEER => false,
        CURLOPT_SSL_VERIFYHOST => false,
        // Request headers that mimic a desktop browser.
        CURLOPT_HTTPHEADER => [
            'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
            'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language: zh-CN,zh;q=0.9,en;q=0.8',
        ],
    ]);

    $body = curl_exec($handle);

    // Collect everything needed from the handle first so it is closed in one place.
    $transferFailed = (bool) curl_errno($handle);
    $errorMessage = curl_error($handle);
    $status = curl_getinfo($handle, CURLINFO_HTTP_CODE);
    curl_close($handle);

    if ($transferFailed) {
        echo 'cURL Error: ' . $errorMessage . "\n";

        return false;
    }

    if ($status !== 200) {
        echo "HTTP Error: " . $status . "\n";

        return false;
    }

    return $body;
}

// 方法3: 使用 Guzzle HTTP 客户端 (需要安装: composer require guzzlehttp/guzzle)

/**
 * Fetch a page body with the Guzzle HTTP client.
 *
 * Requires the guzzlehttp/guzzle package (composer require guzzlehttp/guzzle).
 * When the client class is not loadable, or when an exception is thrown while
 * building the client or sending the request, a message is printed and false
 * is returned.
 *
 * @param string $url Address of the page to request.
 *
 * @return string|false The response body, or false on failure.
 */
function getContentByGuzzle($url) {
    // Bail out early when Guzzle has not been installed/autoloaded.
    if (!class_exists('GuzzleHttp\Client')) {
        echo "请先安装 Guzzle: composer require guzzlehttp/guzzle\n";

        return false;
    }

    try {
        $client = new \GuzzleHttp\Client([
            'timeout' => 30,
            // NOTE(review): TLS certificate verification is disabled here.
            'verify' => false,
            'headers' => [
                'User-Agent' => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
                'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            ],
        ]);

        $response = $client->get($url);

        return $response->getBody()->getContents();
    } catch (\Exception $e) {
        echo 'Guzzle Error: ' . $e->getMessage() . "\n";

        return false;
    }
}

// ============ Usage example ============

echo "=== 使用 cURL 获取网页内容 ===\n";

$content = getContentByCurl($url);

if ($content !== false) {
    echo "成功获取内容，长度: " . strlen($content) . " 字节\n\n";

    // Save the page to disk. file_put_contents() returns false on failure, so
    // success is only reported when the write actually went through.
    if (file_put_contents('page_content.html', $content) !== false) {
        echo "内容已保存到 page_content.html\n";
    } else {
        echo "保存到 page_content.html 失败\n";
    }

    // Preview the beginning of the page. substr() counts bytes, so a multibyte
    // character sitting on the 1000-byte boundary may be cut in half.
    echo "\n=== 内容预览 (前1000字符) ===\n";
    echo substr($content, 0, 1000);

    // Only claim the preview was truncated when something was left out.
    if (strlen($content) > 1000) {
        echo "\n... (内容已截断)\n";
    } else {
        echo "\n";
    }
} else {
    echo "获取内容失败\n";
}

// Fallback: fetch the same URL with file_get_contents
echo "\n=== 使用 file_get_contents 获取 ===\n";

$content2 = getContentByFileGetContents($url);

if ($content2 !== false) {
    echo "file_get_contents 成功，长度: " . strlen($content2) . " 字节\n";
} else {
    echo "file_get_contents 失败\n";
}

使用PHP获取网页内容的多种方法

热门文章

最新文章

相关电子书

探索云世界

热门

云计算

大数据

云原生

人工智能

数据库

开发与运维

活动广场

任务中心

训练营

直播

乘风者计划

下载

镜像站

技术资料

使用PHP获取网页内容的多种方法

热门文章

最新文章

相关电子书