cheerio语法类似jQuery
文档:https://cheerio.js.org/
安装
npm i cheerio
代码实例
// Basic usage: load a fragment, modify it with jQuery-style calls, and print it.
const cheerio = require("cheerio");

// Options handed to cheerio.load for this example.
const parseOptions = { xmlMode: true, decodeEntities: false };
const doc = cheerio.load('<h2 class="title">Hello world</h2>', parseOptions);

// Replace the heading text, then add a second class to the heading.
const heading = doc("h2.title");
heading.text("Hello there!");
doc("h2").addClass("welcome");

const serialized = doc.xml();
console.log(serialized); // <h2 class="title welcome">Hello there!</h2>
项目实战
// NOTE(review): recent cheerio releases reportedly no longer ship a default
// export; if this import fails, `import * as cheerio from "cheerio"` is the
// alternative — confirm against the installed version.
import cheerio from "cheerio";

/**
 * Load markup with the options shared by every helper in this module.
 * @param {string} html - Markup to parse.
 * @returns {*} The value returned by cheerio.load.
 */
function loadFragment(html) {
  return cheerio.load(html, { xmlMode: true, decodeEntities: false });
}

/**
 * Rewrite externally hosted image URLs so they point at this site.
 *
 * An image counts as "already local" when its src contains
 * process.env.VUE_APP_BASE_URL. When that variable is not set, every image
 * is treated as external (previously the unset value was coerced to the
 * string "undefined" before the substring check).
 *
 * @param {string} html - Markup whose <img> elements should be processed.
 * @returns {Promise<string>} The markup serialized with doc.xml().
 */
export async function replaceImage(html) {
  const doc = loadFragment(html);
  const baseUrl = process.env.VUE_APP_BASE_URL;

  // .each() does not wait for promises, so take the elements as an array and
  // await inside an ordinary loop; images are handled one after another.
  for (const node of doc("img").toArray()) {
    const image = doc(node);
    const src = image.attr("src");
    if (!src) {
      continue;
    }

    const isLocal = typeof baseUrl === "string" && src.includes(baseUrl);
    if (isLocal) {
      continue;
    }

    // saveImage is not defined in this snippet: substitute your own routine
    // that stores the image and resolves to its new URL.
    const imageSrc = await saveImage(src);
    if (imageSrc) {
      image.attr("src", imageSrc);
    }
  }

  return doc.xml();
}

/**
 * Collect the src of every <img> element that has a non-empty src.
 * @param {string} html - Markup to scan.
 * @returns {string[]} Image URLs in document order.
 */
export function extractImages(html) {
  const doc = loadFragment(html);
  return doc("img")
    .toArray()
    .map((node) => doc(node).attr("src"))
    .filter(Boolean);
}

/**
 * Remove the style attribute from every element that has one.
 * @param {string} html - Markup to clean.
 * @returns {string} The markup serialized with doc.xml().
 */
export function removeStyle(html) {
  const doc = loadFragment(html);
  doc("*[style]").removeAttr("style");
  return doc.xml();
}
xml和html
// Compare the two serializers on the same loaded document.
const cheerio = require("cheerio");

const doc = cheerio.load("<a></a>");

// XML output: the empty <a> element is written as a self-closing tag.
const xmlOutput = doc.xml();
console.log(xmlOutput); // <html><head/><body><a/></body></html>

// HTML output: the <a> element keeps its separate closing tag.
const htmlOutput = doc.html();
console.log(htmlOutput); // <html><head></head><body><a></a></body></html>
如果只需要处理 HTML 片段,可以自行处理 html() 返回的字符串,去掉自动补全的 html、head、body 外层标签
const cheerio = require("cheerio");

/**
 * Parse an HTML string with cheerio.
 * @param {string} html - Markup to load.
 * @returns {*} The value returned by cheerio.load.
 */
function getDom(html) {
  return cheerio.load(html);
}

/**
 * Serialize a loaded document and strip the surrounding
 * <html><head></head><body>…</body></html> wrapper from the output.
 *
 * If the serialized text does not have exactly that shape (for example the
 * head is not empty), the unstripped HTML is returned instead of throwing
 * on a failed match.
 *
 * @param {*} doc - Object returned by getDom; only its html() method is used.
 * @returns {string} The inner markup, or the full serialized HTML when the
 *   wrapper is not found.
 */
function toHtml(doc) {
  const html = doc.html();
  const wrapperPattern = /<html><head><\/head><body>([\s\S]*)<\/body><\/html>/;
  const match = html.match(wrapperPattern);
  // String.prototype.match returns null when nothing matches.
  return match ? match[1] : html;
}

console.log(toHtml(getDom("<a></a>"))); // <a></a>