const request = require('request');
const jsdom = require("jsdom");
const JSDOM = jsdom.JSDOM;

// Site origin that the relative article links found on the page are appended to.
const PREFIX = "https://www.jianshu.com";

/* a given article: https://www.jianshu.com/p/963cd23fb092
   value got from API: /p/5c1d0319dc42
*/
const url = "https://www.jianshu.com/u/99b8712e8850?order_by=shared_at&page=2";

const pageOptions = {
  url: url,
  method: "GET",
  headers: {
    "Accept": "text/html"
  }
};

/**
 * Collects the article links found in a parsed page.
 *
 * Only anchors that are direct children of a <div> which is itself a direct
 * child of an <li> are considered, and only when their href contains "/p".
 *
 * @param {Document} document - DOM document to scan.
 * @returns {Array<{title: string, url: string}>} One entry per matching link,
 *   visited <li> by <li> in document order.
 */
function extractArticles(document) {
  const articles = [];
  const listItems = Array.from(document.getElementsByTagName("li"));

  listItems.forEach((li) => {
    const divs = Array.from(li.children).filter((el) => el.nodeName === "DIV");

    divs.forEach((div) => {
      const anchors = Array.from(div.children).filter((el) => el.nodeName === "A");

      anchors.forEach((anchor) => {
        const fragment = anchor.getAttribute("href");
        // getAttribute returns null for an <a> without href; skip it instead
        // of throwing on the substring check below.
        if (fragment === null || fragment.indexOf("/p") < 0) {
          return;
        }
        articles.push({ title: anchor.text, url: PREFIX + fragment });
      });
    });
  });

  return articles;
}

/**
 * Downloads the page described by `pageOptions`, prints the title and absolute
 * URL of every article link found in it, and reports the result via a Promise.
 *
 * @returns {Promise<Array<{title: string, url: string}>>} Resolves with the
 *   articles that were printed; rejects when the request fails or the
 *   response body cannot be processed.
 */
function getArticles() {
  return new Promise(function (resolve, reject) {
    const requestC = request.defaults({ jar: true });

    requestC(pageOptions, function (error, response, body) {
      if (error) {
        reject(error);
        return;
      }

      // This callback runs outside the Promise executor, so an exception
      // thrown here would not reject the promise on its own.
      try {
        const document = new JSDOM(body).window.document;
        const articles = extractArticles(document);

        articles.forEach((article) => {
          console.log("title: " + article.title);
          console.log("url: " + article.url);
        });

        resolve(articles);
      } catch (parseError) {
        reject(parseError);
      }
    });
  });
}

getArticles()
  .then(function (articles) {
    // Nothing further to do: the articles were already printed by getArticles.
  })
  .catch(function (error) {
    console.error("Failed to fetch articles:", error);
  });
执行后的输出: