3.1 获取所有打卡贴
首先通过关键字搜索,获取打卡贴列表:
// JavaScript that collects the href of every title link inside the first search-result container.
string script = @"Array.from(document.getElementsByClassName('user-tabs user-tabs-search')[0].getElementsByClassName('long-text-title')).map(x => ( x.href));";
// Run the script in the page; the outcome comes back as a JavascriptResponse.
JavascriptResponse response1 = await browser.EvaluateScriptAsync(script);
// A failed evaluation (for example when the container looked up with [0] does not exist)
// carries no usable Result, so report the script error instead of failing later
// on a null enumeration.
if (!response1.Success || response1.Result == null)
{
    throw new System.InvalidOperationException("Failed to collect check-in post URLs: " + response1.Message);
}
dynamic arr = response1.Result;
// Store every URL in the shared static list, skipping elements that had no href.
foreach (dynamic row in arr)
{
    if (row != null)
    {
        RecordManager.Urls.Add(row.ToString());
    }
}
为了直观,弹出一个对话框,将URL显示出来:
// Build the URL dialog with the browser it should work on, then open it via ShowDialog().
GetCSDN getCSDN = new GetCSDN { browser = browser };
getCSDN.ShowDialog();
这里定义了一个静态类来控制分析操作。
/// <summary>
/// Static holder for the state of an analysis run: the URLs to visit, the position
/// within them, the records collected so far and a completion callback.
/// </summary>
public static class RecordManager
{
    /// <summary>Whether the analysis has been started.</summary>
    public static bool IsStart { get; set; } = false;

    /// <summary>
    /// Index into <see cref="Urls"/> of the URL most recently returned by
    /// <see cref="GetNextUrl"/>; -1 before the first call.
    /// </summary>
    public static int Index { get; set; } = -1;

    /// <summary>
    /// Advances to the next URL and returns it.
    /// </summary>
    /// <returns>
    /// The next URL, or an empty string once every URL has been handed out.
    /// </returns>
    public static string GetNextUrl()
    {
        // Use >= and pin Index at Count so that calling again after the end keeps
        // returning "" instead of running past the list and throwing.
        if (Index + 1 >= Urls.Count)
        {
            Index = Urls.Count;
            return "";
        }

        Index = Index + 1;
        return Urls[Index];
    }

    /// <summary>URLs to analyse.</summary>
    public static List<string> Urls = new List<string>();

    /// <summary>Check-in records collected so far.</summary>
    public static List<Record> RecordList { get; set; } = new List<Record>();

    /// <summary>Callback to run once the analysis has finished.</summary>
    public static Action callback;
}
3.2 逐一打开各个页面并获取结果
上一图右边做了个开始按钮,点击将逐一开始分析。
系统提供了多种Handler用于浏览器各种操作的处理,这里要分析请求操作,所以用到RequestHandler
/// <summary>
/// Request handler that hands every request to a <see cref="CustomResourceRequestHandler"/>.
/// </summary>
public class CustomRequestHandler : RequestHandler
{
    /// <summary>
    /// Supplies the resource request handler for a request; a new
    /// <see cref="CustomResourceRequestHandler"/> is created on each call.
    /// </summary>
    protected override IResourceRequestHandler GetResourceRequestHandler(
        IWebBrowser chromiumWebBrowser,
        IBrowser browser,
        IFrame frame,
        IRequest request,
        bool isNavigation,
        bool isDownload,
        string requestInitiator,
        ref bool disableDefaultHandling)
        => new CustomResourceRequestHandler();
}
主要是指定了另一个专门用于处理请求的CustomResourceRequestHandler,二者关系如下图,后者才是分析的主角:
/// <summary>
/// Buffers the body of check-in task list responses and, once such a response has
/// finished loading, stores its records and moves the browser on to the next result
/// page or the next post.
/// </summary>
public class CustomResourceRequestHandler : ResourceRequestHandler
{
    // Endpoint whose responses contain the check-in records.
    private const string TaskListUrlPrefix = "https://bizapi.csdn.net/community-cloud/v1/community/task/list";

    // Receives the response body through the StreamResponseFilter.
    private readonly MemoryStream memoryStream = new MemoryStream();

    /// <summary>
    /// Attaches a filter that copies the response body into <c>memoryStream</c>.
    /// </summary>
    /// <returns>
    /// The copying filter for task list requests; <c>null</c> (no filtering) for every
    /// other request, since their bodies are never read by this handler.
    /// </returns>
    protected override IResponseFilter GetResourceResponseFilter(IWebBrowser chromiumWebBrowser, IBrowser browser, IFrame frame, IRequest request, IResponse response)
    {
        if (!IsTaskListRequest(request))
        {
            return null;
        }

        return new CefSharp.ResponseFilter.StreamResponseFilter(memoryStream);
    }

    /// <summary>
    /// Parses a completed task list response, appends its records to
    /// <c>RecordManager.RecordList</c> and then either clicks through to the next
    /// result page, loads the next post, or invokes the completion callback.
    /// </summary>
    protected override void OnResourceLoadComplete(IWebBrowser chromiumWebBrowser, IBrowser browser, IFrame frame, IRequest request, IResponse response, UrlRequestStatus status, long receivedContentLength)
    {
        // Only task list responses are analysed, and only while an analysis is running.
        if (!RecordManager.IsStart || !IsTaskListRequest(request))
        {
            return;
        }

        string str = System.Text.Encoding.UTF8.GetString(memoryStream.ToArray());
        JObject obj = (Newtonsoft.Json.Linq.JObject)Newtonsoft.Json.JsonConvert.DeserializeObject(str);
        var finish = obj["data"]["finish"];

        List<Record> list = (List<Record>)finish["list"].ToObject(typeof(List<Record>));
        if (list != null && list.Count > 0)
        {
            RecordManager.RecordList.AddRange(list);
        }

        // Total page count as reported by the response, and the page this request asked for.
        string pages = finish["pages"].ToString();
        string page = ExtractPage(request.Url);

        int currentPage;
        if (!int.TryParse(page, out currentPage))
        {
            // No usable page number in the URL: the records are kept, but there is
            // nothing to base the next navigation step on.
            return;
        }

        // Compare numerically so that a request at or beyond the reported page count
        // (e.g. a post with zero pages) is treated as the last page; fall back to the
        // textual comparison when the page count is not a number.
        int totalPages;
        bool isLastPage = int.TryParse(pages, out totalPages)
            ? currentPage >= totalPages
            : pages.Equals(page);

        // NOTE(review): Thread.Sleep blocks the thread this callback is invoked on
        // (CefSharp documents it as the CEF IO thread) — consider deferring the
        // navigation instead; kept as-is here. TODO confirm against the CefSharp version in use.
        if (isLastPage)
        {
            string url = RecordManager.GetNextUrl();
            if (string.IsNullOrEmpty(url))
            {
                // All posts processed; the callback is optional.
                RecordManager.callback?.Invoke();
            }
            else
            {
                // Wait 5 seconds before requesting the next post.
                Thread.Sleep(5000);
                browser.MainFrame.LoadUrl(url);
            }
        }
        else
        {
            // Wait 5 seconds before requesting the next page.
            Thread.Sleep(5000);
            // The element at index currentPage of the 'number' elements is clicked to advance.
            string js = $"document.getElementsByClassName('number')[{currentPage}].click();";
            browser.MainFrame.ExecuteJavaScriptAsync(js);
        }
    }

    // True when the request is a GET against the task list endpoint (case-insensitive).
    private static bool IsTaskListRequest(IRequest request)
    {
        return request.Url.StartsWith(TaskListUrlPrefix, System.StringComparison.OrdinalIgnoreCase)
            && string.Equals(request.Method, "get", System.StringComparison.OrdinalIgnoreCase);
    }

    // Returns all leading digits of the "page" query parameter, or "" when it is absent.
    private static string ExtractPage(string url)
    {
        const string key = "page=";

        int queryStart = url.IndexOf('?');
        if (queryStart < 0)
        {
            return string.Empty;
        }

        foreach (string pair in url.Substring(queryStart + 1).Split('&'))
        {
            if (!pair.StartsWith(key, System.StringComparison.Ordinal))
            {
                continue;
            }

            int end = key.Length;
            while (end < pair.Length && pair[end] >= '0' && pair[end] <= '9')
            {
                end++;
            }

            return pair.Substring(key.Length, end - key.Length);
        }

        return string.Empty;
    }
}
3.3 展示结果
3.4 导出结果
将结果导出到Excel:
// Create the workbook.
var workbook = new HSSFWorkbook();
// Create the sheet.
var table = workbook.CreateSheet("data");
// One row per record: finish time, user name, nickname.
for (int i = 0; i < RecordManager.RecordList.Count; i++)
{
    var record = RecordManager.RecordList[i];
    var row = table.CreateRow(i);
    row.CreateCell(0).SetCellValue(record.finishTime);
    row.CreateCell(1).SetCellValue(record.userName);
    row.CreateCell(2).SetCellValue(record.nickName);
}
// Make sure the output folder exists; opening the file fails otherwise.
Directory.CreateDirectory(@"d:/test");
// File.Create truncates an existing file. File.OpenWrite would keep the old length,
// leaving stale trailing bytes when the previous export was larger.
using (var fs = File.Create(@"d:/test/1.xls"))
{
    // Write the workbook (including the sheet) into the xls file.
    workbook.Write(fs);
    Console.WriteLine("生成成功");
}