用vba程序从网页里抓取想要的内容,如下代码可抓取autopiano.cn网站上的所有曲谱和歌词:
Private Sub MacraGrabTunes() Dim iBegin, iEnd, iRow As Integer Dim t0, t1 As Single Dim Sign1, Sign2 As String Dim strHtml, strTitle, strTune As String Dim Web1 As Object Set Web1 = CreateObject("Msxml2.ServerXMLHTTP.3.0") Application.ScreenUpdating = False t0 = Timer For i = 1 To 5000 strURL = "https://www.autopiano.cn/post/" & i Web1.Open "GET", strURL, False On Error Resume Next Web1.Send strHtml = Web1.responseText If Err < 0 Then strHtml = "检测网址找不到或没返回信息!" MsgBox strHtml Else Sign1 = "<div class=""section-content"">" iBegin = InStr(strHtml, Sign1) If iBegin < 1 Then strHtml = "没找到歌谱" Else Sign1 = "<title>" Sign2 = "- 自由钢琴" iBegin = InStr(strHtml, Sign1) iEnd = InStr(strHtml, Sign2) strTitle = Mid(strHtml, iBegin + Len(Sign1), iEnd - iBegin - Len(Sign1) - 1) Sign1 = "<div class=""section-content"">" iBegin = InStr(strHtml, Sign1) strHtml = Mid(strHtml, iBegin - 1, Len(strHtml)) Sign2 = "</div>" iBegin = InStr(strHtml, Sign1) iEnd = InStr(strHtml, Sign2) strTune = Mid(strHtml, iBegin + Len(Sign1) + 1, iEnd - iBegin - Len(Sign1) - 1) Sign1 = "<div class=""section lyric-section"">" iBegin = InStr(strHtml, Sign1) If iBegin > 0 Then strHtml = Mid(strHtml, iBegin - 1, Len(strHtml)) Sign1 = "<div class=""section-content"">" iBegin = InStr(strHtml, Sign1) iEnd = InStr(strHtml, Sign2) strHtml = Mid(strHtml, iBegin + Len(Sign1) + 1, iEnd - iBegin - Len(Sign1) - 1) Else strHtml = "-NULL-" End If If Len(strHtml) < 4 Then strHtml = "-NULL-" iRow = Cells(Rows.Count, 1).End(xlUp).Row + 1 If iRow < 5 Then iRow = 5 Cells(iRow, 1) = i Cells(iRow, 2) = strTitle Cells(iRow, 3) = strTune Cells(iRow, 4) = strHtml t1 = Timer Do DoEvents Loop While Timer - t1 < 0.02 End If End If Next Cells.Replace What:="<p>=", Replacement:="'=", LookAt:=xlPart, SearchOrder _ :=xlByRows, MatchCase:=False, SearchFormat:=False, ReplaceFormat:=False Cells.Replace What:="<p>", Replacement:="", LookAt:=xlPart, SearchOrder _ :=xlByRows, MatchCase:=False, SearchFormat:=False, ReplaceFormat:=False Cells.Replace What:="</p>", Replacement:="", LookAt:=xlPart, SearchOrder _ :=xlByRows, MatchCase:=False, SearchFormat:=False, ReplaceFormat:=False Cells.Replace What:="<br />", Replacement:="", LookAt:=xlPart, SearchOrder _ :=xlByRows, MatchCase:=False, SearchFormat:=False, ReplaceFormat:=False Application.ScreenUpdating = True t1 = Timer MsgBox "结束,用时:" & t1 - t0 End Sub