asp数据采集
数据采集程序
'
On Error Resume Next
Server.Scripttimeout = 300
' ---------------------------------------------------------------------
' 采集数据
Function getHTTPData(url)
dim http
set http = Server.createobject( " Msxml2.XMLHTTP " )
if instr (url, " http:// " ) = 0 then url = " http:// " & url
Http.open " GET " ,url, false
Http.send()
if Http.Status <> 200 then exit function
getHTTPData = bytesToBSTR(Http.responseBody, " UTF-8 " )
set http = nothing
if err.number <> 0 then err.Clear
sCharset = ""
End function
' ---------------------------------------------------------------------
Function BytesToBstr(body,Cset)
dim objstream
set objstream = Server.CreateObject( " adodb.stream " )
objstream.Type = 1
objstream.Mode = 3
objstream.Open
objstream.Write body
objstream.Position = 0
objstream.Type = 2
objstream.Charset = Cset
BytesToBstr = objstream.ReadText
objstream.Close
set objstream = nothing
End Function
' ---------------------------------------------------------------------
' 服务器登录
Function login(url)
dim http
set http = Server.createobject( " Msxml2.XMLHTTP " )
if instr (url, " http:// " ) = 0 then url = " http:// " & url
Http.open " GET " ,url, false
Http.send()
if Http.Status <> 200 then exit function
set http = nothing
if err.number <> 0 then err.Clear
End function
' ---------------------------------------------------------------------
' 正则替换
Function ReplaceText(fString,patrn, replStr)
Set regEx = New RegExp
regEx.Pattern = patrn
regEx.IgnoreCase = True
regEx.Global = True
ReplaceText = regEx.Replace(fString, replStr)
End Function
' ---------------------------------------------------------------------
' 去标签 包括内容
Function ReplaceTag(str, tag)
Set regEx = New RegExp
regEx.Pattern = " < " & tag & " [^>]*?>.*?<\/ " & tag & " > "
regEx.IgnoreCase = True
regEx.Global = True
ReplaceTag = regEx.Replace(str, "" )
End Function
' ---------------------------------------------------------------------
' 去标签 不包括内容
Function ReplaceTab(str, tag)
Set regEx = New RegExp
regEx.Pattern = " <\/? " & tag & " [^>]*> "
regEx.IgnoreCase = True
regEx.Global = True
ReplaceTab = regEx.Replace(str, "" )
End Function
' ---------------------------------------------------------------------
' 去标签属性 保留标签
Function ReplaceinnerTag(str, tag)
Set regEx = New RegExp
regEx.Pattern = " (<\/? " & tag & " )[^>]*> "
regEx.IgnoreCase = True
regEx.Global = True
ReplaceinnerTag = regEx.Replace(str, " $1> " )
End Function
' ---------------------------------------------------------------------
' 按正则取数据
Function getText(fString, patrn,n)
dim Matches, tStr
tStr = fString
Set re = New Regexp
re.IgnoreCase = True
re.Global = True
re.Pattern = patrn
set Matches = re.Execute(tStr)
set re = nothing
rStr = ""
For Each Match in Matches
rStr = Match.SubMatches(n)
exit for
Next
getText = rStr
End Function
' ---------------------------------------------------------------------
' 数据过滤
Function Encode_text(str)
If Isnull (str) Then
Encode_text = ""
Exit Function
End If
str = ReplaceText(str, " <\/?br[^>]*> " , vbCrlf )
str = ReplaceText(str, " <\/?p[^>]*> " , vbCrlf )
str = ReplaceTab(str, " [a-zA-Z] " )
str = ReplaceText(str, " \n\s*\r " , Chr ( 10 ) & Chr ( 13 ))
str = Replace (str, " & " , " & " )
str = Replace (str, " ; " , " ; " )
str = Replace (str, " & " , " & " )
str = Replace (str, Chr ( 34 ), " " " )
str = Replace (str, " ' " , " ' " )
str = Replace (str, " < " , " < " )
str = Replace (str, " > " , " > " )
str = Replace (str, " ( " , " ( " )
str = Replace (str, " ) " , " ) " )
str = Replace (str, " * " , " * " )
str = Replace (str, " % " , " % " )
str = Replace (str,vbCrlf, " <br/> " )
Encode_text = str
End Function
' ---------------------------------------------------------------------
' 通过Matches取数据
dim Matches
sub setMatches(str,sRe)
Set re = New Regexp
re.IgnoreCase = True
re.Global = True
re.Pattern = sRe
set Matches = re.Execute(str)
set re = nothing
end sub
' ---------------------------------------------------------------------
Server.Scripttimeout = 300
' ---------------------------------------------------------------------
' 采集数据
Function getHTTPData(url)
dim http
set http = Server.createobject( " Msxml2.XMLHTTP " )
if instr (url, " http:// " ) = 0 then url = " http:// " & url
Http.open " GET " ,url, false
Http.send()
if Http.Status <> 200 then exit function
getHTTPData = bytesToBSTR(Http.responseBody, " UTF-8 " )
set http = nothing
if err.number <> 0 then err.Clear
sCharset = ""
End function
' ---------------------------------------------------------------------
Function BytesToBstr(body,Cset)
dim objstream
set objstream = Server.CreateObject( " adodb.stream " )
objstream.Type = 1
objstream.Mode = 3
objstream.Open
objstream.Write body
objstream.Position = 0
objstream.Type = 2
objstream.Charset = Cset
BytesToBstr = objstream.ReadText
objstream.Close
set objstream = nothing
End Function
' ---------------------------------------------------------------------
' 服务器登录
Function login(url)
dim http
set http = Server.createobject( " Msxml2.XMLHTTP " )
if instr (url, " http:// " ) = 0 then url = " http:// " & url
Http.open " GET " ,url, false
Http.send()
if Http.Status <> 200 then exit function
set http = nothing
if err.number <> 0 then err.Clear
End function
' ---------------------------------------------------------------------
' 正则替换
Function ReplaceText(fString,patrn, replStr)
Set regEx = New RegExp
regEx.Pattern = patrn
regEx.IgnoreCase = True
regEx.Global = True
ReplaceText = regEx.Replace(fString, replStr)
End Function
' ---------------------------------------------------------------------
' 去标签 包括内容
Function ReplaceTag(str, tag)
Set regEx = New RegExp
regEx.Pattern = " < " & tag & " [^>]*?>.*?<\/ " & tag & " > "
regEx.IgnoreCase = True
regEx.Global = True
ReplaceTag = regEx.Replace(str, "" )
End Function
' ---------------------------------------------------------------------
' 去标签 不包括内容
Function ReplaceTab(str, tag)
Set regEx = New RegExp
regEx.Pattern = " <\/? " & tag & " [^>]*> "
regEx.IgnoreCase = True
regEx.Global = True
ReplaceTab = regEx.Replace(str, "" )
End Function
' ---------------------------------------------------------------------
' 去标签属性 保留标签
Function ReplaceinnerTag(str, tag)
Set regEx = New RegExp
regEx.Pattern = " (<\/? " & tag & " )[^>]*> "
regEx.IgnoreCase = True
regEx.Global = True
ReplaceinnerTag = regEx.Replace(str, " $1> " )
End Function
' ---------------------------------------------------------------------
' 按正则取数据
Function getText(fString, patrn,n)
dim Matches, tStr
tStr = fString
Set re = New Regexp
re.IgnoreCase = True
re.Global = True
re.Pattern = patrn
set Matches = re.Execute(tStr)
set re = nothing
rStr = ""
For Each Match in Matches
rStr = Match.SubMatches(n)
exit for
Next
getText = rStr
End Function
' ---------------------------------------------------------------------
' 数据过滤
Function Encode_text(str)
If Isnull (str) Then
Encode_text = ""
Exit Function
End If
str = ReplaceText(str, " <\/?br[^>]*> " , vbCrlf )
str = ReplaceText(str, " <\/?p[^>]*> " , vbCrlf )
str = ReplaceTab(str, " [a-zA-Z] " )
str = ReplaceText(str, " \n\s*\r " , Chr ( 10 ) & Chr ( 13 ))
str = Replace (str, " & " , " & " )
str = Replace (str, " ; " , " ; " )
str = Replace (str, " & " , " & " )
str = Replace (str, Chr ( 34 ), " " " )
str = Replace (str, " ' " , " ' " )
str = Replace (str, " < " , " < " )
str = Replace (str, " > " , " > " )
str = Replace (str, " ( " , " ( " )
str = Replace (str, " ) " , " ) " )
str = Replace (str, " * " , " * " )
str = Replace (str, " % " , " % " )
str = Replace (str,vbCrlf, " <br/> " )
Encode_text = str
End Function
' ---------------------------------------------------------------------
' 通过Matches取数据
dim Matches
sub setMatches(str,sRe)
Set re = New Regexp
re.IgnoreCase = True
re.Global = True
re.Pattern = sRe
set Matches = re.Execute(str)
set re = nothing
end sub
' ---------------------------------------------------------------------
例子
'
例子
call setMatches(textcontent, re)
For Each Match in Matches
response.write Match.value
Next
call setMatches(textcontent, re)
For Each Match in Matches
response.write Match.value
Next
本文转自博客园cloudgamer的博客,原文链接:
asp数据采集,如需转载请自行联系原博主。