一. 先贴一张图,这个界面就是程序的主界面了:
二. 部分代码说明(主要讲解异步分析和下载):
异步分析下载采取的策略是边分析边下载,即不等数据全部分析完毕,就开始下载已经分析出来的图片链接。下载成功的图片,其链接在List框中会被标上√ ;未能下载的图片有可能是分析错误或者是下载异常。
1. 异步分析部分代码
/// <summary>
/// Starts an asynchronous analyze-and-download run: stores the page address and the
/// save path in the instance fields, starts the analysis timer and requests the page
/// data without blocking the caller. The downloaded data is handed to
/// <see cref="AsyncURIAnalyze"/> when the request completes.
/// </summary>
/// <param name="url">Address of the page to analyze; it is passed to the <see cref="Uri"/> constructor.</param>
/// <param name="savePath">Stored in the <c>savePath</c> field for the download step.</param>
private void AsyncAnalyzeAndDownload(string url, string savePath)
{
    this.uriString = url;
    this.savePath = savePath;

    #region Analysis timing: start
    count = 0;
    count1 = 0;
    freq = 0;
    result = 0;
    QueryPerformanceFrequency(ref freq);
    QueryPerformanceCounter(ref count);
    #endregion

    // Deliberately not wrapped in a using block: DownloadDataAsync returns immediately,
    // so a using block would dispose the client before the completion handler has run.
    // Both objects are released once the handler is done instead.
    WebClient wClient = new WebClient();
    AutoResetEvent waiter = new AutoResetEvent(false);
    try
    {
        wClient.Credentials = CredentialCache.DefaultCredentials;
        wClient.DownloadDataCompleted += delegate(object sender, DownloadDataCompletedEventArgs e)
        {
            try
            {
                AsyncURIAnalyze(sender, e);
            }
            finally
            {
                // AsyncURIAnalyze has already signalled the waiter at this point.
                waiter.Close();
                wClient.Dispose();
            }
        };

        // The waiter travels as user state; AsyncURIAnalyze reads it from e.UserState.
        wClient.DownloadDataAsync(new Uri(uriString), waiter);
    }
    catch
    {
        // Starting the request failed (e.g. malformed address): the completion handler
        // will never run, so release the resources here and let the caller see the error.
        waiter.Close();
        wClient.Dispose();
        throw;
    }
}
/// <summary>
/// Handles the completed page download: decodes the returned bytes as UTF-8 text,
/// extracts image links with a regular expression, and for every link not seen
/// before in this run records it, shows it in the list and starts an asynchronous
/// download whose callback is <c>AsyncDownLoad</c>.
/// Whatever the outcome, the finally block signals the waiter, stops the analysis
/// timer, updates the status labels and sets <c>isAnalyzeComplete</c>.
/// </summary>
/// <param name="sender">Event source; not used by this method.</param>
/// <param name="e">Download result; <c>UserState</c> is cast to the <see cref="AutoResetEvent"/> that gets signalled.</param>
// NOTE(review): several string/char literals below carry leading and trailing spaces
// (e.g. " http:// ", ' / ', the regex patterns). ' / ' is not a valid char literal, so
// this looks like formatting residue from publishing — confirm against the real source.
protected void AsyncURIAnalyze(Object sender, DownloadDataCompletedEventArgs e)
{
AutoResetEvent waiter = (AutoResetEvent)e.UserState;
try
{
// Only analyze when the download was neither cancelled nor failed.
if ( ! e.Cancelled && e.Error == null )
{
string dnDir = string .Empty;
string domainName = string .Empty;
string uri = uriString;
// Get the domain part, e.g. http://www.sina.com/
Match match = Regex.Match(uri, @" ((http(s)?://)?)+[\w-.]+[^/] " ); // , RegexOptions.IgnoreCase
domainName = match.Value;
// Get the deepest directory of the address, e.g. http://www.sina.com/mail/
// (everything up to the last '/' when the address is longer than the domain match).
if (domainName.Equals(uri))
dnDir = domainName;
else
dnDir = uri.Substring( 0 , uri.LastIndexOf( ' / ' ));
dnDir += ' / ' ;
// Decode the downloaded page data (always treated as UTF-8 here).
string pageData = Encoding.UTF8.GetString(e.Result);
// Links already handled in this run; used to skip duplicates.
List < string > urlList = new List < string > ();
// Match image paths ending in one of the ImageType extensions.
// ImageType is defined outside this block — presumably a regex alternation of extensions; verify.
match = Regex.Match(pageData, @" ((http(s)?://)?)+(((/?)+[\w-.]+(/))*)+[\w-./]+\.+( " + ImageType + " ) " ); // , RegexOptions.IgnoreCase
while (match.Success)
{
string item = match.Value;
// Short-path handling: a link without a scheme is prefixed with the domain when
// it starts with '/', otherwise with the deepest directory of the page address.
if (item.IndexOf( " http:// " ) == - 1 && item.IndexOf( " https:// " ) == - 1 )
item = (item[ 0 ] == ' / ' ? domainName : dnDir) + item;
if ( ! urlList.Contains(item))
{
urlList.Add(item);
imgUrlList.Add(item);
// Show the analysis result immediately.
AddlbShowItem(item);
// Download while analyzing: start the request now, AsyncDownLoad receives the response.
WebRequest hwr = WebRequest.Create(item);
hwr.BeginGetResponse( new AsyncCallback(AsyncDownLoad), hwr);
// hwr.Timeout = "0x30D40"; // default 0x186a0 -> 100000 0x30D40 -> 200000
// hwr.Method = "POST";
// hwr.ContentType = "application/x-www-form-urlencoded";
// hwr.MaximumAutomaticRedirections = 3;
// hwr.Accept ="image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, */*";
// hwr.Accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";
// IAsyncResult iar = hwr.BeginGetResponse(new AsyncCallback(AsyncDownLoad), hwr);
// iar.AsyncWaitHandle.WaitOne();
}
match = match.NextMatch();
}
}
}
finally
{
// Signal the waiter passed in as user state.
waiter.Set();
#region 分析计时结束
// Analysis timing: stop. Elapsed seconds = (end counter - start counter) / frequency.
QueryPerformanceCounter( ref count1);
count = count1 - count;
result = ( double )(count) / ( double )freq;
// NOTE(review): this runs in finally, so the "analysis finished" status is shown
// even when the download was cancelled or failed.
toolStripStatusLabel1.Text = " 分析完毕! " ;
toolStripStatusLabel2.Text = string .Format( " | 分析耗时:{0}秒 " , result);
Application.DoEvents();
#endregion
// Analysis is finished.
isAnalyzeComplete = true ;
}
}
这两个方法主要是用WebClient来请求然后异步获得网址所返回的数据并对数据分析,提取图片链接,提取主要有两种方式:一种是完整路径的图片链接;一种是短路径的链接,比如/images/bg.gif,程序会自动为其加上域名部分组成完整的链接。
2. 异步下载部分代码
/// <summary>
/// Callback that receives the response of an asynchronous image request, saves the
/// image into the <c>savePath</c> folder and marks the matching list-box entry.
/// Failures are treated as best-effort: the link is dropped from <c>imgUrlList</c>
/// and the error is written to the debug output instead of being rethrown.
/// </summary>
/// <param name="asyncResult">Async result whose <c>AsyncState</c> is the originating <see cref="WebRequest"/>.</param>
public void AsyncDownLoad(IAsyncResult asyncResult)
{
    #region Download timing: start
    // Only the first callback (cfreq still 0) starts the download timer.
    if (cfreq == 0)
    {
        QueryPerformanceFrequency(ref cfreq);
        QueryPerformanceCounter(ref ccount);
    }
    #endregion

    WebRequest request = (WebRequest)asyncResult.AsyncState;
    string url = request.RequestUri.ToString();
    try
    {
        // The response is disposed as well (the original never closed it), so the
        // underlying connection is released even when decoding or saving fails.
        using (WebResponse response = request.EndGetResponse(asyncResult))
        using (Stream stream = response.GetResponseStream())
        using (Image img = Image.FromStream(stream))
        {
            // File extension = text after the last '.' of the address.
            string[] tmpUrl = url.Split('.');
            string extension = tmpUrl[tmpUrl.Length - 1];

            // Several callbacks can finish within the same millisecond; a timestamp
            // alone would make them write to the same file, so add a unique suffix.
            string fileName = string.Concat(
                DateTime.Now.ToString("yyyyMMddHHmmssfff"),
                "_",
                Guid.NewGuid().ToString("N"),
                ".",
                extension);
            img.Save(Path.Combine(savePath, fileName));
        }

        // NOTE(review): allDone is only signalled on success — confirm that nothing
        // waits on it per download, otherwise a failed download would block the waiter.
        allDone.Set();

        // Remove the downloaded image from the list of pending links.
        // NOTE(review): callbacks may run concurrently and imgUrlList is modified
        // without synchronization here — confirm how the other accessors guard it.
        imgUrlList.Remove(url);

        // Update the list box: IndexOf returns -1 when the link is not listed.
        int indexItem = this.lbShow.Items.IndexOf(url);
        if (indexItem >= 0)
        {
            SetlbShowItem(indexItem);
        }
    }
    catch (Exception ex)
    {
        // Best-effort download: keep going, but leave a trace instead of failing silently.
        System.Diagnostics.Debug.WriteLine(string.Concat("Image download failed: ", url, " - ", ex.Message));
        imgUrlList.Remove(url);
    }
}
这部分就是异步下载图片并保存的代码。调用部分请看AsyncURIAnalyze方法:图片链接匹配成功后即开始下载图片,每下载完一张图片就更新界面正下方的List框(在链接前标记√ )。
篇幅有限,还有一些其他重要的代码(如实时显示分析和下载结果的代码)请下载源代码查看。另外需要注意的是,输入要下载图片的网址时必须输入完整的链接,即带 http 前缀,如 http://www.sina.com/ 。
程序和代码:
exe可执行文件:http://files.cnblogs.com/over140/IBD_exe.rar
源代码:http://files.cnblogs.com/over140/ImagesBatchDownloading2008-8-21.rar
本文转自博客园农民伯伯的博客,原文链接:笨笨图片批量抓取下载 V0.2 beta[C# | WinForm | 正则表达式 | HttpWebRequest | Async异步编程],如需转载请自行联系原博主。