http://www.chinanews.com/rss/scroll-news.xml%22,//热点
" http://www.chinanews.com/rss/finance.xml%22,//财经
" http://www.chinanews.com/rss/sports.xml%22,//体育
" http://www.chinanews.com/rss/ent.xml%22,//娱乐
" http://www.chinanews.com/rss/health.xml%22,//健康
" http://www.chinanews.com/rss/auto.xml%22,//汽车
" http://www.chinanews.com/rss/society.xml%22,//社会
};
// RSS feed addresses on www.chinanews.com, one entry per news category.
string[] urilist =
{
"http://www.chinanews.com/rss/scroll-news.xml", // headlines
"http://www.chinanews.com/rss/finance.xml", // finance
"http://www.chinanews.com/rss/sports.xml", // sports
"http://www.chinanews.com/rss/ent.xml", // entertainment
"http://www.chinanews.com/rss/health.xml", // health
"http://www.chinanews.com/rss/auto.xml", // automotive
"http://www.chinanews.com/rss/society.xml", // society
};
/// <summary>
/// Downloads the RSS feed at <paramref name="uri"/>, then fetches the page linked from each
/// feed item and reduces it to plain text, which is written to the console.
/// </summary>
/// <param name="uri">Address of the RSS feed; the response is decoded as gb2312.</param>
/// <param name="type">
/// Not read by the visible code. NOTE(review): presumably the news category used when storing
/// an article - confirm against the elided storage logic.
/// </param>
/// <remarks>
/// Failures (network, malformed XML, missing nodes) are reported on standard error and end the
/// processing of this feed; they are not rethrown, matching the original best-effort behaviour.
/// </remarks>
private void ReadNews(string uri, ushort type)
{
    try
    {
        string xml = NetHelper.ReadHtml(uri, Encoding.GetEncoding("gb2312"));
        XmlDocument doc = new XmlDocument();
        doc.LoadXml(xml);

        // NOTE(review): never filled in the visible code; presumably the elided branch below adds to it.
        Dictionary<ushort, NewsEntity> newslist = new Dictionary<ushort, NewsEntity>();

        XmlNodeList items = doc.SelectNodes("rss/channel/item");
        for (int i = 0; i < items.Count; i++)
        {
            // Drop the "(picture)" / "(picture set)" tags from the headline. The first tag is
            // removed a second time because deleting the second one can re-form it.
            string title = System.Helpers.XmlHelper.GetChileNode(items[i], "title").InnerText
                .Replace("(图)", "")
                .Replace("(组图)", "")
                .Replace("(图)", "");
            string link = System.Helpers.XmlHelper.GetChileNode(items[i], "link").InnerText;

            // NOTE(review): the feed is read as gb2312 but article pages use the machine's
            // default encoding - confirm this is intended.
            string html = NetHelper.ReadHtml(link, Encoding.Default);
            string result = ExtractArticleText(html);

            Console.WriteLine(result);
            result = result.Trim('\r', '\n').TrimEnd();

            if (!string.IsNullOrEmpty(title) && !string.IsNullOrEmpty(result))
            {
                // TODO: the source leaves this branch empty; the code that stores the article
                // is not part of the visible snippet.
            }
        }
    }
    catch (Exception ex)
    {
        // Keep the original best-effort contract (no rethrow) but do not hide the failure.
        Console.Error.WriteLine("ReadNews failed for " + uri + ": " + ex.Message);
    }
}

/// <summary>
/// Cuts the article body out of a news page and strips it down to plain text.
/// </summary>
/// <param name="html">Full HTML of the article page; may be null or empty.</param>
/// <returns>
/// The text between the body start marker and the body end marker, with the pager block,
/// line breaks, tags, entities and whitespace runs removed; an empty string when either
/// marker is missing.
/// </returns>
private static string ExtractArticleText(string html)
{
    const string bodyStartMarker = "<div class=left_zw>";
    const string bodyEndMarker = "<!--正文-->";
    const string pagerStartMarker = @"<div id=""function_code_page"">";
    const string pagerEndMarker = "</div>";

    if (string.IsNullOrEmpty(html))
    {
        return "";
    }

    // IndexOf returns -1 when a marker is absent; index 0 is a valid hit.
    int bodyStart = html.IndexOf(bodyStartMarker, StringComparison.Ordinal);
    if (bodyStart < 0)
    {
        return "";
    }

    int bodyEnd = html.IndexOf(bodyEndMarker, bodyStart, StringComparison.Ordinal);
    if (bodyEnd < 0)
    {
        return "";
    }

    string result = html.Substring(bodyStart, bodyEnd - bodyStart);

    // Remove the pager block up to (not including) its first closing tag; the closing tag
    // itself is removed by the tag stripping below.
    int pagerStart = result.IndexOf(pagerStartMarker, StringComparison.Ordinal);
    if (pagerStart >= 0)
    {
        int pagerEnd = result.IndexOf(pagerEndMarker, pagerStart, StringComparison.Ordinal);
        if (pagerEnd > pagerStart)
        {
            string pageStr = result.Substring(pagerStart, pagerEnd - pagerStart);
            result = result.Replace(pageStr, "");
        }
    }

    // Line breaks go first so that the non-greedy tag pattern can span tags that were
    // split across lines in the page source.
    result = result.Replace("\r", "").Replace("\n", "");
    result = Regex.Replace(result, "<.*?>", "");
    result = Regex.Replace(result, @"&(.{2,6});", "");
    result = Regex.Replace(result, "\t{2,}", "\t");
    result = Regex.Replace(result, @"\s{2,}", "");
    return result;
}
// 本文转自 94cool 的博客园博客,原文链接:http://www.cnblogs.com/94cool/archive/2011/10/20/2218576.html ,如需转载请自行联系原作者。