using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Net;
using Ivony.Fluent;
using Ivony.Web.Html;
using Ivony.Web.Html.Parser;
using Ivony.Web.Html.HtmlAgilityPackAdaptor;
using HtmlAgilityPack;
namespace Html解析
{
    /// <summary>
    /// Code-behind for the default page: downloads the cnblogs.com home page,
    /// extracts every anchor that carries an <c>href</c> attribute and binds the
    /// resolved links to <c>GridView1</c>.
    /// </summary>
    public partial class _Default : System.Web.UI.Page
    {
        // Address of the page that is downloaded and parsed.
        private const string HomePageUrl = "http://www.cnblogs.com/";

        // Base address used to turn relative hrefs into absolute URLs.
        private const string SiteBaseUrl = "http://www.cnblogs.com";

        // Registrable domain that counts as "internal" for the IsLinkingOut column.
        private const string SiteHost = "cnblogs.com";

        // Target reported for anchors that do not declare one.
        private const string DefaultTarget = "_self";

        /// <summary>
        /// Downloads the home page, parses it and binds one row per resolvable
        /// hyperlink (absolute URL, whether it leaves the site, and its target)
        /// to <c>GridView1</c>, ordered by absolute URL.
        /// </summary>
        /// <param name="sender">Event source supplied by the page life cycle.</param>
        /// <param name="e">Event data (unused).</param>
        protected void Page_Load(object sender, EventArgs e)
        {
            string html;

            // WebClient is IDisposable; release it as soon as the download is done.
            using (var client = new WebClient())
            {
                html = client.DownloadString(HomePageUrl);
            }

            var parser = new JumonyParser();
            var document = parser.Parse(html);

            // The literals below must not carry surrounding whitespace: attribute
            // names such as " href " never match, and a host never ends with
            // " cnblogs.com ", which made every link look external.
            var links = document.Find("a[href]");
            var baseUrl = new Uri(SiteBaseUrl);

            var data = (from hyperLink in links
                        let url = ResolveUrl(baseUrl, hyperLink.Attribute("href").Value())
                        where url != null   // skip hrefs that cannot be parsed
                        orderby url.AbsoluteUri
                        select new
                        {
                            Url = url.AbsoluteUri,
                            IsLinkingOut = !IsSiteHost(url.Host),
                            // Value() presumably yields null for a missing attribute
                            // (the fallback relies on it) - confirm against Jumony.
                            Target = hyperLink.Attribute("target").Value() ?? DefaultTarget
                        }).ToList();   // materialize once before binding

            GridView1.DataSource = data;
            GridView1.DataBind();
        }

        /// <summary>
        /// Resolves <paramref name="href"/> against <paramref name="baseUrl"/>.
        /// </summary>
        /// <returns>The absolute URI, or <c>null</c> when the href is missing or malformed.</returns>
        private static Uri ResolveUrl(Uri baseUrl, string href)
        {
            if (href == null)
            {
                return null;
            }

            Uri resolved;
            return Uri.TryCreate(baseUrl, href, out resolved) ? resolved : null;
        }

        /// <summary>
        /// Tells whether <paramref name="host"/> is the site domain itself or one
        /// of its sub-domains. The leading dot in the suffix test keeps look-alike
        /// hosts such as "notcnblogs.com" from being treated as internal.
        /// </summary>
        private static bool IsSiteHost(string host)
        {
            if (string.IsNullOrEmpty(host))
            {
                return false;
            }

            return host.Equals(SiteHost, StringComparison.OrdinalIgnoreCase)
                || host.EndsWith("." + SiteHost, StringComparison.OrdinalIgnoreCase);
        }
    }
}
using System.Collections.Generic;
using System.Linq;
using System.Web;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Net;
using Ivony.Fluent;
using Ivony.Web.Html;
using Ivony.Web.Html.Parser;
using Ivony.Web.Html.HtmlAgilityPackAdaptor;
using HtmlAgilityPack;
namespace Html解析
{
    // NOTE(review): this file also contains an earlier, identical declaration of
    // this class (same namespace, same Page_Load). It looks like a copy/paste
    // duplicate; one of the two copies should be removed - confirm which is canonical.

    /// <summary>
    /// Code-behind for the default page: downloads the cnblogs.com home page,
    /// extracts every anchor that carries an <c>href</c> attribute and binds the
    /// resolved links to <c>GridView1</c>.
    /// </summary>
    public partial class _Default : System.Web.UI.Page
    {
        // Address of the page that is downloaded and parsed.
        private const string HomePageUrl = "http://www.cnblogs.com/";

        // Base address used to turn relative hrefs into absolute URLs.
        private const string SiteBaseUrl = "http://www.cnblogs.com";

        // Registrable domain that counts as "internal" for the IsLinkingOut column.
        private const string SiteHost = "cnblogs.com";

        // Target reported for anchors that do not declare one.
        private const string DefaultTarget = "_self";

        /// <summary>
        /// Downloads the home page, parses it and binds one row per resolvable
        /// hyperlink (absolute URL, whether it leaves the site, and its target)
        /// to <c>GridView1</c>, ordered by absolute URL.
        /// </summary>
        /// <param name="sender">Event source supplied by the page life cycle.</param>
        /// <param name="e">Event data (unused).</param>
        protected void Page_Load(object sender, EventArgs e)
        {
            string html;

            // WebClient is IDisposable; release it as soon as the download is done.
            using (var client = new WebClient())
            {
                html = client.DownloadString(HomePageUrl);
            }

            var parser = new JumonyParser();
            var document = parser.Parse(html);

            // The literals below must not carry surrounding whitespace: attribute
            // names such as " href " never match, and a host never ends with
            // " cnblogs.com ", which made every link look external.
            var links = document.Find("a[href]");
            var baseUrl = new Uri(SiteBaseUrl);

            var data = (from hyperLink in links
                        let url = ResolveUrl(baseUrl, hyperLink.Attribute("href").Value())
                        where url != null   // skip hrefs that cannot be parsed
                        orderby url.AbsoluteUri
                        select new
                        {
                            Url = url.AbsoluteUri,
                            IsLinkingOut = !IsSiteHost(url.Host),
                            // Value() presumably yields null for a missing attribute
                            // (the fallback relies on it) - confirm against Jumony.
                            Target = hyperLink.Attribute("target").Value() ?? DefaultTarget
                        }).ToList();   // materialize once before binding

            GridView1.DataSource = data;
            GridView1.DataBind();
        }

        /// <summary>
        /// Resolves <paramref name="href"/> against <paramref name="baseUrl"/>.
        /// </summary>
        /// <returns>The absolute URI, or <c>null</c> when the href is missing or malformed.</returns>
        private static Uri ResolveUrl(Uri baseUrl, string href)
        {
            if (href == null)
            {
                return null;
            }

            Uri resolved;
            return Uri.TryCreate(baseUrl, href, out resolved) ? resolved : null;
        }

        /// <summary>
        /// Tells whether <paramref name="host"/> is the site domain itself or one
        /// of its sub-domains. The leading dot in the suffix test keeps look-alike
        /// hosts such as "notcnblogs.com" from being treated as internal.
        /// </summary>
        private static bool IsSiteHost(string host)
        {
            if (string.IsNullOrEmpty(host))
            {
                return false;
            }

            return host.Equals(SiteHost, StringComparison.OrdinalIgnoreCase)
                || host.EndsWith("." + SiteHost, StringComparison.OrdinalIgnoreCase);
        }
    }
}
// 本文转自 jirigala 51CTO博客,原文链接:http://blog.51cto.com/2347979/1197289 ,如需转载请自行联系原作者