该App已经上传到百度应用市场:http://shouji.baidu.com/soft/item?docid=8928185&from=as&f=search_app_it%E8%B5%84%E8%AE%AF%40list_1_image%402%40header_all_input
有兴趣的可以下载看看。接下来我会公布源代码,不过该App并没有使用代码混淆,所以也可以通过反编译清楚地看到源码。
一 CSDN 的博客文章:
1.文章的信息:
/**
 * Plain data holder describing one article entry of a news list.
 *
 * <p>All properties are mutable and start at their Java defaults
 * ({@code 0} for the numeric ones, {@code null} for the strings).
 */
public class NewsItem {

    private int id;

    /** Title. */
    private String title;

    /** Link to the article. */
    private String link;

    /** Publication time. */
    private String date;

    /** Link to the picture. */
    private String picLink;

    /** Content. */
    private String content;

    /** Type. */
    private int newsType;

    public NewsItem() {
    }

    public int getId() {
        return id;
    }

    public void setId(int id) {
        this.id = id;
    }

    public String getTitle() {
        return title;
    }

    public void setTitle(String title) {
        this.title = title;
    }

    public String getLink() {
        return link;
    }

    public void setLink(String link) {
        this.link = link;
    }

    public String getDate() {
        return date;
    }

    public void setDate(String date) {
        this.date = date;
    }

    public String getPicLink() {
        return picLink;
    }

    public void setPicLink(String picLink) {
        this.picLink = picLink;
    }

    public String getContent() {
        return content;
    }

    public void setContent(String content) {
        this.content = content;
    }

    public int getNewsType() {
        return newsType;
    }

    public void setNewsType(int newsType) {
        this.newsType = newsType;
    }
}
抓取文章:
** * Created by Administrator on 2015/11/13. * 处理NewItem的业务类 */ public class NewsItemBiz { public List<NewsItem> getNewsItems(int newTypes, int currentPage) throws CommonExecption { String urlStr = URLUtil.generateUrl(newTypes, currentPage); String htmlStr = DataUtil.doGet(urlStr, "UTF-8"); List<NewsItem> newsItems = new ArrayList<NewsItem>(); NewsItem newsItem = null; Document doc = Jsoup.parse(htmlStr); Elements units = doc.getElementsByClass("unit"); for (int i = 0; i < units.size(); i++) { newsItem = new NewsItem(); newsItem.setNewsType(newTypes); Element unit_ele = units.get(i); Element h1_ele = unit_ele.getElementsByTag("h1").get(0); Element h1_a_ele = h1_ele.child(0); String title = h1_a_ele.text(); title = AppUtil.encoding(title,"utf-8"); String hred = h1_a_ele.attr("href"); hred = AppUtil.encoding(hred,"utf-8"); newsItem.setLink(hred); newsItem.setTitle(title); Element h4_ele = unit_ele.getElementsByTag("h4").get(0); Element ago_ele = h4_ele.getElementsByClass("ago").get(0); String date = ago_ele.text(); date = AppUtil.encoding(date,"utf-8"); newsItem.setDate(date); Element d1_ele = unit_ele.getElementsByTag("dl").get(0); Element dt_ele = d1_ele.child(0); try { Element img_ele = dt_ele.child(0); String imgLink = img_ele.child(0).attr("src"); // System.out.println("link--------"+imgLink); imgLink = AppUtil.encoding(imgLink,"utf-8"); newsItem.setPicLink(imgLink); } catch (IndexOutOfBoundsException e) { e.printStackTrace(); } Element content_ele = d1_ele.child(1); String content = content_ele.text(); content = AppUtil.encoding(content,"utf-8"); newsItem.setContent(content); newsItems.add(newsItem); } return newsItems; } public NewsDto getNews(String urlStr) throws CommonExecption { NewsDto newsDto = new NewsDto(); List<News> newses = new ArrayList<>(); String htmlStr = DataUtil.doGet(urlStr, "UTF-8"); Document doc = Jsoup.parse(htmlStr); // 获得文章中的第一个detail // System.out.println(htmlStr); Element detailEle = doc.select(".left .detail").get(0); // 标题 Element 
titleEle = detailEle.select("h1.title").get(0); News news = new News(); String title = titleEle.text(); title = AppUtil.encoding(title,"utf-8"); news.setTitle(title); news.setType(Constant.TITLE); newses.add(news); // 摘要 Element summaryEle = detailEle.select("div.summary").get(0); news = new News(); String summary = summaryEle.text(); summary = AppUtil.encoding(summary,"utf-8"); news.setSumary(summary); news.setType(Constant.SUMMARY); newses.add(news); // 内容 Element contentEle = detailEle.select("div.con.news_content").get(0); Elements childrenEle = contentEle.children(); for (Element child : childrenEle) { Elements imgEles = child.getElementsByTag("img"); // 图片 if (imgEles.size() > 0) { for (Element imgEle : imgEles) { if (imgEle.attr("src").equals("")) continue; news = new News(); String imgLink = imgEle.attr("src"); imgLink = AppUtil.encoding(imgLink,"utf-8"); news.setImageLink(imgLink); news.setType(Constant.IMG); newses.add(news); } } // 移除图片 imgEles.remove(); if (child.text().equals("")) continue; news = new News(); news.setType(Constant.CONTENT); try { if (child.children().size() == 1) { Element cc = child.child(0); if (cc.tagName().equals("b")) { news.setType(Constant.BOLD_TITLE); } } } catch (IndexOutOfBoundsException e) { e.printStackTrace(); } String content = child.outerHtml(); content = AppUtil.encoding(content,"utf-8"); news.setContent(content); newses.add(news); } newsDto.setNewses(newses); return newsDto; } }
URL的处理:
/**
 * Builds the paged list URLs of the CSDN news channels.
 */
public class URLUtil {

    public static final String NEWS_LIST_URL = "http://www.csdn.net/headlines.html";
    public static final String NEWS_LIST_URL_YIDONG = "http://mobile.csdn.net/mobile";
    public static final String NEWS_LIST_URL_YANFA = "http://sd.csdn.net/sd";
    public static final String NEWS_LIST_URL_YUNJISUAN = "http://cloud.csdn.net/cloud";
    public static final String NEWS_LIST_URL_ZAZHI = "http://programmer.csdn.net/programmer";
    public static final String NEWS_LIST_URL_YEJIE = "http://news.csdn.net/news";

    /**
     * Returns the list URL for a news type with {@code "/" + page} appended.
     *
     * @param newType     one of the {@code Constant.NEW_TYPE_*} values; any other
     *                    value selects {@link #NEWS_LIST_URL}
     * @param currentPage requested page; values below 1 are treated as page 1
     * @return the channel URL followed by a slash and the page number
     */
    public static String generateUrl(int newType, int currentPage) {
        final int page = currentPage > 0 ? currentPage : 1;

        final String base;
        switch (newType) {
            case Constant.NEW_TYPE_YEJIE:
                base = NEWS_LIST_URL_YEJIE;
                break;
            case Constant.NEW_TYPE_YANFA:
                base = NEWS_LIST_URL_YANFA;
                break;
            case Constant.NEW_TYPE_YUNJISUAN:
                base = NEWS_LIST_URL_YUNJISUAN;
                break;
            case Constant.NEW_TYPE_YIDONG:
                base = NEWS_LIST_URL_YIDONG;
                break;
            case Constant.NEW_TYPE_CHENGXUYUAN:
                base = NEWS_LIST_URL_ZAZHI;
                break;
            default:
                base = NEWS_LIST_URL;
                break;
        }
        return base + "/" + page;
    }
}
访问网络:
public class DataUtil {

    /**
     * Fetches the given URL with HTTP GET and returns the response body as text.
     *
     * <p>The body is read completely before it is decoded, so multi-byte
     * characters that straddle a read-buffer boundary are decoded correctly.
     *
     * @param urlstr address of the page to download
     * @param uncode name of the charset used to decode the response body
     * @return the decoded response body
     * @throws CommonExecption if the status code is not 200, the response has no
     *                         body, or any error occurs while requesting or
     *                         reading; the underlying error is kept as the cause
     */
    public static String doGet(String urlstr, String uncode) throws CommonExecption {
        GetMethod getMethod = null;
        try {
            HttpClient client = new HttpClient();
            getMethod = new GetMethod(urlstr);
            getMethod.addRequestHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
            getMethod.addRequestHeader("Accept-Language", "zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3");
            // No explicit "Host" header: it used to be hard-coded to www.csdn.net,
            // which does not match the other hosts this method is called with.
            // The HTTP client is left to derive it from the request URL.
            getMethod.addRequestHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.1; rv:43.0) Gecko/20100101 Firefox/43.0");
            getMethod.addRequestHeader("Connection", "keep-alive");

            int state = client.executeMethod(getMethod);
            if (state != 200) {
                throw new CommonExecption("访问网络失败");
            }

            InputStream is = getMethod.getResponseBodyAsStream();
            if (is == null) {
                // No response body available.
                throw new CommonExecption("访问网络失败");
            }
            try {
                return readFully(is, uncode);
            } finally {
                is.close();
            }
        } catch (CommonExecption e) {
            throw e;
        } catch (Exception e) {
            throw new CommonExecption("访问网络失败", e);
        } finally {
            if (getMethod != null) {
                // Hand the connection back even when reading failed.
                getMethod.releaseConnection();
            }
        }
    }

    /** Reads the stream to its end and decodes all bytes at once with the given charset. */
    private static String readFully(InputStream in, String charsetName) throws java.io.IOException {
        java.io.ByteArrayOutputStream bytes = new java.io.ByteArrayOutputStream();
        byte[] buf = new byte[1024];
        int len;
        while ((len = in.read(buf)) != -1) {
            bytes.write(buf, 0, len);
        }
        return bytes.toString(charsetName);
    }
}
关于异常处理:
/**
 * Checked exception used to report a failed operation to callers, optionally
 * with a message and/or the underlying cause.
 */
public class CommonExecption extends Exception {

    /** Creates an exception without message or cause. */
    public CommonExecption() {
        super();
    }

    /**
     * @param message description of the failure
     * @param cause   underlying error that triggered this exception
     */
    public CommonExecption(String message, Throwable cause) {
        super(message, cause);
    }

    /**
     * @param message description of the failure
     */
    public CommonExecption(String message) {
        super(message);
    }

    /**
     * @param cause underlying error that triggered this exception
     */
    public CommonExecption(Throwable cause) {
        super(cause);
    }
}
二 博客园
访问网络:
public class BlogHouseDataUtil { /** * 返回该链接地址的html数据 * * @param urlStr * @return * @throws */ public static String doGet(String urlStr,int currentPage,int newType) throws CommonExecption { StringBuffer sb = new StringBuffer(); try { HttpClient client=new HttpClient(); PostMethod post =new PostMethod(urlStr); switch(newType){ case Constant.NEWS_TYPE_HOME: post.addParameter("CategoryType", "SiteHome"); post.addParameter("CategoryId",String.valueOf(808)); post.addParameter("ItemListActionName", "PostList"); break; case Constant.NEWS_TYPE_PICK: post.addParameter("CategoryType", "Picked"); post.addParameter("CategoryId",String.valueOf(-2)); post.addParameter("ItemListActionName", "PostList"); break; case Constant.NEWS_TYPE_CANDIDATE: post.addParameter("CategoryType", "HomeCandidate"); post.addParameter("CategoryId",String.valueOf(108697)); post.addParameter("ItemListActionName", "PostList"); break; case Constant.NEWS_TYPE_NEWS: post.addParameter("CategoryType", "News"); post.addParameter("CategoryId",String.valueOf(-1)); post.addParameter("ItemListActionName", "NewsList"); break; } post.addParameter("PageIndex",String.valueOf(currentPage)); post.addParameter("ParentCategoryId",String.valueOf(0)); int state=client.executeMethod(post); if (state == 200) { InputStream is = post.getResponseBodyAsStream(); int len = 0; byte[] buf = new byte[1024]; while ((len = is.read(buf)) != -1) { sb.append(new String(buf, 0, len, "UTF-8")); } is.close(); } else { throw new CommonExecption("访问网络失败!"); } } catch (Exception e) { e.printStackTrace(); throw new CommonExecption("访问网络失败!"); } return sb.toString(); } }
URL处理
public class Blog_URLUtil { public static final String HOME_URL="http://www.cnblogs.com/#p"; //首页 public static final String PICK_URL="http://www.cnblogs.com/pick/#p"; //精华 public static final String CANDIDATE_URL="http://www.cnblogs.com/candidate/#p"; //候选 public static final String NEWS_URL="http://www.cnblogs.com/news/#p"; //新闻 /** * 根据文章类型,和当前页码生成url * @param newsType * @param currentPage * @return */ public static String generateUrl(int newsType, int currentPage) { currentPage = currentPage > 0 ? currentPage : 1; String urlStr = ""; switch (newsType) { case Constant.NEWS_TYPE_HOME: urlStr = HOME_URL; break; case Constant.NEWS_TYPE_PICK: urlStr = PICK_URL; break; case Constant.NEWS_TYPE_CANDIDATE: urlStr = CANDIDATE_URL; break; case Constant.NEWS_TYPE_NEWS: urlStr = NEWS_URL; break; default: break; } urlStr += ""+currentPage; return urlStr; } }
抓取文章 :
/**
 * Scrapes cnblogs.com list pages into {@link NewsItem} objects.
 */
public class NewItemBlogHouse {

    /**
     * Downloads one list page and extracts an item from every element with
     * class {@code post_item_body}.
     *
     * @param newsType    news type; selects the list URL and request parameters
     *                    and is stored on every item
     * @param currentPage page number to request
     * @return the parsed items in document order (empty if nothing matched)
     * @throws CommonExecption           if downloading the page fails
     * @throws IndexOutOfBoundsException if an entry lacks one of the expected
     *                                   h3 / div / p elements
     */
    public List<NewsItem> getNewsItems(int newsType, int currentPage) throws CommonExecption {
        String urlStr = Blog_URLUtil.generateUrl(newsType, currentPage);
        // The complete page is no longer dumped to stdout on every call.
        String htmlStr = BlogHouseDataUtil.doGet(urlStr, currentPage, newsType);

        List<NewsItem> newsItems = new ArrayList<NewsItem>();
        Document doc = Jsoup.parse(htmlStr);
        Elements units = doc.getElementsByClass("post_item_body");

        for (Element unit : units) {
            NewsItem newsItem = new NewsItem();
            newsItem.setNewsType(newsType);

            // Title and link come from the first child of the entry's first <h3>.
            Element titleLink = unit.getElementsByTag("h3").get(0).child(0);
            String title = AppUtil.encoding(titleLink.text(), "utf-8");
            String href = AppUtil.encoding(titleLink.attr("href"), "utf-8");
            newsItem.setLink(href);
            newsItem.setTitle(title);

            // Looked up once; the DOM is not modified while an entry is parsed.
            Elements divs = unit.getElementsByTag("div");

            // The second matched <div> carries the date line.
            String date = AppUtil.encoding(divs.get(1).text(), "utf-8");
            newsItem.setDate(date);

            try {
                // There may be no picture: it is the first child of the second
                // <a> inside the first matched <div>.
                Element picLink = divs.get(0).getElementsByTag("a").get(1);
                String imgLink = picLink.child(0).attr("src");
                newsItem.setPicLink(AppUtil.encoding(imgLink, "utf-8"));
            } catch (IndexOutOfBoundsException e) {
                System.out.println("没有图片");
            }

            // Summary text: first <p> inside the first matched <div>.
            Element summary = divs.get(0).getElementsByTag("p").get(0);
            newsItem.setContent(AppUtil.encoding(summary.text(), "utf-8"));

            newsItems.add(newsItem);
        }
        return newsItems;
    }
}
三 51CTO
网络请求:
public class DataUtil {

    /**
     * Fetches the given URL with HTTP GET and returns the response body as text.
     *
     * <p>The body is read completely before it is decoded, so multi-byte
     * characters that straddle a read-buffer boundary are decoded correctly.
     *
     * @param urlstr address of the page to download
     * @param uncode name of the charset used to decode the response body
     * @return the decoded response body
     * @throws CommonExecption if the status code is not 200, the response has no
     *                         body, or any error occurs while requesting or
     *                         reading; the underlying error is kept as the cause
     */
    public static String doGet(String urlstr, String uncode) throws CommonExecption {
        GetMethod getMethod = null;
        try {
            HttpClient client = new HttpClient();
            getMethod = new GetMethod(urlstr);
            getMethod.addRequestHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
            getMethod.addRequestHeader("Accept-Language", "zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3");
            // No explicit "Host" header: it used to be hard-coded to www.csdn.net,
            // which is wrong for requests to any other host. The HTTP client is
            // left to derive it from the request URL.
            getMethod.addRequestHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.1; rv:43.0) Gecko/20100101 Firefox/43.0");
            getMethod.addRequestHeader("Connection", "keep-alive");

            int state = client.executeMethod(getMethod);
            if (state != 200) {
                throw new CommonExecption("访问网络失败");
            }

            InputStream is = getMethod.getResponseBodyAsStream();
            if (is == null) {
                // No response body available.
                throw new CommonExecption("访问网络失败");
            }
            try {
                return readFully(is, uncode);
            } finally {
                is.close();
            }
        } catch (CommonExecption e) {
            throw e;
        } catch (Exception e) {
            throw new CommonExecption("访问网络失败", e);
        } finally {
            if (getMethod != null) {
                // Hand the connection back even when reading failed.
                getMethod.releaseConnection();
            }
        }
    }

    /** Reads the stream to its end and decodes all bytes at once with the given charset. */
    private static String readFully(InputStream in, String charsetName) throws java.io.IOException {
        java.io.ByteArrayOutputStream bytes = new java.io.ByteArrayOutputStream();
        byte[] buf = new byte[1024];
        int len;
        while ((len = in.read(buf)) != -1) {
            bytes.write(buf, 0, len);
        }
        return bytes.toString(charsetName);
    }
}
URL处理
public class CTO_URLUtil { public static final String FIRST_URL="http://blog.51cto.com/artcommend"; public static final String NETWORK_URL="http://blog.51cto.com/artcommend/14"; //网络开发 public static final String DEVELOP_URL="http://blog.51cto.com/artcommend/8"; //开发技术������ public static final String ADMIN_URL="http://blog.51cto.com/artcommend/9"; //IT管理��� public static final String LIFE_URL="http://blog.51cto.com/artcommend/12"; //IT生活��� /** * 根据文章类型,和当前页码生成url * @param newsType * @param currentPage * @return */ public static String generateUrl(int newsType, int currentPage) { currentPage = currentPage > 0 ? currentPage : 1; String urlStr = ""; switch (newsType) { case Constant.NEWS_TYPE_NETWORK: urlStr = NETWORK_URL; break; case Constant.NEWS_TYPE_DEVELOPMENT: urlStr = DEVELOP_URL; break; case Constant.NEWS_TYPE_IT_ADMIN: urlStr = ADMIN_URL; break; case Constant.NEWS_TYPE_IT_LIFE: urlStr = LIFE_URL; break; default: break; } urlStr += "/" + currentPage; return urlStr; } }
抓取文章 :
public class NewItem51CTO { /** * 处理开发 网络管理 ,IT生活..... * * @param newsType * @param currentPage * @return * @throws CommonExecption */ public List<NewsItem> getNewsItems(int newsType, int currentPage) throws CommonExecption { String urlStr = CTO_URLUtil.generateUrl(newsType, currentPage); String htmlStr = DataUtil.doGet(urlStr, "GB2312"); List<NewsItem> newsItems = new ArrayList<>(); NewsItem newsItem = null; Document doc = Jsoup.parse(htmlStr); Elements units = doc.getElementsByClass("r_li"); for (int i = 0; i < units.size(); i++) { newsItem = new NewsItem(); newsItem.setNewsType(newsType); Element unit_ele = units.get(i); Element h1_ele = unit_ele.getElementsByTag("h4").get(0); Element h1_a_ele = h1_ele.child(0); String title = h1_a_ele.text(); title= AppUtil.encoding(title,"utf-8"); String href = h1_a_ele.attr("href"); href=AppUtil.encoding(href,"utf-8"); newsItem.setLink(href); newsItem.setTitle(title); // System.out.println(href); Element div_date = unit_ele.getElementsByTag("div").get(2); Element p_ele = div_date.getElementsByTag("p").get(0); String span_ele = p_ele.getElementsByTag("span").get(0).text(); // System.out.println("---------"+div_date.toString()); span_ele=AppUtil.encoding(span_ele,"utf-8"); newsItem.setDate(span_ele); try {// 可能没有图片 Element div_pic = unit_ele.getElementsByTag("div").get(1); Element p_pic = div_pic.getElementsByTag("a").get(0); Element img = p_pic.child(0); String imgLink = img.attr("src"); imgLink=AppUtil.encoding(imgLink,"utf-8"); newsItem.setPicLink(imgLink); } catch (IndexOutOfBoundsException e) { System.out.println("数组边界异常"); } Element div_content = unit_ele.getElementsByTag("div").get(1); Element p_content = div_content.getElementsByTag("p").get(0); String content = p_content.text(); content=AppUtil.encoding(content,"utf-8"); // System.out.println("--------"+content); newsItem.setContent(content); newsItems.add(newsItem); } return newsItems; } }
四 ITeye
网络请求 :
public class ITeyeDataUtil {

    /**
     * User-Agent values selectable through {@code useAgentNum} 14..17
     * (index = {@code useAgentNum - 14}).
     */
    private static final String[] USER_AGENTS = {
        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
        "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; en) Presto/2.8.131 Version/11.11",
        " Mozilla/4.0 (compatible; MSIE 6.0; ) Opera/UCWEB7.0.2.37/28/999"
    };

    /**
     * Fetches the given address with {@code HttpURLConnection} and returns the
     * response body decoded as UTF-8.
     *
     * @param urlStr      address of the page to download
     * @param currentPage not used by this method
     * @param newType     not used by this method
     * @param useAgentNum 14, 15, 16 or 17 selects one of four User-Agent
     *                    headers; with any other value no User-Agent is set here
     * @return the decoded response body
     * @throws CommonExecption if the status code is not 200 or any error occurs
     *                         while requesting or reading; the underlying error
     *                         is kept as the cause
     */
    public static String doGet1(String urlStr, int currentPage, int newType, int useAgentNum) throws CommonExecption {
        try {
            URL url = new URL(urlStr);
            HttpURLConnection conn = (HttpURLConnection) url.openConnection();
            conn.setRequestMethod("GET");
            conn.setConnectTimeout(8000);
            conn.setDoInput(true);
            // doOutput stays at its default: a GET sends no request body.
            // NOTE(review): enabling it reportedly turns the request into a POST
            // on some platforms (e.g. Android) - confirm if that was relied on.

            // Request headers must be set BEFORE the response code is read:
            // getResponseCode() connects, after which setRequestProperty throws
            // IllegalStateException.
            if (useAgentNum >= 14 && useAgentNum <= 17) {
                conn.setRequestProperty("User-Agent", USER_AGENTS[useAgentNum - 14]);
            }

            int code = conn.getResponseCode();
            System.out.println("code-----" + code);
            if (code != HttpURLConnection.HTTP_OK) {
                throw new CommonExecption("访问网络失败!");
            }

            InputStream is = conn.getInputStream();
            try {
                // Decode once over the whole body so multi-byte characters that
                // straddle a read-buffer boundary are not corrupted.
                return readFully(is, "UTF-8");
            } finally {
                is.close();
            }
        } catch (CommonExecption e) {
            throw e;
        } catch (Exception e) {
            throw new CommonExecption("访问网络失败!", e);
        }
    }

    /**
     * Fetches the given address with Commons HttpClient, sending browser-like
     * headers, and returns the body as text with every line terminated by
     * {@code "\n"}.
     *
     * @param urlStr address of the page to download
     * @return the response body, decoded as UTF-8
     * @throws CommonExecption if the status code is not 200, the response has no
     *                         body, or any error occurs while requesting or
     *                         reading; the underlying error is kept as the cause
     */
    public static String doGet(String urlStr) throws CommonExecption {
        GetMethod getMethod = null;
        try {
            HttpClient client = new HttpClient();
            getMethod = new GetMethod(urlStr);
            getMethod.addRequestHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.1; rv:43.0) Gecko/20100101 Firefox/43.0");
            getMethod.addRequestHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
            getMethod.addRequestHeader("Accept-Language", "zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3");
            getMethod.addRequestHeader("Host", "www.iteye.com");
            getMethod.addRequestHeader("Connection", "keep-alive");
            getMethod.addRequestHeader("Referer", "http://www.iteye.com/news");
            client.getParams().setParameter("http.protocol.cookie-policy", CookiePolicy.BROWSER_COMPATIBILITY);

            int state = client.executeMethod(getMethod);
            System.out.println("state2-----" + state);
            if (state != 200) {
                throw new CommonExecption("访问网络失败");
            }

            // The body stream is requested once and read through a single reader.
            InputStream is = getMethod.getResponseBodyAsStream();
            if (is == null) {
                // No response body available.
                throw new CommonExecption("访问网络失败");
            }
            // Explicit UTF-8 (as in doGet1) instead of the platform default charset.
            BufferedReader reader = new BufferedReader(new InputStreamReader(is, "UTF-8"));
            try {
                StringBuilder sb = new StringBuilder();
                String line;
                while ((line = reader.readLine()) != null) {
                    sb.append(line).append("\n");
                }
                return sb.toString();
            } finally {
                reader.close();
            }
        } catch (CommonExecption e) {
            throw e;
        } catch (Exception e) {
            throw new CommonExecption("访问网络失败", e);
        } finally {
            if (getMethod != null) {
                // Hand the connection back even when reading failed.
                getMethod.releaseConnection();
            }
        }
    }

    /** Reads the stream to its end and decodes all bytes at once with the given charset. */
    private static String readFully(InputStream in, String charsetName) throws java.io.IOException {
        java.io.ByteArrayOutputStream bytes = new java.io.ByteArrayOutputStream();
        byte[] buf = new byte[1024];
        int len;
        while ((len = in.read(buf)) != -1) {
            bytes.write(buf, 0, len);
        }
        return bytes.toString(charsetName);
    }
}
URL处理:
public class ITEYE_URLUtil { public static final String NEWS_URL="http://www.iteye.com/news?page="; //资讯 public static final String MAGAZINES_URL="http://www.iteye.com/magazines?page="; //精华 public static final String BLOG_URL="http://www.iteye.com/blogs?page="; //博客 public static final String SUBJECTS_URL="http://www.iteye.com/blogs/subjects?page="; //专栏 /** * 根据文章类型,和当前页码生成url * @param newsType * @param currentPage * @return */ public static String generateUrl(int newsType, int currentPage) { currentPage = currentPage > 0 ? currentPage : 1; String urlStr = ""; switch (newsType) { case Constant.NEWS_TYPE_NEW: urlStr = NEWS_URL; break; case Constant.NEWS_TYPE_MAGAZINES: urlStr = MAGAZINES_URL; break; case Constant.NEWS_TYPE_BLOGS: urlStr =BLOG_URL; break; case Constant.NEWS_TYPE_SUBJECTS: urlStr =SUBJECTS_URL; break; default: break; } urlStr +=currentPage; return urlStr; } }
抓取文章 :
public class NewItemITeye { public List<NewsItem> getNewsItems(int newsType, int currentPage, int useAgentNum) throws CommonExecption { String urlStr = ITEYE_URLUtil.generateUrl(newsType, currentPage); String htmlStr = ITeyeDataUtil.doGet1(urlStr, newsType, currentPage, 12); List<NewsItem> newsItems = new ArrayList<NewsItem>(); NewsItem newsItem = null; Document doc = Jsoup.parse(htmlStr); Elements units = doc.getElementsByClass("content"); for (int i = 0; i < units.size(); i++) { newsItem = new NewsItem(); newsItem.setNewsType(newsType); Element unit_ele = units.get(i); Element h3_ele = unit_ele.getElementsByTag("h3").get(0); //解析时间 Element span_ele = null; switch (newsType) { case Constant.NEWS_TYPE_NEW: Element a_ele = h3_ele.getElementsByTag("a").get(1); String title = a_ele.text(); title = AppUtil.encoding(title, "utf-8"); newsItem.setTitle(title); String href = a_ele.attr("href"); href = AppUtil.encoding(href, "utf-8"); StringBuffer sb = new StringBuffer(); sb.append("http://www.iteye.com").append(href); newsItem.setLink(sb.toString()); Element div_ele = unit_ele.getElementsByTag("div").get(3); if (div_ele.getElementsByTag("span").size() >= 3) { span_ele = div_ele.getElementsByTag("span").get(2); } else { span_ele = div_ele.getElementsByTag("span").get(1); } //获取图片链接 try {// 可能没有图片 Element img_ele = h3_ele.child(0); String imgLink = img_ele.attr("src"); // System.out.println(imgLink); imgLink = AppUtil.encoding(imgLink, "utf-8"); newsItem.setPicLink(imgLink); } catch (IndexOutOfBoundsException e) { System.out.println("没有图片"); } break; case Constant.NEWS_TYPE_BLOGS: Element a_ele1 = h3_ele.getElementsByTag("a").get(1); // System.out.println("a_ele----------->"+a_ele.toString()); String title1 = a_ele1.text(); title1 = AppUtil.encoding(title1, "utf-8"); newsItem.setTitle(title1); String href1 = a_ele1.attr("href"); href1 = AppUtil.encoding(href1, "utf-8"); newsItem.setLink(href1); Element div_ele2 = unit_ele.getElementsByTag("div").get(4); if 
(div_ele2.getElementsByTag("span").size() >= 3) { span_ele = div_ele2.getElementsByTag("span").get(4); } else { span_ele = div_ele2.getElementsByTag("span").get(1); } //获取图片链接 try {// 可能没有图片 Element img_ele = unit_ele.getElementsByTag("div").get(2); Element a1_ele = img_ele.getElementsByTag("a").get(0); Element img = a1_ele.getElementsByTag("img").get(0); String imgLink = img.attr("src"); imgLink = AppUtil.encoding(imgLink, "utf-8"); newsItem.setPicLink(imgLink); // System.out.println("img--------"+imgLink); } catch (IndexOutOfBoundsException e) { System.out.println("没有图片"); } break; case Constant.NEWS_TYPE_MAGAZINES: Element a_ele2 = h3_ele.getElementsByTag("a").get(0); String title2 = a_ele2.text(); title2 = AppUtil.encoding(title2, "utf-8"); newsItem.setTitle(title2); String href2 = a_ele2.attr("href"); href2 = AppUtil.encoding(href2, "utf-8"); StringBuffer sb_href = new StringBuffer(); sb_href.append("http://www.iteye.com").append(href2); //System.out.println("sb_href-------"+sb_href.toString()); newsItem.setLink(sb_href.toString()); Element div_ele3 = unit_ele.getElementsByTag("div").get(3); if (div_ele3.getElementsByTag("span").size() >= 3) { span_ele = div_ele3.getElementsByTag("span").get(2); } else { span_ele = div_ele3.getElementsByTag("span").get(1); } //获取图片链接 try {// 可能没有图片 Element img_ele = h3_ele.child(0); String imgLink = img_ele.attr("src"); // System.out.println(imgLink); imgLink = AppUtil.encoding(imgLink, "utf-8"); newsItem.setPicLink(imgLink); } catch (IndexOutOfBoundsException e) { System.out.println("没有图片"); } break; case Constant.NEWS_TYPE_SUBJECTS: Element a_ele3 = h3_ele.getElementsByTag("a").get(1); // System.out.println("a_ele----------->"+a_ele.toString()); String title3 = a_ele3.text(); title3 = AppUtil.encoding(title3, "utf-8"); newsItem.setTitle(title3); String href3 = a_ele3.attr("href"); href3 = AppUtil.encoding(href3, "utf-8"); Element a_ele_3 = h3_ele.getElementsByTag("a").get(1); // 
System.out.println("a_ele----------->"+a_ele.toString()); newsItem.setLink(href3); Element div_ele1 = unit_ele.getElementsByTag("div").get(4); if (div_ele1.getElementsByTag("span").size() >= 3) { span_ele = div_ele1.getElementsByTag("span").get(1); } else { span_ele = div_ele1.getElementsByTag("span").get(2); } //获取图片链接 try {// 可能没有图片 Element img_ele = unit_ele.getElementsByTag("div").get(2); Element a1_ele = img_ele.getElementsByTag("a").get(0); Element img = a1_ele.getElementsByTag("img").get(0); String imgLink = img.attr("src"); imgLink = AppUtil.encoding(imgLink, "utf-8"); newsItem.setPicLink(imgLink); } catch (IndexOutOfBoundsException e) { System.out.println("没有图片"); } break; } String date = span_ele.text(); date = AppUtil.encoding(date, "utf-8"); StringBuffer date_buffer = new StringBuffer(); date_buffer.append("发布于").append(" ").append(date); newsItem.setDate(date_buffer.toString()); Element h1_ele = unit_ele.getElementsByTag("div").get(1); String content = h1_ele.text(); content = AppUtil.encoding(content, "utf-8"); // System.out.println("h1_ele---------->"+content); // System.out.println("--------"+content); newsItem.setContent(content); newsItems.add(newsItem); } return newsItems; }