jsoup 效果
qq新闻内容抓取:正则表达式(正则)
http://knight-black-bob.iteye.com/blog/2312411
与 tika 和正则相比,我更喜欢 jsoup。
正则比较难写。
下面有jar 包下载
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.9.2</version>
</dependency>
IteyeItemEntity [ userName=wosyingjun, userPicLink=http://www.iteye.com/upload/logo/user/1184026/fa9a8493-f9a7-3e3b-9630-12ad8f65d277-thumb.png?1467599214, userBlogLink=http://wosyingjun.iteye.com, title=推荐几个自己写的Java后端相关的范例项目, content=推荐几个自己写的范例项目 这里推荐几个自己写的范例项目,主要采用SSM(Spring+SpringMVC+Mybatis)框架,分布式架构采用的是(dubbo+zookeeper)。范例项目的好处是简单易懂,在架构一个新的项目的时候可以直接当成脚手架来用,方便快速开发,另外项目中涉及到以及未来可能涉及到的知识点都会不断完善。 三个项目是互相发展而来的,目前仍在不断完善中,依次为: ..., articleLink=http://wosyingjun.iteye.com/blog/2312553, seeNum=有2871人浏览, goodNum=7顶, badNum=0踩, insertTime=2016-07-21 09:04 ] //Connection connection = Jsoup.connect(url); //Document document = connection.get(); Document document = Jsoup.parse(data); Elements indexmain = document.select(".blog"); Iterator<Element> blogIter = indexmain.iterator(); IteyeItemEntity item = null; while (blogIter.hasNext()) { Element element = blogIter.next(); String userName = element.select(".content .blog_info a[title]").text(); String userPicLink = element.select(".content .logo img").attr("src"); String userBlogLink = element.select(".content .blog_info a").attr("href"); String title = element.select(".content h3 a[title]").text(); String content = element.select(".content div").iterator().next().text(); String articleLink = element.select(".content h3 a").last().attr("href"); String seeNum = element.select(".content .blog_info .view").text(); String goodNum = element.select(".content .blog_info .digged .digg").text(); String badNum = element.select(".content .blog_info .digged .bury").text(); String insertTime = element.select(".content .blog_info .date").text(); item = new IteyeItemEntity(userName, userPicLink, userBlogLink, title, content, articleLink, seeNum, goodNum, badNum, insertTime); list.add(item);
package com.couriousby.iteyedemo.util;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import android.util.Log;
import android.widget.Toast;

import com.couriousby.iteyedemo.MyApplication;
import com.couriousby.iteyedemo.entity.IteyeItemEntity;

/**
 * Turns the HTML of an iteye blog-list page into {@link IteyeItemEntity} objects using jsoup.
 *
 * @author baoyou E-mail:curiousby@163.com
 * @version 2016-7-22 10:58:49 AM
 */
public class IteyeJsoupPerformer {

    private static final String TAG = "IteyeJsoupPerformer";

    /**
     * Parses a blog-list page and extracts one entity per element matching {@code .blog}.
     *
     * <p>Fields whose selector matches nothing are filled with an empty string, so a single
     * incomplete entry no longer aborts parsing of the entries that follow it.
     *
     * @param data the HTML of the list page; {@code null} or empty yields an empty list
     * @return the parsed entries in document order; never {@code null}
     */
    public static List<IteyeItemEntity> getListIteyeEntity(String data) {
        List<IteyeItemEntity> list = new ArrayList<IteyeItemEntity>();
        if (data == null || data.length() == 0) {
            return list;
        }
        try {
            Document document = Jsoup.parse(data);
            for (Element element : document.select(".blog")) {
                String userName = element.select(".content .blog_info a[title]").text();
                String userPicLink = element.select(".content .logo img").attr("src");
                String userBlogLink = element.select(".content .blog_info a").attr("href");
                String title = element.select(".content h3 a[title]").text();

                // first()/last() return null when nothing matched; fall back to "" the same
                // way Elements.text()/attr() already do for the other fields.
                Element summary = element.select(".content div").first();
                String content = summary == null ? "" : summary.text();
                Element titleLink = element.select(".content h3 a").last();
                String articleLink = titleLink == null ? "" : titleLink.attr("href");

                String seeNum = element.select(".content .blog_info .view").text();
                String goodNum = element.select(".content .blog_info .digged .digg").text();
                String badNum = element.select(".content .blog_info .digged .bury").text();
                String insertTime = element.select(".content .blog_info .date").text();

                list.add(new IteyeItemEntity(userName, userPicLink, userBlogLink, title, content,
                        articleLink, seeNum, goodNum, badNum, insertTime));
            }
        } catch (Exception e) {
            // Best effort: keep whatever was parsed so far, but leave a trace in the log.
            Log.e(TAG, "Failed to parse iteye blog list", e);
        }
        return list;
    }
}
package com.couriousby.iteyedemo.util;

import java.util.ArrayList;
import java.util.List;

import com.couriousby.iteyedemo.R;
import com.couriousby.iteyedemo.entity.GridEntity;

/**
 * URL building blocks and category metadata for the iteye blog list.
 *
 * @author baoyou E-mail:curiousby@163.com
 * @version 2016-7-22 1:10:55 PM
 */
public class Constants {

    final static String URL_BASE = "http://www.iteye.com/blogs";

    /** URL path suffix per category id; the array index is the category id. */
    private static final String[] CATEGORY_PATHS = {
        "",
        "/category/mobile",
        "/category/web",
        "/category/architecture",
        "/category/language",
        "/category/internet",
        "/category/opensource",
        "/category/os",
        "/category/database",
        "/category/develop",
        "/category/industry",
        "/category/other"
    };

    /**
     * Returns the URL path suffix for a category id.
     *
     * @param category category id, 0 to 11
     * @return the path suffix, or an empty string for id 0 and for any id outside 0 to 11
     */
    public static String getIteyeCategory(int category) {
        boolean known = category >= 0 && category < CATEGORY_PATHS.length;
        return known ? CATEGORY_PATHS[category] : "";
    }

    /**
     * Builds the list-page URL for a category and page number.
     *
     * @param category category id, see {@link #getIteyeCategory(int)}
     * @param page value appended as the {@code page} query parameter
     * @return {@code URL_BASE} + category path + {@code "?page="} + page
     */
    public static String getIteyeUrl(int category, int page) {
        StringBuilder url = new StringBuilder(URL_BASE);
        url.append(getIteyeCategory(category));
        url.append("?page=");
        url.append(page);
        return url.toString();
    }

    /**
     * Creates the twelve category entries shown in the category grid.
     *
     * @return a new mutable list; entry {@code i} is built as
     *         {@code new GridEntity(i, label, icon, i)}
     */
    public static List<GridEntity> getGridItem() {
        String[] labels = {
            "全部分类", "移动开发", "web前端", "企业架构", "编程语言", "互联网",
            "开源软件", "操作系统", "数据库", "研发管理", "行业应用", "非技术"
        };
        int[] icons = {
            R.drawable.iteye_all,
            R.drawable.iteye_mobile,
            R.drawable.iteye_web,
            R.drawable.iteye_architecture,
            R.drawable.iteye_language,
            R.drawable.iteye_internet,
            R.drawable.iteye_opensource,
            R.drawable.iteye_os,
            R.drawable.iteye_database,
            R.drawable.iteye_develop,
            R.drawable.iteye_industry,
            R.drawable.iteye_other
        };

        List<GridEntity> items = new ArrayList<GridEntity>();
        for (int i = 0; i < labels.length; i++) {
            items.add(new GridEntity(i, labels[i], icons[i], i));
        }
        return items;
    }
}
package com.couriousby.iteyedemo.activity;

import java.util.ArrayList;
import java.util.List;

import android.app.Activity;
import android.content.Context;
import android.content.Intent;
import android.graphics.Color;
import android.graphics.drawable.BitmapDrawable;
import android.graphics.drawable.ColorDrawable;
import android.os.Bundle;
import android.view.View;
import android.view.View.OnClickListener;
import android.widget.AdapterView;
import android.widget.AdapterView.OnItemClickListener;
import android.widget.AdapterView.OnItemSelectedListener;
import android.widget.LinearLayout.LayoutParams;
import android.widget.GridView;
import android.widget.PopupWindow;
import android.widget.TextView;

import com.couriousby.iteyedemo.MyApplication;
import com.couriousby.iteyedemo.R;
import com.couriousby.iteyedemo.adapter.IteyeAdapter;
import com.couriousby.iteyedemo.adapter.IteyePopwindowGridListAdapter;
import com.couriousby.iteyedemo.entity.GridEntity;
import com.couriousby.iteyedemo.entity.IteyeItemEntity;
import com.couriousby.iteyedemo.listener.OnIteyeGridViewItemclickListener;
import com.couriousby.iteyedemo.quote.xlistview.MsgListView;
import com.couriousby.iteyedemo.quote.xlistview.MsgListView.IXListViewListener;
import com.couriousby.iteyedemo.request.event.IteyeStringHttpEvent;
import com.couriousby.iteyedemo.request.event.base.RequestEvent;
import com.couriousby.iteyedemo.request.http.IteyeHttpRequest;
import com.couriousby.iteyedemo.util.Constants;
import com.couriousby.iteyedemo.util.IteyeJsoupPerformer;

import de.greenrobot.event.EventBus;

/**
 * Main screen: a pull-to-refresh / load-more list of iteye blog entries plus a drop-down
 * grid for choosing the blog category. Page HTML arrives asynchronously as EventBus events
 * and is parsed with {@link IteyeJsoupPerformer}.
 */
public class IteyeMainActivity extends Activity implements IXListViewListener,
        OnItemClickListener, OnClickListener, OnIteyeGridViewItemclickListener {

    final static String ITEYE_DETAIL_URL = "iteye_detail_url";

    /** Page number of the most recent request; reset to 1 on create, refresh and category change. */
    private static int start = 1;
    /** Currently selected category id, see {@link Constants#getIteyeCategory(int)}. */
    private static int category = 0;

    private Context mContext;
    private MsgListView mListView;
    private List<IteyeItemEntity> mDataList;
    private IteyeAdapter mAdapter;
    private TextView mTopChooseBar;
    private PopupWindow mPopupWindow;
    private IteyePopwindowGridListAdapter gridAdapter;
    private GridView gridView;
    private List<GridEntity> mGridList;

    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.iteye_list);
        this.mContext = IteyeMainActivity.this;

        initUtils();
        initView();
        initListeners();
        EventBus.getDefault().register(this);

        // Always start from the first page of the "all" category.
        start = 1;
        category = 0;
        IteyeHttpRequest.getIteyeDate(category, start);
    }

    /** Creates the list adapter and the category-grid adapter with their initial data. */
    private void initUtils() {
        mDataList = new ArrayList<IteyeItemEntity>();
        mAdapter = new IteyeAdapter(mContext);
        mAdapter.setmDataList(mDataList);

        mGridList = Constants.getGridItem();
        gridAdapter = new IteyePopwindowGridListAdapter(mContext);
        gridAdapter.setOnIteyeGridViewItemclickListener(this);
        gridAdapter.setmList(mGridList);
    }

    /** Looks up the views and builds the category popup around the inflated top-bar layout. */
    private void initView() {
        mListView = (MsgListView) this.findViewById(R.id.qq_news_list);
        mListView.setAdapter(mAdapter);

        View baseView = View.inflate(this, R.layout.iteye_topbar, null);
        mTopChooseBar = (TextView) this.findViewById(R.id.tv_iteye_topbar);

        mPopupWindow = new PopupWindow(baseView, LayoutParams.MATCH_PARENT,
                LayoutParams.WRAP_CONTENT, false);
        // NOTE(review): the empty background is presumably there so that outside touches
        // dismiss the popup - confirm before replacing the deprecated constructor.
        mPopupWindow.setBackgroundDrawable(new BitmapDrawable());
        mPopupWindow.setOutsideTouchable(true);
        mPopupWindow.setFocusable(true);

        gridView = (GridView) baseView.findViewById(R.id.iteye_gr_mlist);
        gridView.setAdapter(gridAdapter);
    }

    /** Wires click, pull-to-refresh and load-more callbacks to this activity. */
    private void initListeners() {
        mTopChooseBar.setOnClickListener(this);

        mListView.setPullLoadEnable(true);
        mListView.setPullRefreshEnable(true);
        mListView.setXListViewListener(this);
        // The adapter is already attached in initView(); it is not set a second time here.
        mListView.setOnItemClickListener(this);

        gridView.setSelector(new ColorDrawable(Color.TRANSPARENT));
    }

    @Override
    public void onDestroy() {
        // A popup that is still showing when the activity goes away leaks its window.
        if (mPopupWindow != null && mPopupWindow.isShowing()) {
            mPopupWindow.dismiss();
        }
        EventBus.getDefault().unregister(this);
        super.onDestroy();
    }

    /**
     * EventBus callback for the result of a list-page request.
     *
     * @param requestEvent the posted event; anything other than an
     *        {@link IteyeStringHttpEvent} is ignored
     */
    public void onEventMainThread(RequestEvent requestEvent) {
        if (!(requestEvent instanceof IteyeStringHttpEvent)) {
            return;
        }
        IteyeStringHttpEvent event = (IteyeStringHttpEvent) requestEvent;
        switch (event.status) {
        case HTTP_ERROR:
            mListView.stopRefresh();
            mListView.stopLoadMore();
            mListView.setPullLoadEnable(false);
            break;
        case HTTP_START: {
            // Replaces the list content with the received page.
            // NOTE(review): presumably posted for page 1 - confirm in IteyeHttpManager.
            mListView.stopRefresh();
            mListView.stopLoadMore();
            mAdapter.clearMDataList();
            List<IteyeItemEntity> list = IteyeJsoupPerformer.getListIteyeEntity(event.data);
            mAdapter.setmDataList(list);
            mAdapter.notifyDataSetChanged();
            break;
        }
        case HTTP_SUCCESS: {
            // Appends the received page. The list is no longer cleared first; clearing made
            // addMDataList() behave like a replace and dropped the pages already shown.
            // NOTE(review): presumably posted for pages > 1 - confirm in IteyeHttpManager.
            mListView.stopRefresh();
            mListView.stopLoadMore();
            List<IteyeItemEntity> list = IteyeJsoupPerformer.getListIteyeEntity(event.data);
            mAdapter.addMDataList(list);
            mAdapter.notifyDataSetChanged();
            break;
        }
        default:
            break;
        }
    }

    @Override
    public void onItemClick(AdapterView<?> parent, View v, int position, long id) {
        // The original code maps list position to adapter index with "position - 1"
        // (presumably to skip the pull-to-refresh header). Ignore taps that fall outside
        // the adapter's range instead of passing an invalid index to getItem().
        int index = position - 1;
        if (index < 0 || index >= mAdapter.getCount()) {
            return;
        }
        IteyeItemEntity item = mAdapter.getItem(index);
        if (item == null) {
            return;
        }

        Intent msgIntent = new Intent();
        Bundle bundle = new Bundle(); // a Bundle is backed by a map of String keys to Object values
        bundle.putString(IteyeMainActivity.ITEYE_DETAIL_URL, item.getArticleLink());
        msgIntent.putExtra("bundle", bundle);
        msgIntent.setClass(MyApplication.newInstance(), IteyeDetailActivity.class);
        startActivityForResult(msgIntent, 1000);
    }

    @Override
    public void onRefresh() {
        start = 1;
        // Load-more is switched off after an error; a refresh turns it back on.
        mListView.setPullLoadEnable(true);
        mListView.setPullRefreshEnable(true);
        IteyeHttpRequest.getIteyeDate(category, start);
    }

    @Override
    public void onLoadMore() {
        start += 1;
        IteyeHttpRequest.getIteyeDate(category, start);
    }

    @Override
    public void onClick(View view) {
        switch (view.getId()) {
        case R.id.tv_iteye_topbar:
            // Toggle the category popup below the top bar.
            if (mPopupWindow.isShowing()) {
                mPopupWindow.dismiss();
            } else {
                mPopupWindow.showAsDropDown(view);
            }
            break;
        default:
            break;
        }
    }

    @Override
    public void OnIteyeGridViewItemclick(GridEntity item) {
        mTopChooseBar.setText(item.getName());
        mPopupWindow.dismiss();

        // Switching category restarts paging from the first page.
        start = 1;
        category = item.getId();
        IteyeHttpRequest.getIteyeDate(category, start);
    }
}
package com.couriousby.iteyedemo.request.http; import com.android.volley.Response.ErrorListener; import com.android.volley.Response.Listener; import com.android.volley.VolleyError; import com.android.volley.toolbox.StringRequest; import com.android.volley.toolbox.Volley; import com.couriousby.iteyedemo.MyApplication; import com.couriousby.iteyedemo.request.manager.IteyeHttpManager; import com.couriousby.iteyedemo.util.Constants; public class IteyeHttpRequest { public static void getIteyeDate(int catgory ,final int page){ String url = Constants.getIteyeUrl(catgory,page); StringRequest request = new StringRequest(url, new Listener<String>() { @Override public void onResponse(String response) { if (page ==1 ) IteyeHttpManager.getIteyeByPageFirst(response); else IteyeHttpManager.getIteyeByPage(response); } }, new ErrorListener() { @Override public void onErrorResponse(VolleyError error) { IteyeHttpManager.getIteyeByPageError(); } }) /* { @Override protected Response<String> parseNetworkResponse( NetworkResponse response) { String str = null; try { str = new String(response.data,"utf-8"); } catch (UnsupportedEncodingException e) { e.printStackTrace(); } return Response.success(str, HttpHeaderParser.parseCacheHeaders(response)); } }*/ ; Volley.newRequestQueue(MyApplication.newInstance()).add(request); } }
捐助开发者
在兴趣的驱动下,写一个免费的东西,有欣喜,也有汗水,希望你喜欢我的作品,同时也能支持一下。 当然,有钱捧个钱场(右上角的爱心标志,支持支付宝和PayPal捐助),没钱捧个人场,谢谢各位。
谢谢您的赞助,我会做得更好!