分享26个ASP江湖论坛源码,总有一款适合您
26个ASP江湖论坛源码下载链接:https://pan.baidu.com/s/1WMbd5oulqC7febJ4E9tN8g?pwd=itph
提取码:itph
Python采集代码下载链接:采集代码.zip - 蓝奏云
import os
import shutil

import requests
from bs4 import BeautifulSoup

from framework.base.BaseFrame import BaseFrame
from sprider.access.SpriderAccess import SpriderAccess
from sprider.plugin.SpriderTools import SpriderTools
from sprider.plugin.UserAgent import UserAgent


class CNSourceCoder:
    """Crawler state and configuration for listings under ``base_url``.

    The attributes below are class-level defaults; methods of this class
    read and update them through ``self``.
    """

    # Root of the site whose listing pages are crawled.
    base_url = "http://www.downcode.com"

    # Items per listing page; affects the page a resumed crawl starts from.
    max_pager = 10
    # Number of images inserted into the Word document; also the number of
    # images/descriptions collected by the detail crawl.
    word_image_count = 5

    page_end_number = 0
    # NOTE(review): this list is a class attribute, so it is shared by every
    # instance unless an instance rebinds it -- confirm that is intended.
    word_content_list = []
    haved_sprider_count = 0  # number of items already collected
    sprider_detail_index = 0  # running total collected by the detail method
VieBoard 2.50汉化修正版本 Build20021017
网唯论坛 2.0 Build 0920
八闽社区完美版
动网先锋论坛正式版
开发者资源社区(DevelopRes.Com) V2.0
雨水情轩江湖xp商业正式版
动网论坛 Ver5.0 Final 正式版
动网论坛0519 SQL无错版
清风论坛build0828
动网先锋Sql日志清除器 V1.1
动网论坛0519 SP3
BBSXP星空无限加强版
动网论坛0519安全版(0519+sp1+sp2+sp3)
正版阿男世纪江湖6.83版
动网论坛0519sql版(储存过程修改版)
武夷月夏论坛Build0608
最新版彬飞家园论坛
动网论坛V5.0 Final版
上校论坛 P.C.F0618
CBBS(陈氏论坛) 2002 V1.5build0115 纪念版
动网论坛2000(怀旧版)
有风的日子设计论坛
先锋江湖3.0完美修改版
ASP+Flash BBS系统
动网论坛0519完美版(0519+SP1+SP2)
第四代BBS系统-BBSXP1.1版build0723
极限论坛GB6000+u0720非官方免FSO完整版
BBSXP1.0英文版
战士网盟社区SQL版
雪人论坛程序 V1.42 B0801
动网论坛SQL日志清除器
def sprider(self, sprider_name, sprider_count, start_number,
            pager_number, sprider_type, is_show_browse, root_path,
            first_column_name, second_column_name, is_record_db):
    """Crawl the paginated listing of one site column and build the output.

    Listing pages have the form
    ``<base_url>/<first_column_name>/<second_column_name>_<page>.shtml``,
    e.g. http://www.downcode.com/sort/j_1_2_1.shtml. Page 1 is always
    fetched first to discover the last page number; crawling then starts at
    the page derived from ``start_number`` and ``self.max_pager``.

    :param sprider_name: label of this crawl; used in log lines and in the
        output folder names (must be a ``str``).
    :param sprider_count: number of items to collect; compared against
        ``self.haved_sprider_count`` to decide when to stop.
    :param start_number: item offset to resume from; converted to a start
        page via ``start_number // self.max_pager + 1``.
    :param pager_number: not used by this method.
    :param sprider_type: category label; upper-cased to form the first-level
        output folder name.
    :param is_show_browse: not used by this method.
    :param root_path: base directory under which all output is written.
    :param first_column_name: first path segment of the listing URL.
    :param second_column_name: file-name prefix of the listing URL.
    :param is_record_db: stored on ``self.is_record_db``; not otherwise
        used here.
    :return: None. Results are written to disk through ``SpriderTools``.
    """

    def fetch_listing(url):
        # Download one listing page (site is served as gb2312) and parse it.
        response = requests.get(url, timeout=10,
                                headers=UserAgent().get_random_header(self.base_url))
        response.encoding = 'gb2312'
        return BeautifulSoup(response.text, "html5lib")

    self.first_folder_name = sprider_type.upper()
    self.base_path = root_path
    self.first_column_name = first_column_name
    self.second_column_name = second_column_name
    self.sprider_start_count = start_number
    self.is_record_db = is_record_db

    BaseFrame().debug("开始采集[源码下载站]" + self.first_folder_name + "源码...")
    BaseFrame().right("本次采集参数:sprider_count(采集数量):" + str(sprider_count))
    BaseFrame().right("本次采集参数:sprider_name(采集名称):" + sprider_name)

    # Build the first listing URL from the column names.
    sprider_url = self.base_url + "/{0}/{1}_1.shtml".format(self.first_column_name,
                                                            self.second_column_name)
    BaseFrame().debug("本次采集参数:sprider_url:" + sprider_url)

    # Second-level folder name; it is also used as the Word document title.
    self.second_folder_name = str(sprider_count) + "个" + sprider_name
    # Start page, so a resumed crawl skips pages that were already handled.
    self.merchant = int(self.sprider_start_count) // int(self.max_pager) + 1

    self.file_path = (self.base_path + os.sep + self.first_folder_name
                      + os.sep + self.second_folder_name)
    BaseFrame().right("本次采集参数:file_path=" + self.file_path)

    # Download directory. The Windows-style separators are kept exactly as
    # before because other code may rely on this exact string.
    self.down_path = self.file_path + "\\Temp\\"
    BaseFrame().right("本次采集参数:down_path=" + self.down_path)

    # e.g. First/PPT/<second_folder_name>/<sprider_name>
    self.save_path = self.file_path + os.sep + sprider_name
    BaseFrame().right("本次采集参数:save_path=" + self.save_path)

    # Start from empty download/save directories on every run.
    for folder in (self.down_path, self.save_path):
        if os.path.exists(folder):
            shutil.rmtree(folder)
        os.makedirs(folder, exist_ok=True)

    soup = fetch_listing(sprider_url)
    element_list = soup.find_all('div', attrs={"class": 'j_text_sort_a'})

    # The "last page" link carries the total page count in its href. It is
    # absent when the listing has a single page (or the fetch returned an
    # unexpected document), so fall back to one page instead of crashing.
    page_end_number = 1
    page_end_title = soup.find("a", attrs={"title": '最后页'})
    page_end_url = page_end_title.get("href") if page_end_title is not None else None
    if page_end_url:
        page_end_number = page_end_url.split(".shtml")[0].split("_")[3]
    last_page = int(page_end_number)

    self.page_count = self.merchant
    while self.page_count <= last_page:  # stop once every page was visited
        try:
            if self.page_count == 1:
                # Page 1 was already downloaded above.
                self.sprider_detail(element_list, page_end_number, sprider_count)
            else:
                if int(self.haved_sprider_count) == int(sprider_count):
                    BaseFrame().debug("sprider采集到达数量采集停止...")
                    BaseFrame().debug("开始写文章...")
                    SpriderTools.builder_word(self.second_folder_name,
                                              self.word_content_list,
                                              self.file_path,
                                              self.word_image_count,
                                              self.first_folder_name)
                    SpriderTools.copy_file(self.word_content_list,
                                           self.save_path,
                                           self.second_folder_name,
                                           self.file_path)
                    SpriderTools.gen_passandtxt(self.second_folder_name,
                                                self.word_content_list,
                                                self.file_path)
                    BaseFrame().debug("文件编写完毕,请到对应的磁盘查看word文件和下载文件!")
                    break
                next_url = self.base_url + "/{0}/{1}_{2}.shtml".format(
                    self.first_column_name, self.second_column_name, self.page_count)
                soup = fetch_listing(next_url)
                element_list = soup.find_all('div', attrs={"class": 'j_text_sort_a'})
                self.sprider_detail(element_list, page_end_number, sprider_count)
        except Exception as e:
            BaseFrame().error("sprider()执行过程出现错误:" + str(e))
        # Advance even when the page failed: with the increment inside the
        # try block, one failing page was retried forever without any delay.
        self.page_count = self.page_count + 1
    # NOTE(review): the "target reached" branch only runs on an iteration
    # after page 1, so if the target is met on the last page the documents
    # are not written here -- confirm whether sprider_detail covers that.
最后送大家一首诗:
山高路远坑深,
大军纵横驰奔,
谁敢横刀立马?
惟有点赞加关注大军。