53个ASP江湖论坛源码下载链接:https://pan.baidu.com/s/1FZ3LLkL2eeSCqXUuLfbPQQ?pwd=5x9x
提取码:5x9x
Python采集代码下载链接:采集代码.zip - 蓝奏云
LeadBBS V2.23 论坛完整版
西部论坛IT专业版
KINGBBS V2.0 免费版
LeadBBS V2.22 论坛
def sprider(self, sprider_name):
    """
    Crawl the listing pages of one source-code category.

    Listing URLs have the form
    ``<base_url>/<first_column_name>/<second_column_name>_<page>.shtml``,
    e.g. ASP留言日记源码 -> http://www.downcode.com/sort/j_1_2_1.shtml.
    The download and save folders are recreated from scratch, a headless
    Chrome session opens the first listing page to discover the last page
    number, and each listing page is then handed to ``self.sprider_detail``.
    When ``self.haved_sprider_count`` equals ``self.sprider_count`` on a page
    other than page 1, ``SpriderTools.builder_word``, ``copy_file`` and
    ``gen_passandtxt`` are called and the crawl stops.

    :param sprider_name: category name. The four known ASP categories select
        the column names; any other value leaves ``first_column_name`` and
        ``second_column_name`` as they already are on the instance.
    :return: None
    """
    # Known categories and the URL column each one maps to.
    second_columns = {
        "ASP聊天室源码": "j_1_1",
        "ASP留言日记源码": "j_1_2",
        "ASP江湖论坛源码": "j_1_3",
        "ASP新闻文章源码": "j_1_4",
    }
    if sprider_name in second_columns:
        self.first_column_name = "sort"
        self.second_column_name = second_columns[sprider_name]

    BaseFrame().debug("开始采集" + self.first_folder_name + "...")
    BaseFrame().right("本次采集参数:日志路径=" + str(BaseFrame().LOG_PATH))
    BaseFrame().right("本次采集参数:sprider_count=" + str(self.sprider_count))
    BaseFrame().right("本次采集参数:sprider_name=" + sprider_name)
    # Build the URL of the first listing page from the selected column.
    sprider_url = self.base_url + "/{0}/{1}_1.shtml".format(self.first_column_name, self.second_column_name)
    BaseFrame().debug("本次采集参数:sprider_url=:" + sprider_url)

    # The second-level folder name doubles as the title passed to builder_word.
    self.second_folder_name = str(self.sprider_count) + "个" + sprider_name
    # Start page, derived from the start offset so earlier pages are skipped.
    self.merchant = int(self.sprider_start_count) // int(self.max_pager) + 1
    self.file_path = self.base_path + os.sep + self.first_folder_name + os.sep + self.second_folder_name
    BaseFrame().right("本次采集参数:file_path=" + self.file_path)
    # Browser download directory. os.sep replaces the hard-coded "\\Temp\\"
    # (identical on Windows, and a valid path on other platforms).
    self.down_path = self.file_path + os.sep + "Temp" + os.sep
    BaseFrame().right("本次采集参数:down_path=" + self.down_path)
    self.save_path = self.file_path + os.sep + sprider_name
    BaseFrame().right("本次采集参数:save_path=" + self.save_path)

    # Start from empty download/save folders on every run.
    for folder in (self.down_path, self.save_path):
        if os.path.exists(folder):
            shutil.rmtree(folder)
        os.makedirs(folder, exist_ok=True)

    chrome_options = webdriver.ChromeOptions()
    diy_prefs = {'profile.default_content_settings.popups': 0,
                 'download.default_directory': '{0}'.format(self.down_path)}
    # Register the download directory with the browser profile.
    chrome_options.add_experimental_option('prefs', diy_prefs)
    chrome_options.add_argument('--headless')  # run without a visible window
    driver = webdriver.Chrome(options=chrome_options)
    try:
        driver.set_window_size(1280, 800)
        driver.get(sprider_url)
        element_list = driver.find_elements(By.CLASS_NAME, "j_text_sort_a")

        # The "最后页" (last page) link carries the highest page number as the
        # final "_"-separated token before ".shtml"; default to a single page.
        page_end_number = 1
        for anchor in driver.find_elements(By.TAG_NAME, "a"):
            if anchor.get_attribute("title") == "最后页":
                page_end_url = anchor.get_attribute("href")
                print(page_end_url)
                if page_end_url:
                    page_end_number = page_end_url.split(".shtml")[0].rsplit("_", 1)[-1]
                break
        last_page = int(page_end_number)

        self.page_count = self.merchant
        while self.page_count <= last_page:  # stop after the last page
            try:
                if self.page_count != 1:
                    # The quota check is only made on pages after the first.
                    if self.haved_sprider_count == self.sprider_count:
                        BaseFrame().debug("采集到达数量采集停止...")
                        BaseFrame().debug("开始写文章...")
                        SpriderTools.builder_word(self.second_folder_name,
                                                  self.word_content_list,
                                                  self.file_path,
                                                  self.word_image_count,
                                                  self.first_folder_name)
                        SpriderTools.copy_file(self.word_content_list,
                                               self.save_path,
                                               self.second_folder_name,
                                               self.file_path)
                        SpriderTools.gen_passandtxt(self.second_folder_name,
                                                    self.word_content_list,
                                                    self.file_path)
                        BaseFrame().debug("文件编写完毕,请到对应的磁盘查看word文件和下载文件!")
                        break
                    next_url = self.base_url + "/{0}/{1}_{2}.shtml".format(self.first_column_name,
                                                                          self.second_column_name,
                                                                          self.page_count)
                    driver.get(next_url)
                    element_list = driver.find_elements(By.CLASS_NAME, "j_text_sort_a")
                # Page 1 reuses the elements already loaded from sprider_url.
                self.sprider_detail(driver, element_list, page_end_number)
            except Exception as e:
                print("sprider()执行过程出现错误:" + str(e))
            # Advance even after a failure; otherwise a page that keeps
            # failing would be retried forever.
            self.page_count = self.page_count + 1
    finally:
        # Always release the browser process, including on errors.
        driver.quit()
CPB论坛 V1.3 Beta
BBSXP 65Dj加强版 V2.0
BBSXP V1.65 简体中文版
Web Forum Server V1.60.0403 特别版
6K 论坛 V4.0
VieBoard V2.7.1 Build 20030413
雲龍山莊I豪华无错版
妮妮网络江湖无错版
5237论坛V4.2Final最终版
动网论坛 V6.01(99插件版)
动网论坛智能动态菜单
兰山社区 4.10 完全无错增强版
LeadBBS V2.21 论坛
VieBoard Beta V2.6.0.2 Build 20030404
ASP精品屋论坛(Asphouse BBS) V1.0
LeadBBS V2.20 论坛
仙境奇缘江湖社区
互动力量论坛
Web Forum Server V1.60.0319 特别版
射月者论坛 V1.0
LeadBBS V2.10 论坛
LeadBBS V2.05 论坛
IBB超级论坛 V1.5 补丁
A派论坛 V1.0
橙子乐园の社区
兰山社区阳春版
EbbsXP V1.00
BBSXP V1.6 简体中文版
6K BBS V4.0 65DJ.Net修改版
天外天江湖6.95完整版
剑客江湖2003贺岁正式版
动网论坛V6.0 SQL 3.1储存过程版个人修改版
Kingbbs V2.0
异域论坛 V6.0+SP1+SP2+VIP
动网论坛 V6.0.0 SP2 最新补丁
import os
import shutil
def void_folder(path):
    """Remove every empty directory beneath *path*.

    The tree is processed bottom-up: each sub-directory is cleaned first and
    then removed itself if nothing is left inside it, so chains of nested
    empty directories disappear in a single call. Files are never touched and
    *path* itself is kept even if it ends up empty.

    :param path: directory whose sub-directories are scanned.
    :return: None
    """
    for name in os.listdir(path):
        real_path = os.path.join(path, name)
        if not os.path.isdir(real_path):
            # Regular files are left alone.
            continue
        # Clean the children first so this directory can become empty.
        void_folder(real_path)
        if not os.listdir(real_path):
            print("void_folder():" + name)
            # os.rmdir only succeeds on an empty directory, so content
            # that appears concurrently is never deleted.
            os.rmdir(real_path)
def void_file(dirPath):
    """Delete archive files located directly inside *dirPath*.

    A file is removed when its final extension is exactly ``rar``, ``zip``,
    ``gz`` or ``tgz`` (case-sensitive). Sub-directories are not entered, and a
    directory whose name merely ends in one of these extensions is skipped
    instead of being passed to ``os.remove``.

    :param dirPath: directory to clean.
    :return: None
    """
    archive_exts = {".rar", ".zip", ".gz", ".tgz"}
    for file in os.listdir(dirPath):
        file_full_name = os.path.join(dirPath, file)
        if os.path.splitext(file)[-1] not in archive_exts:
            continue
        # os.remove raises on directories, so only delete regular files.
        if os.path.isfile(file_full_name):
            os.remove(file_full_name)
# 查找并删除指定文件夹下所有名称相同的文件
def search_file(dirPath, fileName):
    """Recursively delete every file called *fileName* below *dirPath*.

    Each matching file's path is printed and the file is then removed.
    Directories are always descended into, never removed, even when their
    own name equals *fileName*.

    :param dirPath: directory where the search starts.
    :param fileName: exact file name to look for.
    :return: None
    """
    for entry in os.listdir(dirPath):
        candidate = '/'.join((dirPath, entry))
        if os.path.isdir(candidate):
            # Keep looking inside sub-directories.
            search_file(candidate, fileName)
            continue
        if entry != fileName:
            continue
        print(candidate)  # report the match before deleting it
        os.remove(candidate)
Dvbbs6+Sp1+Sp2+光の论坛优化版
寅龙设计论坛V2.0.0版(DVBBS)修改版
ITL V6.0.1版论坛
上速设计封存版V7.0SKIN Dvbbs美化论坛
休闲江湖7.22增强版
龍兒江湖社区最新版本
阿男世纪江湖7.0威力加强版
寅龍設計修改動網論壇V6.0.0終極版
动网论坛Ver6.0.0 For MSSQL
杭州设计师家园论坛(动网论坛美化版)
动网论坛Ver6.0.0 SP2
VieBoard V2.01版本修正版
凤阳社区网
BBSXP1.5 SQL版