diff --git a/国内党媒/CrawlZhongguominzubao.py b/国内党媒/CrawlZhongguominzubao.py index 57a1348..aa53977 100644 --- a/国内党媒/CrawlZhongguominzubao.py +++ b/国内党媒/CrawlZhongguominzubao.py @@ -53,6 +53,7 @@ async def getData(start_date: datetime, end_date: datetime): months = [] # 从开始日期到结束日期,每个月份都添加到列表中 current_date = start_date + current_date = current_date.replace(day=1) while current_date <= end_date: months.append(current_date) # 增加一个月 @@ -82,7 +83,7 @@ async def getData(start_date: datetime, end_date: datetime): banmianming = item2["BM"] banmianhao = item2["BC"] url3 = f"http://210.12.104.26:81/reader/layout/getBmDetailPub.do?bc={item2['IRCATELOG']}&docpubtime={item.replace('/', '')}" - print(url3) + print(datetime.now().strftime("%Y-%m-%d %H:%M:%S"), url3) response3 = await client.get(url3) response3.encoding = response3.charset_encoding print(f"三级连接状态:{response3.status_code}") @@ -95,9 +96,9 @@ async def getData(start_date: datetime, end_date: datetime): print(f"四级连接状态:{response4.status_code}") if response4.status_code == 200: data4 = response4.json() - title = BeautifulSoup(data4['docTitle'],"lxml").text - subTitle = BeautifulSoup(data4['fb'],"lxml").text - preTitle = BeautifulSoup(data4['yt'],"lxml").text + title = BeautifulSoup(data4['docTitle'], "lxml").text + subTitle = BeautifulSoup(data4['fb'], "lxml").text + preTitle = BeautifulSoup(data4['yt'], "lxml").text author = data4['docAuthor'] content = BeautifulSoup(data4["content"], "lxml").text await collection.insert_one({