"""
用fiddler爬取央视新闻app, 2020年8月19日的一整天新闻数据
- app中时间链可以选择日期
- 注意我要当天全部新闻信息,可以向下刷新
- 提取以下字段保存为csv数据
'itemTitle': 标题
'operate_time': 发布时间
'detailUrl': 新闻链接
'itemImage': 图片链接
- 请在下方实现代码
"""
import requests
import csv
'''
GET v/api/rest/articleInfo/getScrollList?n=20&version=1&p=1&pubDate=1597806209000&app_version=807
User-Agent: SM-G9730
Host: v
Connection: Keep-Alive
Accept-Encoding: gzip
GET v/api/rest/articleInfo/getScrollList?
User-Agent: SM-G9730
Host: v
Connection: Keep-Alive
Accept-Encoding: gzip
n=20
version=1
p=2
pubDate=159********00
app_version=807
'''
# Request settings copied from the captured app request.
# NOTE(review): the host part of the URL (and the Host header) appears to be
# redacted to "v" in this file -- restore the real host before running.
API_URL = 'v/api/rest/articleInfo/getScrollList?'
HEADERS = {
    'User-Agent': 'SM-G9730',
    'Host': 'v',
    'Connection': 'Keep-Alive',
    'Accept-Encoding': 'gzip',
}
PUB_DATE = '1597806209000'
CSV_PATH = '央视新闻.csv'
# Seconds to wait for the server, so a stalled connection cannot hang the run.
REQUEST_TIMEOUT = 10


def fetch_page(page):
    """Request one page of the scroll list and return the decoded JSON.

    Args:
        page: 1-based page number, sent as the ``p`` query parameter.

    Raises:
        requests.RequestException: on connection problems or an HTTP error
            status.
        ValueError: if the response body is not valid JSON.
    """
    params = {
        'n': '20',
        'version': '1',
        'p': page,
        'pubDate': PUB_DATE,
        'app_version': '807',
    }
    response = requests.get(
        url=API_URL,
        headers=HEADERS,
        params=params,
        timeout=REQUEST_TIMEOUT,
    )
    response.raise_for_status()
    return response.json()


def extract_row(item):
    """Return ``[title, publish time, news url, image url]`` for one item.

    Fields that are absent from ``item`` become empty strings, so a single
    incomplete entry (for example one without an image) does not abort the
    whole crawl.
    """
    image = item.get('itemImage') or {}
    return [
        item.get('itemTitle', ''),
        item.get('operate_time', ''),
        item.get('detailUrl', ''),
        image.get('imgUrl1', ''),
    ]


def iter_rows(fetch=None):
    """Yield one CSV row per news item, page after page.

    Args:
        fetch: callable taking a page number and returning the decoded page;
            defaults to ``fetch_page``.

    Iteration ends at the first page whose ``itemList`` is missing or empty.
    Every item of a page is used, however many the server returned.
    """
    if fetch is None:
        fetch = fetch_page
    page = 0
    while True:
        page += 1
        data = fetch(page)
        items = data.get('itemList') if isinstance(data, dict) else None
        if not items:
            return
        for item in items:
            yield extract_row(item)


def main():
    """Crawl every page, echo each item, append it to the CSV, print the total."""
    saved = 0
    # Append mode is kept from the original script; the file is opened once
    # for the whole run instead of once per row.
    with open(CSV_PATH, mode='a', encoding='utf-8', newline='') as f:
        csv_writer = csv.writer(f)
        try:
            for row in iter_rows():
                title, published, detail_url, image_url = row
                print(title)
                print([published])
                print(detail_url)
                print(image_url)
                saved += 1
                print()
                csv_writer.writerow(row)
        except (requests.RequestException, ValueError) as exc:
            # Rows written so far are kept; report why the crawl stopped
            # instead of hiding the error.
            print('request failed, stopping:', exc)
    print(saved)


if __name__ == '__main__':
    main()