python爬取网易云音乐热歌榜实例代码
然后更改你要保存的目录,目录要先建立好文件夹,例如我的是保存在D盘-360下载-网易云热歌榜文件夹内,就可以完成下载。
如果文件夹没有提前建好,会报错[Errno 2] No such file or directory。
代码实现:
from urllib import request
from bs4 import BeautifulSoup
import re
import requests
import time
class Music(object):
    """Scrape a NetEase Cloud Music chart page and download its tracks as MP3s.

    The pipeline is ``askurl`` -> ``analysis`` -> ``matching`` -> ``save``;
    ``main`` runs all four steps in that order.
    """

    def __init__(self, baseurl, path):
        """Remember the chart URL, the output prefix and the request headers.

        Args:
            baseurl: URL of the chart page to scrape.
            path: Prefix prepended to every output file name. It is joined by
                plain string concatenation, so it must end with a path
                separator, and the directory must already exist.
        """
        self.baseurl = baseurl
        # Browser-like user agent sent with every request made by this class.
        self.headers = {
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36"
        }
        self.path = path

    def main(self):
        """Fetch the chart page, extract the track list and download each track."""
        html = self.askurl()
        textarea_text = self.analysis(html)
        tracks = self.matching(textarea_text)
        self.save(tracks)

    def askurl(self):
        """Return the chart page at ``self.baseurl`` decoded as UTF-8 text."""
        req = request.Request(url=self.baseurl, headers=self.headers)
        # The context manager closes the connection once the body is read.
        with request.urlopen(req) as response:
            return response.read().decode("utf-8")

    def analysis(self, html):
        """Return the string form of every ``<textarea>`` element in *html*."""
        soup = BeautifulSoup(html, "html.parser")
        return str(soup.find_all("textarea"))

    def matching(self, bs4):
        """Extract ``(group1, group2, id)`` string tuples from the textarea text.

        Args:
            bs4: Text produced by ``analysis``.

        Returns:
            A list of 3-tuples of strings, one per ``rule1`` match; empty when
            nothing matches.
        """
        # NOTE: the trailing `[],"alias":[]` is parsed by `re` as a single
        # character set, not as literal text, so the pattern accepts any one
        # of those characters after `"tns":`. Kept unchanged on purpose.
        rule0 = re.compile(r'"name":"(.*?)","tns":[],"alias":[]')
        rule1 = re.compile(r'jpg,(.*?),(.*?)","id":(\d*)')
        names = rule0.findall(bs4)
        # Every captured name is prefixed with a comma so that rule1 can use
        # the commas as field separators.
        joined = "".join("," + name for name in names)
        # Replace non-breaking spaces with ordinary spaces.
        joined = joined.replace("\xa0", " ")
        return rule1.findall(joined)

    def save(self, name1):
        """Download every track in *name1* to ``<path><j[1]> - <j[0]>.mp3``.

        Args:
            name1: Iterable of 3-item sequences as returned by ``matching``;
                the third item is appended to the download URL as the id.
        """
        # NOTE(review): the first two fields are presumably (artist, title);
        # verify against live page data.
        for artist, title, song_id in name1:
            label = title + " - " + artist
            print("正在下载:" + label + "...")
            url = "https://music.163.com/song/media/outer/url?id=" + song_id
            content = requests.get(url=url, headers=self.headers).content
            with open(self.path + label + ".mp3", "wb") as f:
                f.write(content)
            print(label + "下载完毕。\n")
            # Short pause between consecutive downloads.
            time.sleep(0.5)
if __name__ == "__main__":
    # URL of the hot-song chart to scrape.
    baseurl = "https://music.163.com/discover/toplist?id=3778678"
    # Directory the files are saved to; it must already exist and the value
    # must keep its trailing slash because it is concatenated with the name.
    path = "D:/360下载/网易云热歌榜/"
    demo0 = Music(baseurl, path)
    demo0.main()
    print("下载完毕")
内容扩展:
Python3实战之爬虫抓取网易云音乐的热门评论
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import json
import re
import urllib.parse
import urllib.request
def get_all_hotSong(html=None):
    """Return the names and ids of every song on the hot-song chart.

    Args:
        html: Optional chart page markup. When omitted, the page is
            downloaded from the chart URL.

    Returns:
        A ``(hot_song_name, hot_song_id)`` tuple of two equally long lists of
        strings in page order. Both lists are empty when the page contains no
        ``<ul class="f-hide">`` song list.
    """
    if html is None:
        url = 'https://music.163.com/discover/toplist?id=3778678'  # hot-song chart URL
        with urllib.request.urlopen(url) as response:
            html = response.read().decode('utf8')
    html = str(html)

    # First pass: isolate the hidden <ul> block that lists every song.
    list_pattern = re.compile(
        r'<ul class="f-hide"><li><a href="/song\?id=\d*?">.*</a></li></ul>'
    )
    # Second pass: one pattern captures id and name together so the two
    # result lists cannot drift out of step with each other.
    item_pattern = re.compile(r'<li><a href="/song\?id=(\d*?)">(.*?)</a></li>')

    blocks = list_pattern.findall(html)
    if not blocks:
        return [], []

    pairs = item_pattern.findall(blocks[0])
    hot_song_name = [name for _, name in pairs]
    hot_song_id = [song_id for song_id, _ in pairs]
    return hot_song_name, hot_song_id
def get_hotComments(hot_song_name, hot_song_id):
    """Fetch one song's hot comments and append them to ``./song_comments``.

    Args:
        hot_song_name: Song title, written as the heading of the section.
        hot_song_id: Song id as a string; it is concatenated into the URL.

    Raises:
        KeyError: If the JSON response has no ``hotComments`` entry.
    """
    # Comment endpoint for the given song.
    url = 'https://music.163.com/weapi/v1/resource/comments/R_SO_4_' + hot_song_id + '?csrf_token='
    # Request headers.
    header = {
        'User-Agent': 'Mozilla/5.0 (X11; Fedora; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'
    }
    # POST form data.
    # NOTE(review): this literal is truncated in the available copy of the
    # script — the 'params' value is cut off mid-string and any further form
    # fields are lost (the endpoint presumably also expects an 'encSecKey'
    # entry — confirm). Restore the full payload before relying on this call.
    data = {
        'params': 'zC7fzWBKxxsm6TZ3PiRjd056g9iGHtbtc8vjTpBXshKIboaPnUyAXKze+KNi9QiEz/IieyRnZfNztp7yvTFyBXOlVQP/JdYNZw2+GRQDg7grOR2ZjroqoOU2z0TNhy+qDHKSV8ZXOnxUF93w3DA51ADDQHB0IngL+v6N8KthdVZeZBe0d3EsUFS8ZJltNR'
    }
    postdata = urllib.parse.urlencode(data).encode('utf8')  # encode the form body
    request = urllib.request.Request(url, headers=header, data=postdata)
    with urllib.request.urlopen(request) as response:
        reponse = response.read().decode('utf8')
    json_dict = json.loads(reponse)
    hot_commit = json_dict['hotComments']  # hot comments from the JSON reply

    # Append to the output file; UTF-8 is explicit so the write does not
    # depend on the platform's default encoding.
    with open('./song_comments', 'a', encoding='utf-8') as fhandle:
        fhandle.write(hot_song_name + ':' + '\n')
        for num, item in enumerate(hot_commit, start=1):
            fhandle.write(str(num) + '.' + item['content'] + '\n')
        fhandle.write('\n==============================================\n\n')
# Names and ids of every song on the hot-song chart.
hot_song_name, hot_song_id = get_all_hotSong()
# Save the hot comments of every song on the chart; `num` is the 1-based
# position used in the progress messages.
for num, (song_name, song_id) in enumerate(zip(hot_song_name, hot_song_id), start=1):
    print('正在抓取第%d首歌曲热评...' % num)
    get_hotComments(song_name, song_id)
    print('第%d首歌曲热评抓取成功' % num)
以上就是python爬取网易云音乐热歌榜实例代码的详细内容,更多关于python爬取网易云音乐热歌榜的资料请关注其它相关文章!
发布评论