# 爬虫
相关工具:cURL 转 Python 代码(外部链接)
# 环境搭建
- 安装 Requests(网络请求):
pip install requests
- 安装 Beautiful Soup 4(解析网页):
pip install beautifulsoup4
# 请求网址
import requests
def open_url(url, timeout=10):
    """Send a GET request to *url* with a browser-like User-Agent header.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Requests
            applies no timeout by default, so a stalled server would block
            the caller forever; pass ``None`` to restore that behaviour.

    Returns:
        The ``requests.Response`` object produced by ``requests.get``.

    Raises:
        requests.exceptions.Timeout: If the server does not respond within
            *timeout* seconds.
    """
    # Present a desktop Chrome User-Agent instead of the default
    # "python-requests/x.y" one.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36"
    }
    return requests.get(url, headers=headers, timeout=timeout)
获取响应文本(字符串):
# Fetch the page and print its body as a string ("网址" is a placeholder URL).
res = open_url("网址")
print(res.text)
获取响应的原始数据(字节):
# Fetch the page and print its body as raw data ("网址" is a placeholder URL).
res = open_url("网址")
print(res.content)
# HTML 解析
import bs4
# Template -- the Chinese names and strings are placeholders to substitute:
#   HTML内容    the HTML text to parse
#   解析标签名  the tag name (used in find_all and as the tag accessor)
#   类名        the CSS class to match
#   属性名      the attribute whose value is printed
soup = bs4.BeautifulSoup(HTML内容, "html.parser")
解析列表 = soup.find_all("解析标签名", class_="类名")
for item in 解析列表:
    # Print the chosen attribute of the tag found inside each match.
    print(item.解析标签名['属性名'])
示例:
import bs4
# Parse the fetched page and print the title of each video entry's link.
soup = bs4.BeautifulSoup(res.text, "html.parser")
titles = soup.find_all("li", class_="video-item matrix")
for item in titles:
    link = item.a  # None when the <li> contains no <a> tag
    # Skip entries without a link or without a title attribute instead of
    # crashing with TypeError / KeyError part-way through the listing.
    if link is not None and link.has_attr("title"):
        print(link["title"])
提示:可使用 `.get_text()` 方法获取 HTML 标签内的文本内容。
# Json 解析
import json
# Template: Json字符串 is a placeholder for the JSON text to decode.
解析Json = json.loads(Json字符串)

# Top-level field.
print(解析Json["message"])

# Nested field: step into "data" first, then read its "message".
nested = 解析Json["data"]
print(nested["message"])
← 上一篇:正则表达式