# 爬虫

cURL 转 Python 代码

# 环境搭建

  • 安装 Requests(网络请求):pip install requests
  • 安装 BeautifulSoup4(解析网页):pip install beautifulsoup4

# 请求网址

import requests

def open_url(url, timeout=10):
    """Send a GET request to ``url`` using a desktop-browser User-Agent.

    Args:
        url: Address of the page or resource to fetch.
        timeout: Seconds to wait for the server before giving up. It is
            forwarded to ``requests.get``; without it a stalled server
            would block the caller forever.

    Returns:
        The response object returned by ``requests.get``. The HTTP status
        is not checked here; callers inspect the response themselves.
    """
    # Present a regular browser User-Agent instead of the library default.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36"
    }
    return requests.get(url, headers=headers, timeout=timeout)

返回字符串(解码后的文本,`res.text`):

# Fetch the page, then print the response body as text (str).
res = open_url("网址")
print(res.text)

返回二进制数据(原始字节,`res.content`):

# Fetch the page, then print the raw response body (bytes).
res = open_url("网址")
print(res.content)

# HTML 解析

import bs4

# Template: the Chinese identifiers and strings below are placeholders that
# must be replaced with real values before this snippet can run.
# Parse the HTML text with the "html.parser" backend.
soup = bs4.BeautifulSoup(HTML内容, "html.parser")
# Collect every tag with the given tag name and CSS class.
解析列表 = soup.find_all("解析标签名", class_="类名")
for item in 解析列表:
    # Look up a child tag by name on the item and print one of its attributes.
    print(item.解析标签名['属性名'])

示例:

import bs4

# Parse the body of a previously fetched response (`res`) with "html.parser".
soup = bs4.BeautifulSoup(res.text, "html.parser")
# Find every <li> tag whose class attribute is "video-item matrix".
titles = soup.find_all("li", class_="video-item matrix")
for item in titles:
    # Print the `title` attribute of the <a> tag inside each <li>.
    # NOTE(review): if an <li> contains no <a>, `item.a` is None and the
    # subscript fails — confirm the target page always has the link.
    print(item.a['title'])

提示

可使用 `.get_text()` 方法获取 HTML 标签内的文本内容

# Json 解析

import json

# Template: `Json字符串` is a placeholder for the JSON text to decode.
# Decode the JSON text into Python objects.
解析Json = json.loads(Json字符串)
# Read a top-level key.
print(解析Json['message'])
# Read a key nested under the top-level 'data' object.
print(解析Json['data']['message'])
最后更新: 2023/6/25 10:37:51