# 03 Douban
import json
import requests
from lxml import etree
# Scrape the Douban movie chart page and dump the listed movies to a.json.

CHART_URL = "https://movie.douban.com/chart"
REQUEST_HEADERS = {
    "referer": "https://movie.douban.com/explore",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0",
}
REQUEST_TIMEOUT_SECONDS = 10
OUTPUT_PATH = "a.json"
# Placeholder stored in "rating_nums" when a row has no single rating value.
NO_RATING = "无评分"
# The star level is read from the characters after the first seven of the
# span's class attribute (the code expects something shaped like "allstar45").
STAR_CLASS_PREFIX_LEN = len("allstar")


def first(values, default=None):
    """Return the first item of *values*, or *default* when it is empty."""
    return values[0] if values else default


def parse_star(class_name):
    """Convert a star CSS class such as ``"allstar45"`` into ``4.5``.

    The two characters following the 7-character prefix are read as
    ``<units><tenths>``. Returns ``None`` when the class does not carry two
    numeric characters there, instead of raising.
    """
    digits = class_name[STAR_CLASS_PREFIX_LEN:]
    if len(digits) < 2:
        return None
    try:
        return float(f"{digits[0]}.{digits[1]}")
    except ValueError:
        return None


def extract_movie(row):
    """Build the record for one ``<tr>`` element, or ``None`` if it is not a movie row.

    *row* is an lxml element. A row without a cover image carrying both
    ``src`` and ``alt`` is treated as a non-movie row and skipped.
    """
    cover_url = first(row.xpath("""./td[1]/a/img/@src"""))
    name = first(row.xpath("""./td[1]/a/img/@alt"""))
    if cover_url is None or name is None:
        # "//table/tr" matches every table row on the page, not only movie rows.
        return None

    star_class = first(row.xpath("""./td[2]/div/div/span[1]/@class"""), "")
    rating_nums = row.xpath("""./td[2]/div/div/span[@class="rating_nums"]/text()""")
    return {
        "movie": name,
        "cover_url": cover_url,
        "description": first(row.xpath("""./td[2]/div/p/text()"""), ""),
        "star": parse_star(star_class),
        # Exactly one text node means a rating is present; anything else is "no rating".
        "rating_nums": rating_nums[0] if len(rating_nums) == 1 else NO_RATING,
        "comment_nums": first(
            row.xpath("""./td[2]/div/div/span[@class="pl"]/text()"""), ""
        ),
    }


def fetch_chart_html():
    """Download the chart page and return its HTML text.

    Raises ``requests.HTTPError`` on a non-2xx response so that an error page
    is not silently parsed into an empty result.
    """
    # Retrieving a page is a GET; the timeout keeps a stalled connection
    # from hanging the script forever.
    response = requests.get(
        CHART_URL, headers=REQUEST_HEADERS, timeout=REQUEST_TIMEOUT_SECONDS
    )
    response.raise_for_status()
    return response.text


def main():
    """Fetch the chart, extract every movie row and write them to ``a.json``."""
    parsed = etree.HTML(fetch_chart_html())
    rows = parsed.xpath("""//table/tr""")
    data = [movie for movie in map(extract_movie, rows) if movie is not None]
    # The context manager guarantees the file is flushed and closed.
    with open(OUTPUT_PATH, "w", encoding="utf-8") as output:
        json.dump({"movies": data}, output, ensure_ascii=False)


if __name__ == "__main__":
    main()