"""Scrape the Douban Movie Top 250 list.

Two interchangeable implementations are provided: one parses the pages with
BeautifulSoup, the other with lxml XPath queries (and also saves a CSV file).
"""
import csv

import requests
from bs4 import BeautifulSoup
from lxml import etree

# Third-party requirements: requests, bs4 (beautifulsoup4), lxml.

_TOP250_URL = 'https://movie.douban.com/top250'
_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
}
_TOTAL_MOVIES = 250
_PAGE_SIZE = 25
# Seconds to wait on the server; without a timeout a stalled connection
# would block the script forever.
_REQUEST_TIMEOUT = 10
_CSV_PATH = 'top250.csv'


def _fetch_page(start):
    """Return the HTML text of the list page that begins at offset *start*.

    Raises:
        requests.HTTPError: if the server answers with an error status, so an
            error page is never parsed as if it were a result page.
        requests.RequestException: on connection problems or timeout.
    """
    response = requests.get(
        _TOP250_URL,
        headers=_HEADERS,
        params={'start': str(start), 'filter': ''},
        timeout=_REQUEST_TIMEOUT,
    )
    response.raise_for_status()
    return response.text


def _first(nodes, default=''):
    """Return the first item of an XPath result list, or *default* if empty."""
    return nodes[0] if nodes else default


def _iter_movies_xpath(html):
    """Yield ``(title, score, link)`` for every movie entry found in *html*."""
    root = etree.HTML(html)
    if root is None:
        # Defensive: nothing to query if the parser produced no root element.
        return
    for info in root.xpath('//div[@class="info"]'):
        title = _first(info.xpath('div[@class="hd"]/a/span[@class="title"]/text()'))
        score = _first(info.xpath(
            'div[@class="bd"]/div[@class="star"]/span[@class="rating_num"]/text()'
        ))
        link = _first(info.xpath('div[@class="hd"]/a/@href'))
        yield title, score, link


def doubantop250beauti():
    """Print the title and link of every Top 250 movie, using BeautifulSoup.

    All pages are downloaded first; the collected entries are printed once
    at the end, one ``title link`` pair per line.

    Raises:
        requests.RequestException: if any page cannot be downloaded.
    """
    movies = []
    for start in range(0, _TOTAL_MOVIES, _PAGE_SIZE):
        soup = BeautifulSoup(_fetch_page(start), 'html.parser')
        for header in soup.find_all('div', class_='hd'):
            anchor = header.a
            # Skip malformed entries instead of failing on a None attribute.
            if anchor is None or anchor.span is None:
                continue
            movies.append({
                'title': anchor.span.text.strip(),
                'link': anchor.get('href', ''),
            })

    for movie in movies:
        print(movie['title'], movie['link'])


def doubantop250xpath():
    """Print every Top 250 movie and append it to ``top250.csv``, using XPath.

    Each movie's title, score and link are printed and written as one CSV
    row. The file is opened once, in append mode as in the original script,
    so rows from earlier runs are kept and re-running adds the list again.
    Rows are written page by page, so pages already fetched are kept in the
    file if a later request fails.

    Raises:
        requests.RequestException: if any page cannot be downloaded.
        OSError: if the CSV file cannot be opened for writing.
    """
    # Encoding name written canonically; the original literal carried stray
    # embedded double quotes.
    with open(_CSV_PATH, 'a', encoding='utf-8-sig', newline='') as csv_file:
        writer = csv.writer(csv_file)
        for start in range(0, _TOTAL_MOVIES, _PAGE_SIZE):
            for title, score, link in _iter_movies_xpath(_fetch_page(start)):
                print('电影名称:', title)
                print('电影评分:', score)
                print('电影链接:', link)
                print('-----------------------')
                writer.writerow([title, score, link])


if __name__ == '__main__':
    doubantop250xpath()
    # Alternative implementation (BeautifulSoup): doubantop250beauti()
用 AI 生成的豆瓣电影 Top250 抓取脚本(XPath 与 BeautifulSoup 两种实现)
相关推荐
标签:
留言与评论(共有 0 条评论)