# Wallpaper crawler for umei.cc:
#   1. Fetch the main listing page and extract each album sub-page href.
#   2. Fetch each sub-page and locate the full-size image URL.
#   3. Download each image into the local "img/" directory.
import os
import time

import requests
from bs4 import BeautifulSoup

domain = "https://www.umei.cc"
url = "https://www.umei.cc/bizhitupian/weimeibizhi/"

# Create the output directory up front; open() below would otherwise
# raise FileNotFoundError when "img/" does not already exist.
os.makedirs("img", exist_ok=True)

resp = requests.get(url, timeout=10)  # timeout so a stalled server can't hang the script
resp.encoding = "utf-8"  # site serves UTF-8; set it explicitly to avoid mojibake
page = BeautifulSoup(resp.text, "html.parser")  # use the stdlib html.parser

# Album links live inside the left listing column.
album_links = page.find("div", class_="listlbc_cont_l").find_all("a", class_="img_album_btn")
for link in album_links:
    sub_url = domain + link.get("href")  # hrefs are site-relative; join with the domain

    # Fetch the album sub-page and parse it.
    sub_resp = requests.get(sub_url, timeout=10)
    sub_resp.encoding = "utf-8"
    sub_page = BeautifulSoup(sub_resp.text, "html.parser")

    # Guard against layout changes / empty albums instead of crashing
    # the whole crawl with an AttributeError on None.
    big_pic = sub_page.find("div", class_="big-pic")
    img_tag = big_pic.find("img") if big_pic is not None else None
    if img_tag is None:
        print("skip (no image found):", sub_url)
        continue
    img_url = img_tag.get("src")

    # Download the image bytes and save them under the last path segment.
    img_resp = requests.get(img_url, timeout=10)
    img_name = img_url.split("/")[-1]
    with open(os.path.join("img", img_name), mode="wb") as f:
        f.write(img_resp.content)
    print("over", img_name)

    time.sleep(1)  # be polite: throttle one request per second

print("all is done")
Python b站的爬虫视频真不错,分享自己跟着写的壁纸爬虫代码
相关推荐
标签:Python
留言与评论(共有 0 条评论)