# NOTE: the two lines below are forum-post boilerplate ("register to see the
# download") captured when this script was copied from a web page; kept here
# as comments so the file remains valid Python.
# 马上注册,结交更多好友,享用更多功能^_^
# 您需要 登录 才可以下载或查看,没有账号?立即注册
import urllib.request
import urllib.parse
import os
import re
import random
from bs4 import BeautifulSoup
def open_url(url):
    """Fetch *url* and return the raw response body as bytes.

    The request is routed through a randomly chosen HTTP proxy and carries a
    desktop Chrome User-Agent so the target site does not reject the bot.

    NOTE(review): these free proxy addresses are hard-coded and very likely
    stale — confirm they are reachable, or drop the proxy handler entirely.
    """
    ip_list = [
        "59.66.141.24:1080",
        "115.223.222.65:9000",
        "114.226.65.185:6666",
        "163.125.158.195:8888",
    ]
    proxy_handler = urllib.request.ProxyHandler({"http": random.choice(ip_list)})
    # Use the opener directly instead of install_opener(): installing a global
    # opener on every call silently reconfigures the whole process.
    opener = urllib.request.build_opener(proxy_handler)
    req = urllib.request.Request(url)
    req.add_header(
        "User-Agent",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36",
    )
    # Context manager closes the socket even if read() raises; the timeout
    # keeps a dead proxy from hanging the script forever.
    with opener.open(req, timeout=30) as response:
        return response.read()
def home_page_num(root_url):
    """Return the newest (home) page number as a string, brackets stripped.

    The site renders the current page marker as e.g. "[56]" inside
    <span class="current-comment-page">; this fetches the root listing page,
    reads that marker, and removes the surrounding brackets.

    Raises ValueError when the marker cannot be found (layout change or a
    blocked fetch), instead of an opaque AttributeError on None.
    """
    html = open_url(root_url)
    soup = BeautifulSoup(html, "lxml")
    tag = soup.find("span", class_="current-comment-page")
    if tag is None:
        raise ValueError("could not locate the current-comment-page marker")
    home_page = tag.text
    print("首页页码:", home_page)
    # "[56]" -> "56"
    return home_page.strip("[]")
def get_page_url(root_url, page_num):
    """Build the comments URL for one listing page and echo it to stdout."""
    url = f"{root_url}page-{page_num}#comments"
    print(url)
    return url
def find_image_src(page_url):
    """Return the list of full-size image URLs linked from one listing page.

    Scans the page for <a class="view_img_link"> anchors and collects their
    href attributes, printing each one as it is found.
    """
    html = open_url(page_url)
    soup = BeautifulSoup(html, "lxml", from_encoding="utf-8")
    image_url = []
    for link in soup.find_all("a", class_="view_img_link"):
        href = link.get("href")
        if href is None:
            continue  # anchor without an href — nothing to download
        # NOTE(review): the site appears to serve protocol-relative links
        # ("//wx1.sinaimg.cn/..."); urlopen needs a scheme, so prepend one
        # when it is missing.  Confirm against the live markup.
        if href.startswith("//"):
            href = "http:" + href
        print(href)
        image_url.append(href)
    return image_url
def down_save_mm(image_url):
    """Download every URL in *image_url* into the current working directory.

    The file name is the last path component of each URL.  The body is
    fetched *before* the file is opened so a failed fetch does not leave an
    empty file behind, and one dead link is reported and skipped instead of
    aborting the whole crawl.
    """
    for each in image_url:
        filename = each.split("/")[-1]
        try:
            img = open_url(each)
        except OSError as e:
            # urllib raises URLError/HTTPError (both OSError subclasses)
            # on network failures; log and move on to the next image.
            print(filename, e)
            continue
        with open(filename, "wb") as f:
            f.write(img)
def mm_main():
    """Entry point: crawl every listing page, newest first, saving all images.

    Creates (if needed) and switches into a fixed desktop folder, reads the
    newest page number from the site, then walks backwards to page 1,
    downloading every image link on each page.
    """
    folder = "C:\\Users\\Administrator\\Desktop\\MM_Image"
    # exist_ok: re-running the script must not crash with FileExistsError.
    os.makedirs(folder, exist_ok=True)
    os.chdir(folder)
    root_url = "https://jandan.net/ooxx/"
    total_pages = int(home_page_num(root_url))
    # Walk from the newest page down to page 1.
    for page in range(total_pages, 0, -1):
        page_url = get_page_url(root_url, page)
        image_urls = find_image_src(page_url)
        down_save_mm(image_urls)
if __name__ == "__main__":
mm_main() |
|