1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
|
import os
import urllib.parse
import urllib.request

import bs4
# Wallpaper scraper: walks the first `page_sum` listing pages of the site,
# finds every <div class="w170img"> thumbnail, and saves the linked image
# as <alt text>.jpg inside ./images (relative to the current working dir).

# Number of listing pages to crawl (pages 1..page_sum).
page_sum = 10

# Target directory for the downloaded images.
path = os.path.join(os.getcwd(), 'images')
# exist_ok avoids the check-then-create race of exists() followed by mkdir().
os.makedirs(path, exist_ok=True)

url = "http://www.tootk.net"
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko)'
                  ' Chrome/32.0.1700.76 Safari/537.36'
}

# Path separators and characters rejected in Windows file names; the alt
# text comes from the remote page, so it must not be trusted as a file name.
_UNSAFE_FILENAME_CHARS = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})

for count in range(page_sum):
    req = urllib.request.Request(
        url=url + "/tupian/bizhi/list_13_" + str(count + 1) + ".html",
        headers=headers,
    )
    print(req.full_url)
    # Close the HTTP response deterministically instead of leaking it.
    with urllib.request.urlopen(req) as response:
        content = response.read()

    soup = bs4.BeautifulSoup(content, "html5lib")
    liResult = soup.find_all("div", attrs={"class": "w170img"})

    for item in liResult:
        image = item.img
        if image is None:
            # Thumbnail container without an <img>: nothing to download.
            continue
        lplink = image.get('src')
        if not lplink:
            continue

        # Drop the "-lp" marker from the thumbnail src (presumably this
        # yields the full-size image URL -- confirm against the site).
        # urljoin also copes with absolute or slash-less src values, which
        # plain string concatenation would turn into a broken URL.
        link = urllib.parse.urljoin(url, lplink.replace("-lp", ""))

        # Fall back to the image's own base name when alt is missing/empty.
        title = image.get('alt') or os.path.splitext(os.path.basename(link))[0]
        filename = os.path.join(
            path, title.translate(_UNSAFE_FILENAME_CHARS) + ".jpg"
        )
        print(link)
        try:
            urllib.request.urlretrieve(link, filename)
        except OSError as err:
            # URLError/HTTPError are OSError subclasses; report the failure
            # and keep going so one bad image does not abort the whole crawl.
            print("failed to download " + link + ": " + str(err))
|