python爬蟲(爬取圖片)

python爬蟲爬圖片

爬蟲爬baidu圖片

第一步

載入爬蟲模塊

from requests_html import HTMLSession            # load the crawler module

第二步

建立session對象

from requests_html import HTMLSession  # load the crawler module

# Create the session object.
session = HTMLSession()

第三步

發現百度圖片搜索的規律，發起請求並匹配到圖片的url

http://image.baidu.com/search/index?tn=baiduimage&fm=result&ie=utf-8&word=`咱們搜圖片的關鍵字`

from requests_html import HTMLSession  # load the crawler module

# Create the session object.
session = HTMLSession()

# Request the Baidu image search page, using "二傻子" as the example keyword.
search_url = 'http://image.baidu.com/search/index?tn=baiduimage&fm=result&ie=utf-8&word=二傻子'
response = session.get(search_url)

# Matching template for the image URLs: "{}" marks the part to capture,
# i.e. the value that follows "thumbURL" in the page source.
img_url_regex = '"thumbURL":"{}",'

# Search the whole page and collect every match into a list.
img_url_list = response.html.search_all(img_url_regex)

第四步

訪問圖片url並且保存下來

from requests_html import HTMLSession  # load the crawler module

# Create the session object; it is reused for the search and every download.
session = HTMLSession()

# Request the Baidu image search page, using "二傻子" as the example keyword.
response = session.get('http://image.baidu.com/search/index?tn=baiduimage&fm=result&ie=utf-8&word=二傻子')

# Matching template for the image URLs: "{}" marks the part to capture,
# i.e. the value that follows "thumbURL" in the page source.
img_url_regex = '"thumbURL":"{}",'

# Search the whole page and collect every match into a list.
img_url_list = response.html.search_all(img_url_regex)

# enumerate() numbers the images from 1, replacing a hand-maintained counter.
for index, url in enumerate(img_url_list, start=1):
    # Each match holds the captured URL at position 0. The image response gets
    # its own name so the search page response above is not overwritten.
    img_response = session.get(url[0])
    # Write the raw bytes to a numbered file in the current working directory.
    with open(f'第{index}張.jpg', 'wb') as fw:
        fw.write(img_response.content)

第五步

類的封裝

from requests_html import HTMLSession    

class BaiDuImg:
    """Download the thumbnail images of a Baidu image search.

    ``run()`` reads a keyword from standard input, requests the search page,
    and saves every matched thumbnail into the current working directory as
    ``第1張.jpg``, ``第2張.jpg``, and so on.
    """

    # Session used for every request; created once, when the class is defined.
    session = HTMLSession()
    # Template passed to ``search_all``; "{}" captures the thumbnail URL.
    img_url_regex = '"thumbURL":"{}",'
    # Search page URL; set by ``get_search``.
    url = ''
    # Matches stored by ``get_img_url_list``; item[0] is the captured URL.
    img_url_list = []

    def get_search(self):
        """Read the search keyword from standard input and build ``self.url``."""
        # Imported locally so the class does not need a new top-of-file import.
        from urllib.parse import quote

        search = input()
        # Percent-encode the keyword as UTF-8 (the URL declares ie=utf-8) so
        # characters such as "&", "#" or spaces cannot break the query string.
        keyword = quote(search, safe='')
        self.url = f'http://image.baidu.com/search/index?tn=baiduimage&fm=result&ie=utf-8&word={keyword}'

    def get_img_url_list(self):
        """Request ``self.url`` and store all thumbnail matches in ``self.img_url_list``."""
        response = self.session.get(self.url)
        # The template is a class attribute, so it has to be reached through
        # ``self``; a bare ``img_url_regex`` raises NameError inside a method.
        self.img_url_list = response.html.search_all(self.img_url_regex)

    def save_img(self):
        """Download every URL in ``self.img_url_list`` and save it as a numbered .jpg file."""
        # enumerate() numbers the images from 1.
        for index, url in enumerate(self.img_url_list, start=1):
            # Request the image; the captured URL is at position 0 of the match.
            response = self.session.get(url[0])
            # Write the raw bytes to a local file.
            with open(f'第{index}張.jpg', 'wb') as fw:
                fw.write(response.content)

    def run(self):
        """Run the whole flow: read the keyword, collect the URLs, save the images."""
        self.get_search()
        self.get_img_url_list()
        self.save_img()
        
if __name__ == '__main__':
    # Script entry point: build the crawler and run its full flow.
    crawler = BaiDuImg()
    crawler.run()