Python/画像スクレイピング

Python

import os
import sys
from pathlib import Path

from icrawler.builtin import GoogleImageCrawler
# Command-line Google image downloader.
# Usage: python image.py <output_dir> <keyword>
#   argv[1]: directory the downloaded images are stored in
#   argv[2]: search keyword passed to the crawler
argv = sys.argv

# Both positional arguments are required; exit with a usage message instead
# of failing later with an IndexError on argv[1] / argv[2].
if len(argv) < 3:
    sys.exit("usage: python %s <output_dir> <keyword>" % argv[0])

# Create the output directory (and any missing parents). exist_ok=True makes
# this a no-op when the directory is already there, so no separate
# existence check is needed.
Path(argv[1]).mkdir(parents=True, exist_ok=True)

# The crawler stores its downloads under root_dir; max_num=100 is passed as
# the cap on the number of images for this keyword.
crawler = GoogleImageCrawler(storage={"root_dir": argv[1]})
crawler.crawl(keyword=argv[2], max_num=100)

実行例: python image.py sakura sakura
書式: python image.py 集めた画像を入れるフォルダの名前 集めたい画像のキーワード

Python

# Google image scraping: crawl images for a fixed keyword into ./images.

from icrawler.builtin import GoogleImageCrawler

# Storage settings handed to the crawler; downloads go under "images".
storage_config = {"root_dir": "images"}

crawler = GoogleImageCrawler(storage=storage_config)
# max_num=100 is passed as the cap on the number of images to fetch.
crawler.crawl(keyword="犬", max_num=100)

Python

# Nifty image search scraping: fetch result pages for a keyword and save
# every matching thumbnail under images/book/.

import os
import re
import uuid

import requests
from bs4 import BeautifulSoup

# Search URL template; the two %s slots are the query (q=) and the value of
# the "start" parameter.
url = "https://search.nifty.com/imagesearch/search?select=1&chartype=&q=%s&xargs=2&img.fmt=all&img.imtype=color&img.filteradult=no&img.type=all&img.dimensions=large&start=%s&num=20"
keyword = "本"
# Values substituted into "start", one request per value. This list was
# commented out in the original, which made the loop below fail with a
# NameError.
# NOTE(review): values restored as written; confirm they match the site's
# paging for num=20.
pages = [1, 20, 40, 60, 80, 100]

# Directory the downloaded images are written to.
output_dir = "images/book/"
# Only <img> tags whose src starts with this prefix are downloaded.
# Compiled once, with the dots escaped so they match literal dots only.
thumbnail_src = re.compile(r"^https://msp\.c\.yimg\.jp/yjimage")
# Seconds to wait on each HTTP request before giving up, so a stalled
# connection cannot hang the script forever.
request_timeout = 30

# open() does not create missing directories, so make sure the target exists.
os.makedirs(output_dir, exist_ok=True)

for p in pages:
    page_response = requests.get(url % (keyword, p), timeout=request_timeout)
    if not page_response.ok:
        # Skip this page rather than parsing an HTTP error body.
        print("skipping start=%s: HTTP %s" % (p, page_response.status_code))
        continue

    soup = BeautifulSoup(page_response.text, "lxml")
    for img in soup.find_all("img", src=thumbnail_src):
        image_response = requests.get(img["src"], timeout=request_timeout)
        if not image_response.ok:
            # Do not save an HTTP error body as if it were a .jpg file.
            print("skipping %s: HTTP %s" % (img["src"], image_response.status_code))
            continue

        # A uuid4 file name avoids collisions between downloads; "wb" writes
        # the raw response bytes.
        image_path = os.path.join(output_dir, str(uuid.uuid4()) + ".jpg")
        with open(image_path, "wb") as file:
            file.write(image_response.content)