Bing Image Search APIを使ってみた
Bing Image Search APIを使って、Webから特定の画像を収集する
今回はパンダの画像を集めてみた
事前準備
Bing Image Search APIを使用するには、Microsoft Azureへの登録が必要
登録方法は以下のサイトを参考にさせていただいた
Bingの画像検索APIを使って画像を大量に収集する - Qiita
ソースコード
以下のように実装した
"""Collect images from the web with the Bing Image Search API (v7).

The script queries the search endpoint page by page, gathers the
``contentUrl`` of every hit, downloads each URL and stores the result as
``<save_dir_path>/<keyword>/NNN.jpg``.
"""
import http.client
import json
import math
import os
import re
import urllib
import urllib.parse

# Placeholder subscription key; main() lets the BING_SEARCH_API_KEY
# environment variable override it so the real key need not be committed.
APIKEY = 'XXXXXXXXXXXXXXXXXXXXXXXXX'
API_HOST = 'api.cognitive.microsoft.com'
API_PATH = '/bing/v7.0/images/search'

# File extensions (lower-case, with the leading dot) that are saved.
ALLOWED_EXTENSIONS = ('.jpg',)


def make_dir(path):
    """Create directory *path*, including missing parents, if needed."""
    os.makedirs(path, exist_ok=True)


def make_img_path(save_dir_path, url, cnt, save_dir=None):
    """Return the local file path under which the image at *url* is stored.

    Args:
        save_dir_path: Root directory for all downloads.
        url: Source URL of the image; only its path component is inspected.
        cnt: Running number used as the zero-padded file name.
        save_dir: Optional sub-directory below *save_dir_path* (the script
            passes the search keyword). ``None`` stores directly in the root.

    Returns:
        ``<target dir>/<cnt as 3 digits><extension>``; the target directory
        is created as a side effect.

    Raises:
        ValueError: The URL path does not end in an allowed extension.
    """
    # Look only at the path so that "photo.jpg?w=100" is still seen as .jpg.
    url_path = urllib.parse.urlsplit(url).path
    file_ext = os.path.splitext(url_path)[-1].lower()
    # Tuple membership: a plain string here would turn this into a substring
    # test that also accepts '' and '.j'.
    if file_ext not in ALLOWED_EXTENSIONS:
        raise ValueError('Not applicable file extension')

    if save_dir is None:
        save_img_path = save_dir_path
    else:
        save_img_path = os.path.join(save_dir_path, save_dir)
    make_dir(save_img_path)
    return os.path.join(save_img_path, '{0:03d}'.format(cnt) + file_ext)


def download_image(url, timeout=10):
    """Download *url* and return the response body as bytes.

    Args:
        url: Address of the image.
        timeout: Timeout in seconds handed to ``requests.get``.

    Raises:
        RuntimeError: The server did not answer with status 200, or the
            response does not declare an image content type.
    """
    # Imported here so the path and paging helpers stay importable on
    # machines that do not have the third-party "requests" package.
    import requests

    response = requests.get(url, allow_redirects=True, timeout=timeout)
    if response.status_code != 200:
        # status_code is an int; it must be converted before concatenation.
        raise RuntimeError("HTTP status: " + str(response.status_code))
    content_type = response.headers.get("content-type", "")
    if "image" not in content_type:
        # Guards against saving an HTML error page under a .jpg name.
        raise RuntimeError("Content-Type: " + content_type)
    return response.content


def save_image(filename, image):
    """Write the bytes *image* to the file *filename*."""
    with open(filename, "wb") as fout:
        fout.write(image)


def count_requests(num_imgs_required, num_imgs_per_transaction):
    """Return how many API calls are needed to cover the requested images.

    Rounds up so that a trailing partial page is not dropped.

    Raises:
        ValueError: *num_imgs_per_transaction* is not positive.
    """
    if num_imgs_per_transaction <= 0:
        raise ValueError('num_imgs_per_transaction must be positive')
    return math.ceil(max(num_imgs_required, 0) / num_imgs_per_transaction)


def extract_image_urls(payload):
    """Return the unquoted ``contentUrl`` of every entry in ``payload['value']``.

    A payload without a ``value`` list (for example an API error document)
    and entries without ``contentUrl`` yield nothing instead of raising.
    """
    return [
        urllib.parse.unquote(item['contentUrl'])
        for item in payload.get('value', [])
        if 'contentUrl' in item
    ]


def search_image_urls(api_key, keyword, num_imgs_required,
                      num_imgs_per_transaction=10, market='ja-JP'):
    """Query the image search API and return up to *num_imgs_required* URLs.

    A failed page is reported on stdout and skipped, so the result may hold
    fewer URLs than requested.
    """
    headers = {'Ocp-Apim-Subscription-Key': api_key}
    url_list = []
    pages = count_requests(num_imgs_required, num_imgs_per_transaction)
    for page in range(pages):
        offset = page * num_imgs_per_transaction
        params = urllib.parse.urlencode({
            'q': keyword,
            'mkt': market,
            # The last page may be shorter than a full transaction.
            'count': min(num_imgs_per_transaction, num_imgs_required - offset),
            'offset': offset,
        })
        conn = http.client.HTTPSConnection(API_HOST)
        try:
            # A GET request carries no body.
            conn.request("GET", "{}?{}".format(API_PATH, params),
                         headers=headers)
            response = conn.getresponse()
            body = response.read()
        except (OSError, http.client.HTTPException) as err:
            print("API request failed: {}".format(err))
            continue
        finally:
            conn.close()

        if response.status != 200:
            print("API request failed: HTTP {} {}".format(
                response.status, response.reason))
            continue
        try:
            payload = json.loads(body.decode('utf-8'))
        except ValueError as err:  # covers JSON and UTF-8 decoding errors
            print("API response could not be decoded: {}".format(err))
            continue
        url_list.extend(extract_image_urls(payload))
    return url_list[:num_imgs_required]


def main():
    """Search for the keyword and download every usable hit."""
    api_key = os.environ.get('BING_SEARCH_API_KEY', APIKEY)
    save_dir_path = './save'
    keyword = 'panda'
    num_imgs_required = 30
    num_imgs_per_transaction = 10

    make_dir(save_dir_path)
    url_list = search_image_urls(
        api_key, keyword, num_imgs_required, num_imgs_per_transaction)

    for i, url in enumerate(url_list):
        try:
            img_path = make_img_path(save_dir_path, url, i, save_dir=keyword)
            image = download_image(url)
            save_image(img_path, image)
            print('saved image... {}'.format(url))
        except KeyboardInterrupt:
            break
        except Exception as err:
            # Best effort: one unusable URL must not abort the whole run.
            print("skipped {}: {}".format(url, err))


if __name__ == "__main__":
    main()
参考
Bingの画像検索APIを使って画像を大量に収集する - Qiita
Bing Image Search API v7で画像をちょっと保存する方法 - 怠慢mirenn所感