プログラミング備忘録

初級プログラマ。python、DL勉強中

bing image search APIを使ってみた

Bing Image Search APIを使うことで、Web上から特定のキーワードに対応する画像を収集できる。
今回はパンダの画像を集めてみた。

f:id:programdl:20171125040304p:plain

事前準備

Bing Image Search APIを使用するには、Microsoft Azureへの登録が必要。
登録方法は以下のサイトを参考にさせていただいた。

Bingの画像検索APIを使って画像を大量に収集する - Qiita

ソースコード

以下のように実装した

import http.client
import json
import math
import os
import re
import urllib
import urllib.parse

import requests


def make_dir(path):
    """Create the directory ``path`` if it does not already exist.

    Missing parent directories are created as well, and an already existing
    directory is silently accepted.

    Args:
        path: Directory path to create.

    Raises:
        FileExistsError: If ``path`` exists but is not a directory.
    """
    # makedirs(exist_ok=True) avoids the check-then-create race of
    # "isdir() followed by mkdir()" and also handles nested paths.
    os.makedirs(path, exist_ok=True)


def make_img_path(save_dir_path, url, cnt):
    """Build the destination file path for the ``cnt``-th downloaded image.

    The target directory is ``<save_dir_path>/<save_dir>``, where ``save_dir``
    is a module-level name assigned in the ``__main__`` section of this
    script. The directory is created through ``make_dir`` before the
    extension is checked.

    Args:
        save_dir_path: Root directory under which images are stored.
        url: Image URL; the extension of this string decides acceptance.
        cnt: Sequence number, zero-padded to three digits for the file name.

    Returns:
        Path of the form ``<save_dir_path>/<save_dir>/007.jpg``.

    Raises:
        ValueError: If the URL does not end in ``.jpg`` (case-insensitive).
    """
    save_img_path = os.path.join(save_dir_path, save_dir)
    make_dir(save_img_path)

    file_ext = os.path.splitext(url)[-1].lower()
    # The trailing comma matters: ('.jpg') is just the string '.jpg', which
    # would turn `in` into a substring test and accept '', '.j' and '.jp'.
    if file_ext not in ('.jpg',):
        raise ValueError('Not applicable file extension')
    return os.path.join(save_img_path, '{0:03d}'.format(cnt) + file_ext)


def download_image(url, timeout=10):
    """Download ``url`` and return the raw response body.

    Args:
        url: Address of the image to fetch.
        timeout: Timeout in seconds handed to ``requests.get``.

    Returns:
        The response body as ``bytes`` (``response.content``).

    Raises:
        Exception: If the server answers with a status other than 200.
        requests.RequestException: Propagated from ``requests.get`` on
            connection problems or timeouts.
    """
    response = requests.get(url, allow_redirects=True, timeout=timeout)
    if response.status_code != 200:
        # status_code is an int, so it must be formatted rather than
        # concatenated to the message string.
        raise Exception("HTTP status: {}".format(response.status_code))
    return response.content


def save_image(filename, image):
    """Write the bytes ``image`` to ``filename``, replacing any existing file.

    Args:
        filename: Destination path, opened in binary write mode.
        image: Bytes-like payload to store.
    """
    sink = open(filename, "wb")
    try:
        sink.write(image)
    finally:
        # Close the handle even when the write fails.
        sink.close()

if __name__ == "__main__":
    # Script entry point: query the Bing Image Search API page by page,
    # collect the image URLs it returns, then download each one.
    APIKEY = 'XXXXXXXXXXXXXXXXXXXXXXXXX'  # placeholder; set a real subscription key
    save_dir_path = './save'
    keyword = 'panda'
    # NOTE: make_img_path() reads `save_dir` as a module-level global, so it
    # has to stay defined here under exactly this name.
    save_dir = keyword
    make_dir(save_dir_path)
    num_imgs_required = 30
    num_imgs_per_transaction = 10
    # Round up so that a required count which is not a multiple of the page
    # size is not undershot; the surplus is trimmed after collection.
    offset_count = math.ceil(num_imgs_required / num_imgs_per_transaction)

    url_list = []

    headers = {
        # Request headers
        'Content-Type': 'multipart/form-data',
        'Ocp-Apim-Subscription-Key': APIKEY,
    }

    for offset in range(offset_count):
        params = urllib.parse.urlencode({
            'q': keyword,
            'mkt': 'ja-JP',
            'count': num_imgs_per_transaction,
            'offset': offset * num_imgs_per_transaction
        })

        conn = http.client.HTTPSConnection('api.cognitive.microsoft.com')
        try:
            conn.request("GET", "/bing/v7.0/images/search?%s" % params, "{body}", headers)
            response = conn.getresponse()
            status = response.status
            data = response.read()
        except (OSError, http.client.HTTPException) as err:
            # Not every exception carries errno/strerror, so print the
            # exception itself instead of those attributes.
            print("search request failed: {}".format(err))
            continue
        finally:
            # Release the connection on success and on failure alike.
            conn.close()

        if status != 200:
            print("search request failed: HTTP status {}".format(status))
            continue

        try:
            results = json.loads(data.decode('utf-8'))
            images = results['value']
        except (ValueError, KeyError, TypeError) as err:
            # An undecodable body, or a payload without the 'value' list.
            print("unexpected search response: {!r}".format(err))
            continue

        for values in images:
            content_url = values.get('contentUrl')
            if content_url:
                url_list.append(urllib.parse.unquote(content_url))

    # Drop any surplus introduced by rounding the page count up.
    url_list = url_list[:num_imgs_required]

    for i, url in enumerate(url_list):
        try:
            img_path = make_img_path(save_dir_path, url, i)
            image = download_image(url)
            save_image(img_path, image)
            print('saved image... {}'.format(url))
        except KeyboardInterrupt:
            # Allow Ctrl-C to stop the download loop.
            break
        except Exception as err:
            # Best effort: report the failure and move on to the next URL.
            print("%s" % (err))


参考

Bingの画像検索APIを使って画像を大量に収集する - Qiita
Bing Image Search API v7で画像をちょっと保存する方法 - 怠慢mirenn所感