百度/腾讯 ocr 试用

2020/01/10 技术

百度/腾讯 ocr 试用

1. 百度 ocr

准备工作:

  • 注册百度云账号, 创建应用, 获取 APP_ID, API_KEY, SECRET_KEY
  • 安装 python sdk (pip install baidu-aip), 执行 python -c "from aip import AipOcr" 确认安装成功 (无报错即可).

代码示例:


import math
import os
import time
import unittest

import cv2
import numpy as np
from aip import AipOcr


class BaiduOCR(object):
    """Small wrapper around the Baidu ``AipOcr`` SDK client.

    Replace the three placeholders below with your own APP_ID, API key (AK)
    and secret key (SK) before use.
    """
    APP_ID = '<APP_ID>'
    API_KEY = '<API_KEY>'
    SECRET_KEY = '<SECRET_KEY>'

    def __init__(self):
        """Create the SDK client from the class-level credentials."""
        self.client = AipOcr(self.APP_ID, self.API_KEY, self.SECRET_KEY)

    @staticmethod
    def get_image(image_path: str) -> bytes:
        """Return the raw bytes of the file at ``image_path``.

        Raises:
            OSError: if the file cannot be opened or read.
        """
        with open(image_path, "rb") as f:
            return f.read()

    @staticmethod
    def image_np2bytes(image_np: np.ndarray) -> bytes:
        """Encode an OpenCV image array as JPEG and return the encoded bytes.

        The image is encoded in memory. Earlier versions wrote a fixed
        ``x.jpg`` into the working directory and deleted it afterwards, which
        could clobber an unrelated file and was unsafe when called
        concurrently.

        Raises:
            OSError: if OpenCV reports that encoding failed.
        """
        ok, encoded = cv2.imencode(".jpg", image_np)
        if not ok:
            raise OSError("cv2.imencode could not encode the image as JPEG")
        return encoded.tobytes()

    def ocr(self, image: bytes):
        """Run the SDK's ``general`` recognition call on ``image``.

        Args:
            image: encoded image file content (e.g. from ``get_image`` or
                ``image_np2bytes``).

        Returns:
            Whatever ``AipOcr.general`` returns for the request.
        """
        # Option values are strings because that is how they are passed on
        # to the SDK here; see the Baidu OCR docs for their exact meaning.
        return self.client.general(image=image, options={
            "language_type": "CHN_ENG",
            "detect_direction": "false",
            "probability": "true",
        })


if __name__ == '__main__':
    # Demo: recognise the text in ./test.png and print the raw API response.
    image_path = "test.png"
    recognizer = BaiduOCR()
    image_bytes = BaiduOCR.get_image(image_path)
    print(recognizer.ocr(image=image_bytes))

2. 腾讯 OCR

准备工作:在腾讯 AI 开放平台 (ai.qq.com) 创建应用并开通 OCR 服务, 获取 APP_ID 和 APP_KEY。

代码示例:


import base64
import hashlib
import json
import os
import random
import time
from urllib.parse import quote, urlencode
from urllib.request import Request, urlopen

# Directory part of this script's path (may be '' when run via a bare
# relative filename). Not referenced by the rest of the code shown here.
_cur_dir = os.path.dirname(__file__)


def setParams(array, key, value) -> None:
    """Store ``value`` under ``key`` in ``array``, mutating it in place.

    ``array`` can be any object that supports item assignment (e.g. a dict
    of request parameters). Nothing is returned.
    """
    array[key] = value


def genSignString(parser):
    """Compute the request signature for a parameter mapping.

    Every entry except ``app_key`` is rendered as ``key=value`` with the
    value converted to ``str`` and fully percent-encoded; entries are ordered
    by key and joined with ``&``. ``app_key=<raw app_key>`` is appended last
    and the upper-case hex MD5 of the UTF-8 encoded string is returned.

    Raises:
        KeyError: if ``parser`` has no ``app_key`` entry.
    """
    pieces = [
        "%s=%s" % (name, quote(str(parser[name]), safe=""))
        for name in sorted(parser.keys())
        if name != 'app_key'
    ]
    # The secret goes last and is deliberately not percent-encoded.
    pieces.append('app_key=' + parser['app_key'])
    payload = "&".join(pieces)

    digest = hashlib.md5(payload.encode('utf-8')).hexdigest()
    return digest.upper()


class AiPlat(object):
    """Minimal HTTP client for endpoints under ``https://api.ai.qq.com/fcgi-bin/``.

    Each ``get*`` method builds a fresh parameter dict, signs it with
    ``genSignString`` and POSTs it through ``invoke``. After a call,
    ``self.url`` holds the endpoint URL, ``self.data`` the signed parameters
    and ``self.url_data`` the encoded request body.
    """
    url_preffix = 'https://api.ai.qq.com/fcgi-bin/'

    def __init__(self, app_id, app_key):
        """Remember the application credentials used to sign every request."""
        self.app_id = app_id
        self.app_key = app_key
        self.data = {}

    def invoke(self, params):
        """POST ``params`` (form-encoded) to ``self.url`` and decode the JSON reply.

        ``self.url`` must already be set; the ``get*`` methods do that.

        Returns:
            The parsed JSON response. If the request or the decoding raises,
            the exception is printed and
            ``{'ret': -1, 'httpcode': -1, 'msg': 'system error'}`` is returned
            instead of propagating it.
        """
        self.url_data = urlencode(params).encode('utf-8')
        req = Request(self.url, self.url_data)
        try:
            # Context manager closes the HTTP response even if read() fails.
            with urlopen(req) as rsp:
                str_rsp = rsp.read().decode('utf-8')
            return json.loads(str_rsp)
        except Exception as e:
            # Best-effort API: callers get an error dict rather than a raise.
            print(e)
            return {'ret': -1, 'httpcode': -1, 'msg': "system error"}

    def _signed_request(self, path, nonce_str, extra_params):
        """Build, sign and send one request to ``url_preffix + path``."""
        self.url = self.url_preffix + path
        # Start from an empty dict on every call: reusing the previous call's
        # dict would feed its old 'sign' (and unrelated leftovers such as a
        # previous 'image') into the new signature.
        params = {
            'app_id': self.app_id,
            'app_key': self.app_key,
            'time_stamp': int(time.time()),
            'nonce_str': nonce_str,
        }
        params.update(extra_params)
        params['sign'] = genSignString(params)
        self.data = params
        return self.invoke(params)

    def getOcrGeneralocr(self, image):
        """Call ``ocr/ocr_generalocr`` with ``image`` (raw image bytes).

        The image is sent base64-encoded. Returns the result of ``invoke``.
        """
        nonce = int(time.time()) + random.randint(1000, 100000)
        image_data = base64.b64encode(image).decode("utf-8")
        return self._signed_request('ocr/ocr_generalocr', nonce, {
            'image': image_data,
        })

    def getNlpTextTrans(self, text, type):
        """Call ``nlp/nlp_texttrans`` with ``text`` and ``type``.

        ``type`` shadows the builtin but is kept for interface compatibility;
        its allowed values are defined by the remote API (not visible here).
        Returns the result of ``invoke``.
        """
        return self._signed_request('nlp/nlp_texttrans', int(time.time()), {
            'text': text,
            'type': type,
        })

    def getAaiWxAsrs(self, chunk, speech_id, end_flag, format_id, rate, bits,
                     seq, chunk_len, cont_res):
        """Call ``aai/aai_wxasrs`` with one audio ``chunk`` (raw bytes).

        The chunk is sent base64-encoded; the remaining arguments are passed
        through unchanged as ``speech_id``, ``end``, ``format``, ``rate``,
        ``bits``, ``seq``, ``len`` and ``cont_res``. Returns the result of
        ``invoke``.
        """
        # Decode to str: signing applies str() to each value, and str() of a
        # bytes object is "b'...'", which would not match the value that
        # urlencode actually transmits.
        speech_chunk = base64.b64encode(chunk).decode("utf-8")
        return self._signed_request('aai/aai_wxasrs', int(time.time()), {
            'speech_chunk': speech_chunk,
            'speech_id': speech_id,
            'end': end_flag,
            'format': format_id,
            'rate': rate,
            'bits': bits,
            'seq': seq,
            'len': chunk_len,
            'cont_res': cont_res,
        })


class TencentAPI(object):
    """Convenience wrapper that runs OCR through an ``AiPlat`` client.

    Replace the two placeholders below with your own application id and key.
    """
    app_id = "<APPID>"
    app_key = "<APP_KEY>"

    def __init__(self):
        """Create the underlying ``AiPlat`` client from the class credentials."""
        credentials = {"app_id": self.app_id, "app_key": self.app_key}
        self.api = AiPlat(**credentials)

    @staticmethod
    def get_image(image_path: str) -> bytes:
        """Read the file at ``image_path`` in binary mode and return its bytes."""
        with open(image_path, "rb") as image_file:
            content = image_file.read()
        return content

    def ocr(self, image: bytes):
        """Send ``image`` to ``AiPlat.getOcrGeneralocr`` and return its result."""
        result = self.api.getOcrGeneralocr(image)
        return result


if __name__ == '__main__':
    # Demo: recognise the text in ./test.png and print the raw API response.
    image_path = "test.png"
    tencent = TencentAPI()
    image_bytes = TencentAPI.get_image(image_path)
    print(tencent.ocr(image=image_bytes))

Search

    Table of Contents