123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652 |
- # -*- coding: utf-8 -*-
- """
- 图像识别
- """
- import re
- import sys
- import math
- import time
- from .base import AipBase
- from .base import base64
- from .base import json
- from .base import urlencode
- from .base import quote
class AipOcr(AipBase):
    """
    Client for the OCR (text / document recognition) REST endpoints.

    Shared conventions for the public methods:

    * ``image`` is passed to ``base64.b64encode`` and sent as the ``image``
      field, so it must be a bytes-like object (presumably the raw content
      of the image file -- confirm against callers).
    * ``url`` is sent unchanged as the ``url`` field.
    * ``options`` is an optional mapping of additional request fields. It is
      merged into the payload last, so its keys override the fields set by
      the method itself. ``None`` (or any falsy value) means "no options".
    * Every method returns whatever ``self._request`` returns. ``_request``
      is inherited from ``AipBase`` and is not visible here;
      ``tableRecognition`` treats its result as a dict that contains an
      ``error_code`` key on failure.
    """

    __generalBasicUrl = 'https://aip.baidubce.com/rest/2.0/ocr/v1/general_basic'
    __accurateBasicUrl = 'https://aip.baidubce.com/rest/2.0/ocr/v1/accurate_basic'
    __generalUrl = 'https://aip.baidubce.com/rest/2.0/ocr/v1/general'
    __accurateUrl = 'https://aip.baidubce.com/rest/2.0/ocr/v1/accurate'
    __generalEnhancedUrl = 'https://aip.baidubce.com/rest/2.0/ocr/v1/general_enhanced'
    __webImageUrl = 'https://aip.baidubce.com/rest/2.0/ocr/v1/webimage'
    __idcardUrl = 'https://aip.baidubce.com/rest/2.0/ocr/v1/idcard'
    __bankcardUrl = 'https://aip.baidubce.com/rest/2.0/ocr/v1/bankcard'
    __drivingLicenseUrl = 'https://aip.baidubce.com/rest/2.0/ocr/v1/driving_license'
    __vehicleLicenseUrl = 'https://aip.baidubce.com/rest/2.0/ocr/v1/vehicle_license'
    __licensePlateUrl = 'https://aip.baidubce.com/rest/2.0/ocr/v1/license_plate'
    __businessLicenseUrl = 'https://aip.baidubce.com/rest/2.0/ocr/v1/business_license'
    __receiptUrl = 'https://aip.baidubce.com/rest/2.0/ocr/v1/receipt'
    __trainTicketUrl = 'https://aip.baidubce.com/rest/2.0/ocr/v1/train_ticket'
    __taxiReceiptUrl = 'https://aip.baidubce.com/rest/2.0/ocr/v1/taxi_receipt'
    __formUrl = 'https://aip.baidubce.com/rest/2.0/ocr/v1/form'
    __tableRecognizeUrl = 'https://aip.baidubce.com/rest/2.0/solution/v1/form_ocr/request'
    __tableResultGetUrl = 'https://aip.baidubce.com/rest/2.0/solution/v1/form_ocr/get_request_result'
    __vinCodeUrl = 'https://aip.baidubce.com/rest/2.0/ocr/v1/vin_code'
    __quotaInvoiceUrl = 'https://aip.baidubce.com/rest/2.0/ocr/v1/quota_invoice'
    __householdRegisterUrl = 'https://aip.baidubce.com/rest/2.0/ocr/v1/household_register'
    __HKMacauExitentrypermitUrl = 'https://aip.baidubce.com/rest/2.0/ocr/v1/HK_Macau_exitentrypermit'
    __taiwanExitentrypermitUrl = 'https://aip.baidubce.com/rest/2.0/ocr/v1/taiwan_exitentrypermit'
    __birthCertificateUrl = 'https://aip.baidubce.com/rest/2.0/ocr/v1/birth_certificate'
    __vehicleInvoiceUrl = 'https://aip.baidubce.com/rest/2.0/ocr/v1/vehicle_invoice'
    __vehicleCertificateUrl = 'https://aip.baidubce.com/rest/2.0/ocr/v1/vehicle_certificate'
    __invoiceUrl = 'https://aip.baidubce.com/rest/2.0/ocr/v1/invoice'
    __airTicketUrl = 'https://aip.baidubce.com/rest/2.0/ocr/v1/air_ticket'
    __insuranceDocumentsUrl = 'https://aip.baidubce.com/rest/2.0/ocr/v1/insurance_documents'
    __vatInvoiceUrl = 'https://aip.baidubce.com/rest/2.0/ocr/v1/vat_invoice'
    __qrcodeUrl = 'https://aip.baidubce.com/rest/2.0/ocr/v1/qrcode'
    __numbersUrl = 'https://aip.baidubce.com/rest/2.0/ocr/v1/numbers'
    __lotteryUrl = 'https://aip.baidubce.com/rest/2.0/ocr/v1/lottery'
    __passportUrl = 'https://aip.baidubce.com/rest/2.0/ocr/v1/passport'
    __businessCardUrl = 'https://aip.baidubce.com/rest/2.0/ocr/v1/business_card'
    __handwritingUrl = 'https://aip.baidubce.com/rest/2.0/ocr/v1/handwriting'
    __customUrl = 'https://aip.baidubce.com/rest/2.0/solution/v1/iocr/recognise'

    # ------------------------------------------------------------------
    # Private helpers (name-mangled, so they cannot clash with AipBase).
    # ------------------------------------------------------------------

    def __post(self, endpoint, fields, options):
        """
        Merge ``options`` over ``fields`` and send the payload to ``endpoint``.

        ``fields`` is copied, so the caller's mapping is never mutated.
        ``options`` is applied last so that it can override ``fields``.
        """
        data = dict(fields)
        data.update(options or {})
        return self._request(endpoint, data)

    def __postImage(self, endpoint, image, options, extra=None):
        """
        Send base64-encoded ``image`` (plus optional ``extra`` fields) to
        ``endpoint``.

        Merge order is ``image`` -> ``extra`` -> ``options``.
        """
        data = {'image': base64.b64encode(image).decode()}
        if extra:
            data.update(extra)
        return self.__post(endpoint, data, options)

    # ------------------------------------------------------------------
    # General text recognition
    # ------------------------------------------------------------------

    def basicGeneral(self, image, options=None):
        """
        General text recognition (basic version), from image bytes.
        """
        return self.__postImage(self.__generalBasicUrl, image, options)

    def basicGeneralUrl(self, url, options=None):
        """
        General text recognition (basic version), from an image URL.
        """
        return self.__post(self.__generalBasicUrl, {'url': url}, options)

    def basicAccurate(self, image, options=None):
        """
        General text recognition (high-accuracy version), from image bytes.
        """
        return self.__postImage(self.__accurateBasicUrl, image, options)

    def general(self, image, options=None):
        """
        General text recognition with position information, from image bytes.
        """
        return self.__postImage(self.__generalUrl, image, options)

    def generalUrl(self, url, options=None):
        """
        General text recognition with position information, from an image URL.
        """
        return self.__post(self.__generalUrl, {'url': url}, options)

    def accurate(self, image, options=None):
        """
        General text recognition with position information (high-accuracy
        version), from image bytes.
        """
        return self.__postImage(self.__accurateUrl, image, options)

    def enhancedGeneral(self, image, options=None):
        """
        General text recognition including rare characters, from image bytes.
        """
        return self.__postImage(self.__generalEnhancedUrl, image, options)

    def enhancedGeneralUrl(self, url, options=None):
        """
        General text recognition including rare characters, from an image URL.
        """
        return self.__post(self.__generalEnhancedUrl, {'url': url}, options)

    def webImage(self, image, options=None):
        """
        Web-image text recognition, from image bytes.
        """
        return self.__postImage(self.__webImageUrl, image, options)

    def webImageUrl(self, url, options=None):
        """
        Web-image text recognition, from an image URL.
        """
        return self.__post(self.__webImageUrl, {'url': url}, options)

    # ------------------------------------------------------------------
    # Cards, licenses and certificates
    # ------------------------------------------------------------------

    def idcard(self, image, id_card_side, options=None):
        """
        ID card recognition.

        ``id_card_side`` is sent unchanged as the ``id_card_side`` field
        (presumably selects which side of the card the image shows; the
        accepted values are defined by the remote API, not by this code).
        """
        return self.__postImage(
            self.__idcardUrl, image, options,
            extra={'id_card_side': id_card_side})

    def bankcard(self, image, options=None):
        """
        Bank card recognition.
        """
        return self.__postImage(self.__bankcardUrl, image, options)

    def drivingLicense(self, image, options=None):
        """
        Driving license recognition.
        """
        return self.__postImage(self.__drivingLicenseUrl, image, options)

    def vehicleLicense(self, image, options=None):
        """
        Vehicle license recognition.
        """
        return self.__postImage(self.__vehicleLicenseUrl, image, options)

    def licensePlate(self, image, options=None):
        """
        License plate recognition.
        """
        return self.__postImage(self.__licensePlateUrl, image, options)

    def businessLicense(self, image, options=None):
        """
        Business license recognition.
        """
        return self.__postImage(self.__businessLicenseUrl, image, options)

    # ------------------------------------------------------------------
    # Receipts and tickets
    # ------------------------------------------------------------------

    def receipt(self, image, options=None):
        """
        General receipt recognition.
        """
        return self.__postImage(self.__receiptUrl, image, options)

    def trainTicket(self, image, options=None):
        """
        Train ticket recognition.
        """
        return self.__postImage(self.__trainTicketUrl, image, options)

    def taxiReceipt(self, image, options=None):
        """
        Taxi receipt recognition.
        """
        return self.__postImage(self.__taxiReceiptUrl, image, options)

    # ------------------------------------------------------------------
    # Tables
    # ------------------------------------------------------------------

    def form(self, image, options=None):
        """
        Table text recognition (synchronous interface).
        """
        return self.__postImage(self.__formUrl, image, options)

    def tableRecognitionAsync(self, image, options=None):
        """
        Submit an image for table text recognition.

        ``tableRecognition`` reads the request id of the submitted job from
        ``result[0]['request_id']`` of the returned value.
        """
        return self.__postImage(self.__tableRecognizeUrl, image, options)

    def getTableRecognitionResult(self, request_id, options=None):
        """
        Fetch the result of a table recognition request.

        ``request_id`` is sent unchanged as the ``request_id`` field.
        """
        return self.__post(
            self.__tableResultGetUrl, {'request_id': request_id}, options)

    # ------------------------------------------------------------------
    # Other document types
    # ------------------------------------------------------------------

    def vinCode(self, image, options=None):
        """
        VIN code recognition.
        """
        return self.__postImage(self.__vinCodeUrl, image, options)

    def quotaInvoice(self, image, options=None):
        """
        Quota invoice recognition.
        """
        return self.__postImage(self.__quotaInvoiceUrl, image, options)

    def householdRegister(self, image, options=None):
        """
        Household register recognition.
        """
        return self.__postImage(self.__householdRegisterUrl, image, options)

    def HKMacauExitentrypermit(self, image, options=None):
        """
        Hong Kong / Macau exit-entry permit recognition.
        """
        return self.__postImage(
            self.__HKMacauExitentrypermitUrl, image, options)

    def taiwanExitentrypermit(self, image, options=None):
        """
        Taiwan exit-entry permit recognition.
        """
        return self.__postImage(
            self.__taiwanExitentrypermitUrl, image, options)

    def birthCertificate(self, image, options=None):
        """
        Medical birth certificate recognition.
        """
        return self.__postImage(self.__birthCertificateUrl, image, options)

    def vehicleInvoice(self, image, options=None):
        """
        Motor vehicle sales invoice recognition.
        """
        return self.__postImage(self.__vehicleInvoiceUrl, image, options)

    def vehicleCertificate(self, image, options=None):
        """
        Vehicle certificate of conformity recognition.
        """
        return self.__postImage(self.__vehicleCertificateUrl, image, options)

    def invoice(self, image, options=None):
        """
        Tax-bureau general machine-printed invoice recognition.
        """
        return self.__postImage(self.__invoiceUrl, image, options)

    def airTicket(self, image, options=None):
        """
        Air travel itinerary recognition.
        """
        return self.__postImage(self.__airTicketUrl, image, options)

    def insuranceDocuments(self, image, options=None):
        """
        Insurance policy document recognition.
        """
        return self.__postImage(self.__insuranceDocumentsUrl, image, options)

    def vatInvoice(self, image, options=None):
        """
        VAT invoice recognition.
        """
        return self.__postImage(self.__vatInvoiceUrl, image, options)

    def qrcode(self, image, options=None):
        """
        QR code recognition.
        """
        return self.__postImage(self.__qrcodeUrl, image, options)

    def numbers(self, image, options=None):
        """
        Number (digit) recognition.
        """
        return self.__postImage(self.__numbersUrl, image, options)

    def lottery(self, image, options=None):
        """
        Lottery ticket recognition.
        """
        return self.__postImage(self.__lotteryUrl, image, options)

    def passport(self, image, options=None):
        """
        Passport recognition.
        """
        return self.__postImage(self.__passportUrl, image, options)

    def businessCard(self, image, options=None):
        """
        Business card recognition.
        """
        return self.__postImage(self.__businessCardUrl, image, options)

    def handwriting(self, image, options=None):
        """
        Handwritten text recognition.
        """
        return self.__postImage(self.__handwritingUrl, image, options)

    def custom(self, image, options=None):
        """
        Custom-template text recognition.
        """
        return self.__postImage(self.__customUrl, image, options)

    # ------------------------------------------------------------------
    # Convenience wrapper: submit a table job and poll for its result
    # ------------------------------------------------------------------

    def tableRecognition(self, image, options=None, timeout=10000):
        """
        Submit a table recognition job and poll until it completes.

        The image is submitted with ``tableRecognitionAsync`` (without
        ``options``); ``options`` is forwarded only to each
        ``getTableRecognitionResult`` poll.

        ``timeout`` is in milliseconds. The result is polled at most
        ``ceil(timeout / 1000)`` times, one second apart.

        Returns:
            * the submission response, if it contains ``error_code`` or if
              ``timeout`` allows no polls;
            * the first poll response that contains ``error_code``;
            * otherwise the last poll response -- either the completed one
              (``ret_code == 3``) or whatever the final poll returned when
              the timeout is exhausted.
        """
        result = self.tableRecognitionAsync(image)
        if 'error_code' in result:
            return result

        requestId = result['result'][0]['request_id']
        attempts = int(math.ceil(timeout / 1000.0))
        for attempt in range(attempts):
            result = self.getTableRecognitionResult(requestId, options)

            # An error response has no usable 'result' payload; hand it back
            # to the caller instead of failing with KeyError below.
            if 'error_code' in result:
                return result

            # ret_code 3 marks the job as finished.
            if int(result['result']['ret_code']) == 3:
                break

            # Only wait when another poll will follow.
            if attempt + 1 < attempts:
                time.sleep(1)
        return result
|