#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Run Baidu OCR over a folder of screenshots and print the text lines kept.

The script registers a MongoEngine connection, declares the ``tiantianPort``
document, fetches a Baidu access token, sends every file in ``rootdir`` to the
OCR endpoint and prints the de-duplicated text lines that survive the noise
filter.
"""
import os
import sys
import time
import datetime
import urllib
import base64
import ssl
from contextlib import closing

import requests
from mongoengine import register_connection, PointField, DynamicDocument, StringField
# The stdlib ``json`` import below rebinds the name, so stdlib ``json`` is the
# module actually used; the simplejson import is kept from the original file.
import simplejson as json
import json
from django.db.models.fields import DateTimeField

PROJECT_ROOT = os.path.join(os.path.abspath(os.path.split(os.path.realpath(__file__))[0] + "/.."), '..')
sys.path.insert(0, PROJECT_ROOT)
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "configs.testing")

# These project imports rely on PROJECT_ROOT having been put on sys.path above.
from script.base import init_env

init_env(interactive=False)

from apps.web.core.db import Searchable

# NOTE(review): database credentials are hard-coded in source; consider moving
# them to configuration or environment variables.
register_connection(alias='spider', name='spider', host='116.62.228.194', port=27017, username='dba',
                    password='dayuan@2020..', authentication_source='admin')


class tiantianPort(Searchable):
    """Document stored in the ``tiantian_port`` collection of the ``spider`` alias."""

    addr = StringField(default='')
    portId = StringField(default='')

    # NOTE(review): 'unique_together' is presumably interpreted by Searchable;
    # confirm against its implementation.
    meta = {
        'collection': 'tiantian_port',
        'db_alias': 'spider',
        'unique_together': {'portId'}
    }


IS_PY3 = sys.version_info.major == 3

# The Python 2-only imports live exclusively in the ``else`` branch so that the
# module can also be imported under Python 3.
if IS_PY3:
    from urllib.request import urlopen
    from urllib.request import Request
    from urllib.error import URLError
    from urllib.parse import urlencode
    from urllib.parse import quote_plus
else:
    import urllib2
    from urllib import quote_plus
    from urllib2 import urlopen
    from urllib2 import Request
    from urllib2 import URLError
    from urllib import urlencode

# Work around HTTPS certificate validation failures by disabling verification
# for urllib's default HTTPS context.
# NOTE(review): this turns off TLS certificate checks process-wide for urllib.
ssl._create_default_https_context = ssl._create_unverified_context

# Use the Baidu OCR service to parse the address and the port number directly
# out of the screenshots.
# NOTE(review): API credentials are hard-coded in source.
API_KEY = 'OVcN78LP40CBEwWk5REF2Hyu'
SECRET_KEY = 'a7luZBdbzjsfU9oE2GD3yPeTBgPty03t'

OCR_URL = "https://aip.baidubce.com/rest/2.0/ocr/v1/accurate_basic"

TOKEN_URL = 'https://aip.baidubce.com/oauth/2.0/token'

# Endpoint actually used by the driver below (alternative: accurate_basic).
request_url = "https://aip.baidubce.com/rest/2.0/ocr/v1/general_basic"

# Seconds to wait for the OCR endpoint before giving up on one image.
OCR_TIMEOUT = 30

# Recognized lines shorter than this are dropped.
MIN_TEXT_LENGTH = 7

# A recognized line containing any of these substrings is dropped.
# (u'度' also covers u'度起'; both are kept as in the original filter.)
NOISE_KEYWORDS = (u'度起', u'共', u'km', u'搜索', u'闲', u'VIP', u'停车', u'度', u'.')


def fetch_token():
    """Request an access token from TOKEN_URL using API_KEY / SECRET_KEY.

    Returns:
        The ``access_token`` value of the JSON response.

    Exits the process with status 1 (after printing a message) when the
    request fails, when the response lacks ``access_token``/``scope``, or when
    ``brain_all_scope`` is not among the granted scopes.
    """
    params = {'grant_type': 'client_credentials',
              'client_id': API_KEY,
              'client_secret': SECRET_KEY}
    post_data = urlencode(params)
    if IS_PY3:
        post_data = post_data.encode('utf-8')
    req = Request(TOKEN_URL, post_data)
    try:
        # closing() is used because Python 2 responses are not context managers.
        with closing(urlopen(req, timeout=5)) as resp:
            result_str = resp.read()
    except URLError as err:
        # Without a response there is nothing to parse; stop here instead of
        # falling through to an unbound ``result_str``.
        print(err)
        sys.exit(1)
    if IS_PY3:
        result_str = result_str.decode()

    result = json.loads(result_str)
    if 'access_token' in result and 'scope' in result:
        if 'brain_all_scope' not in result['scope'].split(' '):
            print('please ensure has check the ability')
            sys.exit(1)
        return result['access_token']
    print('please overwrite the correct API_KEY and SECRET_KEY')
    sys.exit(1)


def read_file(image_path):
    """Return the raw bytes of ``image_path``, or None if it cannot be read.

    A failure prints 'read image file fail' and is otherwise swallowed.
    """
    try:
        with open(image_path, 'rb') as f:
            return f.read()
    except (IOError, OSError):
        print('read image file fail')
        return None


def request(url, data):
    """POST ``data`` (text, encoded as UTF-8) to ``url`` and return the body.

    Returns:
        The response body (decoded to text on Python 3), or None when a
        URLError occurred; the error is printed.
    """
    req = Request(url, data.encode('utf-8'))
    try:
        with closing(urlopen(req)) as resp:
            result_str = resp.read()
    except URLError as err:
        print(err)
        return None
    if IS_PY3:
        result_str = result_str.decode()
    return result_str


def _is_candidate_text(text):
    """Return True when a recognized line passes the length and noise filter."""
    if len(text) < MIN_TEXT_LENGTH:
        return False
    return not any(keyword in text for keyword in NOISE_KEYWORDS)


def _recognize_image(image_path, url):
    """Send one image to the OCR endpoint ``url`` and return its text lines.

    Returns an empty list when the file cannot be read or when the response
    carries no ``words_result`` entry (the response is printed in that case).
    """
    raw = read_file(image_path)
    if raw is None:
        return []
    params = {"image": base64.b64encode(raw)}
    headers = {'content-type': 'application/x-www-form-urlencoded'}
    response = requests.post(url, data=params, headers=headers, timeout=OCR_TIMEOUT)
    result_json = response.json()
    words_result = result_json.get("words_result")
    if words_result is None:
        print('unexpected OCR response for %r: %r' % (image_path, result_json))
        return []
    return [entry["words"] for entry in words_result]


# Obtain the access token.
token = fetch_token()

# Build the OCR URL once; appending the query string inside the loop would
# produce a malformed URL from the second image onwards.
ocr_url = request_url + "?access_token=" + token

# NOTE(review): in the collapsed original it is unclear whether this
# assignment was commented out; the loop below needs it, so it is defined here.
rootdir = u'F:/蔚景云截图/'
listFile = os.listdir(rootdir)  # every entry (files and directories) in the folder

resultList = []
for i, file_name in enumerate(listFile):
    print(i)
    path = os.path.join(rootdir, file_name)
    try:
        # Call the OCR service and keep only the lines that pass the filter.
        lines = _recognize_image(path, ocr_url)
        resultList.extend(text for text in lines if _is_candidate_text(text))
    except Exception as err:
        # Best effort: one bad image must not stop the batch, but say why it
        # was skipped. %r keeps the message ASCII-safe on Python 2.
        print('skip %r: %r' % (path, err))

# De-duplicate; sorting makes the printed order deterministic.
resultList = sorted(set(resultList))
for result in resultList:
    print(result)

print('OK')