123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179 |
- # -*- coding: utf-8 -*-
- # !/usr/bin/env python
- import os, sys,time,datetime
- import urllib
- import requests
- from mongoengine import register_connection, PointField, DynamicDocument, StringField
- import simplejson as json
- import base64
- import sys
- import json
- import base64
- import urllib2
- from urllib import quote_plus
- from urllib2 import urlopen
- from urllib2 import Request
- from urllib2 import URLError
- from urllib import urlencode
- from django.db.models.fields import DateTimeField
- PROJECT_ROOT = os.path.join(os.path.abspath(os.path.split(os.path.realpath(__file__))[0] + "/.."), '..')
- sys.path.insert(0, PROJECT_ROOT)
- os.environ.setdefault("DJANGO_SETTINGS_MODULE", "configs.testing")
- from script.base import init_env
- init_env(interactive = False)
- from apps.web.core.db import Searchable
- # 防止https证书校验不正确
- import ssl
# Register the MongoDB connection that documents reach through the 'spider'
# alias.
# SECURITY: host and credentials used to be hard-coded only.  The original
# literals are kept as defaults so existing deployments keep working, but
# they can now be overridden through the environment; prefer setting
# SPIDER_MONGO_HOST / SPIDER_MONGO_USER / SPIDER_MONGO_PASSWORD and rotating
# the password that has been committed to source control.
register_connection(
    alias='spider',
    name='spider',
    host=os.environ.get('SPIDER_MONGO_HOST', '116.62.228.194'),
    port=27017,
    username=os.environ.get('SPIDER_MONGO_USER', 'dba'),
    password=os.environ.get('SPIDER_MONGO_PASSWORD', 'dayuan@2020..'),
    authentication_source='admin',
)
class tiantianPort(Searchable):
    """Record kept in the ``tiantian_port`` collection of the ``spider`` connection.

    Holds two string fields, ``addr`` and ``portId``, both defaulting to the
    empty string.
    """

    # Address text; '' when not set.
    addr = StringField(default='')
    # Port identifier; '' when not set.
    portId = StringField(default='')

    # NOTE(review): 'unique_together' is not a standard mongoengine meta
    # option; presumably the Searchable base class interprets it -- confirm.
    meta = dict(
        collection='tiantian_port',
        db_alias='spider',
        unique_together=set(['portId']),
    )
# True when running under Python 3.  Index access (instead of ``.major``)
# also works on interpreters whose version_info is a plain tuple.
IS_PY3 = sys.version_info[0] == 3

# Expose the same five urllib names regardless of the interpreter version.
if IS_PY3:
    from urllib.request import urlopen
    from urllib.request import Request
    from urllib.error import URLError
    from urllib.parse import urlencode
    from urllib.parse import quote_plus
else:
    import urllib2
    from urllib import quote_plus
    from urllib2 import urlopen
    from urllib2 import Request
    from urllib2 import URLError
    from urllib import urlencode

# Work around HTTPS certificate verification failures by making unverified
# contexts the default for stdlib HTTPS clients.
# SECURITY: this disables certificate checking process-wide.
try:
    _unverified_context_factory = ssl._create_unverified_context
except AttributeError:
    # Interpreter without this hook: there is nothing to override, and the
    # original unconditional assignment would have crashed at import time.
    pass
else:
    ssl._create_default_https_context = _unverified_context_factory
# The Baidu OCR service is used to extract the address and the port number
# directly from the screenshots.
# SECURITY: the key pair used to be hard-coded only.  The original values
# stay as defaults for backward compatibility; set BAIDU_OCR_API_KEY /
# BAIDU_OCR_SECRET_KEY in the environment to override them, and rotate the
# pair that has been committed to source control.
API_KEY = os.environ.get('BAIDU_OCR_API_KEY', 'OVcN78LP40CBEwWk5REF2Hyu')
SECRET_KEY = os.environ.get('BAIDU_OCR_SECRET_KEY',
                            'a7luZBdbzjsfU9oE2GD3yPeTBgPty03t')
# OCR endpoint (accurate_basic variant) and the OAuth token endpoint.
OCR_URL = "https://aip.baidubce.com/rest/2.0/ocr/v1/accurate_basic"
TOKEN_URL = 'https://aip.baidubce.com/oauth/2.0/token'
def fetch_token():
    """Fetch an OAuth access token from TOKEN_URL with API_KEY / SECRET_KEY.

    Returns:
        The ``access_token`` string contained in the JSON response.

    Raises:
        SystemExit: with status 1 when the endpoint cannot be reached, when
            the response lacks ``access_token``/``scope``, or when the
            granted scope does not include ``brain_all_scope``.  A message
            is printed before exiting in each case.
    """
    params = {'grant_type': 'client_credentials',
              'client_id': API_KEY,
              'client_secret': SECRET_KEY}
    post_data = urlencode(params)
    if IS_PY3:
        # urlopen() requires a bytes body on Python 3.
        post_data = post_data.encode('utf-8')
    req = Request(TOKEN_URL, post_data)

    try:
        f = urlopen(req, timeout=5)
        try:
            result_str = f.read()
        finally:
            # try/finally rather than ``with``: Python 2 response objects
            # are not context managers.
            f.close()
    except URLError as err:
        # Previously execution fell through here and crashed with a
        # NameError on the undefined ``result_str``.
        print(err)
        sys.exit(1)

    if IS_PY3:
        result_str = result_str.decode()

    result = json.loads(result_str)
    if 'access_token' in result and 'scope' in result:
        if 'brain_all_scope' not in result['scope'].split(' '):
            print('please ensure has check the ability')
            sys.exit(1)
        return result['access_token']

    print('please overwrite the correct API_KEY and SECRET_KEY')
    sys.exit(1)
# Base URL of the OCR endpoint used for each screenshot.  This is the
# general_basic variant; the original trailing note named accurate_basic,
# presumably as the alternative endpoint.
request_url = "https://aip.baidubce.com/rest/2.0/ocr/v1/general_basic"
def read_file(image_path):
    """Return the raw bytes of the file at *image_path*.

    Args:
        image_path: path of the file to read in binary mode.

    Returns:
        The file content as bytes, or ``None`` when the path is invalid or
        the file cannot be opened/read (a message is printed in that case).
    """
    try:
        # The context manager closes the handle on every exit path.
        with open(image_path, 'rb') as f:
            return f.read()
    except (IOError, OSError, TypeError, ValueError):
        # Only path and I/O problems are swallowed; unlike the former bare
        # ``except:``, KeyboardInterrupt and SystemExit now propagate.
        print('read image file fail')
        return None
def request(url, data):
    """POST *data* to *url* and return the response body.

    Args:
        url: target URL.
        data: request body.  Text is encoded as UTF-8; a value that is
            already ``bytes`` is sent unchanged.

    Returns:
        The response body (decoded to text on Python 3, the raw string on
        Python 2), or ``None`` when a ``URLError`` occurs (the error is
        printed).
    """
    if not isinstance(data, bytes):
        data = data.encode('utf-8')
    req = Request(url, data)

    try:
        f = urlopen(req)
        try:
            result_str = f.read()
        finally:
            # try/finally rather than ``with``: Python 2 response objects
            # are not context managers.
            f.close()
    except URLError as err:
        print(err)
        return None

    if IS_PY3:
        result_str = result_str.decode()
    return result_str
# Driver: OCR every screenshot in ``rootdir`` and print the distinct text
# lines that survive the noise filter.

# Fetch the access token once and build the request URL once.  The original
# appended "?access_token=..." to ``request_url`` on every iteration, so the
# URL was malformed from the second image onwards.
token = fetch_token()
ocr_url = request_url + "?access_token=" + token
headers = {'content-type': 'application/x-www-form-urlencoded'}

# An OCR line containing any of these substrings is discarded.
_NOISE_MARKERS = (u'度起', u'共', 'km', u'搜索', u'闲', u'VIP', u'停车', u'度', '.')

rootdir = u'F:/蔚景云截图/'
listFile = os.listdir(rootdir)  # every entry (files and directories) in rootdir
resultList = []
for i, file_name in enumerate(listFile):
    print(i)
    path = os.path.join(rootdir, file_name)
    try:
        # Read the image in binary mode and base64-encode it for the form body.
        with open(path, 'rb') as f:
            img = base64.b64encode(f.read())

        # Call the text-recognition service.
        response = requests.post(ocr_url, data={"image": img}, headers=headers)

        # Parse the response; lines shorter than 7 characters are skipped.
        result_json = response.json()
        for words_result in result_json["words_result"]:
            text = words_result["words"]
            if len(text) < 7:
                continue
            if any(marker in text for marker in _NOISE_MARKERS):
                continue
            resultList.append(text)
    except Exception as err:
        # Best effort: one bad entry must not abort the whole run, but the
        # failure is reported instead of being silently dropped.  %r keeps
        # the message printable even for non-ASCII paths on Python 2.
        print('failed to process %r: %r' % (path, err))
        continue

# De-duplicate, then print the recognised text.
resultList = list(set(resultList))
for result in resultList:
    print(result)
print('OK')
|