#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Repository: dayuan / manyi

import os, sys,time,datetime
import urllib
import requests
from mongoengine import register_connection, PointField, DynamicDocument, StringField
import simplejson as json
import base64
import sys
import json
import base64
import urllib2
from urllib import quote_plus
from urllib2 import urlopen
from urllib2 import Request
from urllib2 import URLError
from urllib import urlencode

from django.db.models.fields import DateTimeField


# Repository root: two directory levels above the directory holding this script.
PROJECT_ROOT = os.path.join(
    os.path.abspath(os.path.dirname(os.path.realpath(__file__)) + "/.."),
    '..',
)
sys.path.insert(0, PROJECT_ROOT)

# Only applies when DJANGO_SETTINGS_MODULE is not already set in the environment.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "configs.testing")

# Imported after sys.path is extended; presumably ``script`` is only
# resolvable once PROJECT_ROOT is on the path -- TODO confirm.
from script.base import init_env

init_env(interactive=False)

from apps.web.core.db import Searchable


# Avoid failures caused by HTTPS certificate verification.
import ssl

# Register the MongoDB connection under the ``spider`` alias.
# NOTE(review): host and credentials are hard-coded in source; consider
# loading them from configuration or the environment.
register_connection(
    alias='spider',
    name='spider',
    host='116.62.228.194',
    port=27017,
    username='dba',
    password='dayuan@2020..',
    authentication_source='admin',
)


class tiantianPort(Searchable):
    """Document mapped to the ``tiantian_port`` collection on the ``spider`` connection."""

    # Both fields are strings that default to the empty string.
    addr = StringField(default='')
    portId = StringField(default='')

    meta = dict(
        collection='tiantian_port',
        db_alias='spider',
        unique_together={'portId'},
    )

# Pick the urllib flavour that matches the running interpreter.
IS_PY3 = sys.version_info[0] == 3
if not IS_PY3:
    import urllib2
    from urllib import quote_plus
    from urllib import urlencode
    from urllib2 import Request
    from urllib2 import URLError
    from urllib2 import urlopen
else:
    from urllib.error import URLError
    from urllib.parse import quote_plus
    from urllib.parse import urlencode
    from urllib.request import Request
    from urllib.request import urlopen

# NOTE(review): this swaps the default HTTPS context factory for the
# unverified one, so certificate verification is skipped for any code in the
# process that relies on the default context.
ssl._create_default_https_context = ssl._create_unverified_context

# Use the Baidu app to parse the address and the port number directly from
# the screenshots.
# NOTE(review): the key pair below is committed in source; consider moving it
# to configuration.
API_KEY = "OVcN78LP40CBEwWk5REF2Hyu"
SECRET_KEY = "a7luZBdbzjsfU9oE2GD3yPeTBgPty03t"
# High-accuracy OCR endpoint.
OCR_URL = 'https://aip.baidubce.com/rest/2.0/ocr/v1/accurate_basic'
# OAuth endpoint that fetch_token() posts the key pair to.
TOKEN_URL = "https://aip.baidubce.com/oauth/2.0/token"

def fetch_token():
    """Request an OAuth access token from TOKEN_URL.

    Posts API_KEY and SECRET_KEY as a ``client_credentials`` grant and parses
    the JSON reply.

    Returns:
        The ``access_token`` value from the reply.

    Raises:
        SystemExit: after printing a message, when the request fails with
            URLError, when the reply lacks ``access_token`` or ``scope``, or
            when the granted scope does not contain ``brain_all_scope``.
    """
    params = {
        'grant_type': 'client_credentials',
        'client_id': API_KEY,
        'client_secret': SECRET_KEY,
    }
    post_data = urlencode(params)
    if IS_PY3:
        # On Python 3 the POST body must be bytes.
        post_data = post_data.encode('utf-8')
    req = Request(TOKEN_URL, post_data)

    try:
        f = urlopen(req, timeout=5)
        try:
            result_str = f.read()
        finally:
            # Always release the connection, even if read() fails.
            f.close()
    except URLError as err:
        # There is no reply to parse. Falling through here used to crash
        # with NameError on the unbound result_str.
        print(err)
        sys.exit(1)

    if IS_PY3:
        result_str = result_str.decode()

    result = json.loads(result_str)

    if 'access_token' in result and 'scope' in result:
        if 'brain_all_scope' not in result['scope'].split(' '):
            print('please ensure has check the  ability')
            sys.exit(1)
        return result['access_token']

    print('please overwrite the correct API_KEY and SECRET_KEY')
    sys.exit(1)
        
# OCR endpoint in use: general_basic (accurate_basic is the alternative).
request_url = 'https://aip.baidubce.com/rest/2.0/ocr/v1/general_basic'

def read_file(image_path):
    """Return the raw bytes of the file at *image_path*.

    Args:
        image_path: Path of the file to read in binary mode.

    Returns:
        The file content as bytes, or None when the file cannot be read
        (a message is printed in that case).
    """
    try:
        # The context manager closes the handle on every exit path.
        with open(image_path, 'rb') as f:
            return f.read()
    except Exception:
        # Stay best-effort for any ordinary error, but no longer swallow
        # KeyboardInterrupt / SystemExit as the bare ``except:`` did.
        print('read image file fail')
        return None
            
def request(url, data):
    """POST *data* to *url* and return the response body.

    Args:
        url: Target URL.
        data: Text payload; it is UTF-8 encoded before being sent.

    Returns:
        The response body (decoded to ``str`` on Python 3), or None when the
        request fails with URLError (the error is printed).
    """
    req = Request(url, data.encode('utf-8'))
    try:
        f = urlopen(req)
        try:
            result_str = f.read()
        finally:
            # Release the connection whether or not read() succeeded.
            f.close()
    except URLError as err:
        print(err)
        return None

    if IS_PY3:
        result_str = result_str.decode()
    return result_str
        
# Fetch the access token once; every OCR call reuses it.
token = fetch_token()

# Build the OCR endpoint a single time. Appending the query string inside
# the loop made the URL grow by another "?access_token=..." per screenshot.
ocr_endpoint = request_url + "?access_token=" + token
headers = {'content-type': 'application/x-www-form-urlencoded'}

# A recognised line containing any of these fragments is discarded.
noise_markers = (u'度起', u'共', 'km', u'搜索', u'闲', u'VIP', u'停车', u'度', '.')

rootdir = u'F:/蔚景云截图/'
listFile = os.listdir(rootdir)  # every entry (files and directories) in rootdir
resultList = []
for i, file_name in enumerate(listFile):
    print(i)
    try:
        path = os.path.join(rootdir, file_name)
        # Open the image in binary mode; ``with`` closes it even if read() fails.
        with open(path, 'rb') as f:
            img = base64.b64encode(f.read())

        # Call the text-recognition service.
        response = requests.post(ocr_endpoint, data={"image": img}, headers=headers)

        # Parse the reply and keep the lines that pass the filters.
        result_json = response.json()
        for words_result in result_json["words_result"]:
            text = words_result["words"]
            if len(text) < 7:
                continue
            if any(marker in text for marker in noise_markers):
                continue
            resultList.append(text)
    except Exception as err:
        # Stay best-effort (one bad entry must not stop the run), but report
        # the failure instead of hiding it.
        print('skip entry %d: %r' % (i, err))
        continue

# De-duplicate, then print the collected text.
resultList = list(set(resultList))
for result in resultList:
    print(result)

print('OK')