# dayuan / manyi
							# -*- coding: utf-8 -*-
# !/usr/bin/env python

import os, sys,time,datetime
import urllib
import requests
from mongoengine import register_connection, PointField, DynamicDocument, StringField
import simplejson as json
import base64
import sys
import json
import base64
import urllib2
from urllib import quote_plus
from urllib2 import urlopen
from urllib2 import Request
from urllib2 import URLError
from urllib import urlencode

from django.db.models.fields import DateTimeField

import shutil

# Build a path two levels above this script's directory and put it first on
# sys.path (presumably so the project packages imported below resolve).
PROJECT_ROOT = os.path.join(os.path.abspath(os.path.split(os.path.realpath(__file__))[0] + "/.."), '..')
sys.path.insert(0, PROJECT_ROOT)

# Select the Django settings module unless the environment already names one.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "configs.testing")

from script.base import init_env

# NOTE(review): init_env is project code that is not visible here; presumably
# it bootstraps the application environment in non-interactive mode - confirm.
init_env(interactive = False)

from apps.web.core.db import Searchable


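# Imported to work around HTTPS certificate verification failures (the default HTTPS context is overridden further down).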
import ssl

# Register the MongoDB connection available under the alias 'spider'
# (database name 'spider', authenticating against the 'admin' database).
# NOTE(review): the host address and credentials are hard-coded in source;
# consider loading them from configuration or the environment instead.
register_connection(alias = 'spider',
                    name = 'spider',
                    host = '211.159.224.10',
                    port = 27017,
                    username = 'dba',
                    password = 'dayuan@2020..',
                    authentication_source = 'admin')


class xingxingStation(Searchable):    
    """Document stored in the 'xingxing_station' collection.

    Records are read and written through the connection registered under the
    'spider' alias. Behaviour inherited from Searchable is defined elsewhere.
    """
    # Presumably the station's display name; defaults to an empty string.
    name = StringField(default = '')
    # Presumably the station's customer-service phone number; defaults to ''.
    servicePhone = StringField(default = '')
    # Presumably the operating company; defaults to ''.
    company = StringField(default = '')
    
    
    meta = {
        'collection': 'xingxing_station',
        'db_alias': 'spider',
        # NOTE(review): 'unique_together' is not one of mongoengine's
        # documented meta options; confirm that Searchable interprets it,
        # otherwise uniqueness of 'name' is not enforced.
        'unique_together': {'name'}
    }

# Expose urlopen / Request / URLError / urlencode / quote_plus under the same
# names regardless of which major Python version runs the script.
IS_PY3 = sys.version_info[0] == 3
if not IS_PY3:
    import urllib2
    from urllib import quote_plus, urlencode
    from urllib2 import Request, URLError, urlopen
else:
    from urllib.error import URLError
    from urllib.parse import quote_plus, urlencode
    from urllib.request import Request, urlopen

# Make every default HTTPS context skip certificate verification.
# NOTE(review): this disables TLS certificate checks process-wide, which
# exposes all HTTPS traffic of this script to interception.
ssl._create_default_https_context = ssl._create_unverified_context

# Use the Baidu app (OCR API) to parse the address and the port number
# directly from the screenshots.
# NOTE(review): the API key and secret are hard-coded in source; consider
# loading them from configuration or the environment instead.
API_KEY = 'OVcN78LP40CBEwWk5REF2Hyu'
SECRET_KEY = 'a7luZBdbzjsfU9oE2GD3yPeTBgPty03t'
# NOTE(review): OCR_URL is not referenced anywhere in the visible code; the
# OCR calls are built from request_url instead.
OCR_URL = "https://aip.baidubce.com/rest/2.0/ocr/v1/accurate_basic"
# Endpoint that fetch_token() posts the credentials to.
TOKEN_URL = 'https://aip.baidubce.com/oauth/2.0/token'

def fetch_token():
    """Request an OAuth access token from TOKEN_URL and return it.

    API_KEY and SECRET_KEY are posted with the ``client_credentials`` grant
    and the ``access_token`` value of the JSON reply is returned.

    The process is terminated (``SystemExit`` with status 1) when the HTTP
    request fails, when the reply carries no ``access_token``/``scope``, or
    when the granted scope does not contain ``brain_all_scope``.
    """
    params = {'grant_type': 'client_credentials',
              'client_id': API_KEY,
              'client_secret': SECRET_KEY}
    post_data = urlencode(params)
    if IS_PY3:
        # The Python 3 Request object needs the POST body as bytes.
        post_data = post_data.encode('utf-8')
    req = Request(TOKEN_URL, post_data)

    try:
        f = urlopen(req, timeout=5)
        try:
            result_str = f.read()
        finally:
            # Python 2 responses are not context managers, so close by hand.
            f.close()
    except URLError as err:
        # Without a reply there is nothing to parse; stop here instead of
        # falling through to an unbound result_str.
        print(err)
        sys.exit(1)

    if IS_PY3:
        result_str = result_str.decode()

    result = json.loads(result_str)

    if 'access_token' in result and 'scope' in result:
        if 'brain_all_scope' not in result['scope'].split(' '):
            print ('please ensure has check the  ability')
            sys.exit(1)
        return result['access_token']

    print ('please overwrite the correct API_KEY and SECRET_KEY')
    sys.exit(1)
        
# OCR endpoint that the screenshot loop posts images to ("general_basic");
# the trailing note records "accurate_basic" as the alternative endpoint name.
request_url = "https://aip.baidubce.com/rest/2.0/ocr/v1/general_basic" # accurate_basic

def read_file(image_path):
    """Return the raw bytes of the file at ``image_path``.

    Returns ``None`` (after printing a short message) when the file cannot
    be opened or read, or when ``image_path`` is not a usable path value.
    Interrupts such as ``KeyboardInterrupt`` are deliberately not swallowed.
    """
    try:
        # The context manager closes the handle on success and on failure.
        with open(image_path, 'rb') as f:
            return f.read()
    except (IOError, OSError, TypeError, ValueError):
        # IOError/OSError: missing or unreadable file.
        # TypeError/ValueError: path argument of the wrong type or form.
        print('read image file fail')
        return None
            
def request(url, data):
    """POST ``data`` to ``url`` and return the response body.

    ``data`` is a text string; it is UTF-8 encoded before sending. On
    Python 3 the reply bytes are decoded to ``str`` before being returned.

    Returns ``None`` when the request raises ``URLError`` (the error is
    printed).
    """
    req = Request(url, data.encode('utf-8'))
    try:
        f = urlopen(req)
        try:
            result_str = f.read()
        finally:
            # Python 2 responses are not context managers, so close by hand.
            f.close()
    except URLError as err:
        print(err)
        return None

    if IS_PY3:
        result_str = result_str.decode()
    return result_str
        
# Main script: run every screenshot in rootdir through the OCR endpoint and
# copy those whose recognised text contains one of the two operator markers
# into a separate folder.

# Fetch the access token once; it is reused for every OCR call below.
token = fetch_token()

# Folder holding the station-list screenshots.
rootdir = u'Q:/友商信息/汽车桩/星星充电/站列表截图'
# Destination folder for matching screenshots. This must be a unicode
# literal: on Python 2, combining a non-ASCII byte string with the unicode
# file names returned by os.listdir() forces an implicit ASCII decode that
# raises UnicodeDecodeError.
copydir = u'Q:/友商信息/汽车桩/星星充电/他营'
listFile = os.listdir(rootdir)  # every entry (files and directories) in rootdir
resultList = []

# Loop-invariant pieces of the OCR request.
request_url1 = request_url + "?access_token=" + token
headers = {'content-type': 'application/x-www-form-urlencoded'}

for i, filename in enumerate(listFile):
    print(i)
    try:
        path = os.path.join(rootdir, filename)
        # Read the image in binary mode and base64-encode it for the API.
        with open(path, 'rb') as f:
            img = base64.b64encode(f.read())

        # Call the OCR service.
        params = {"image": img}
        response = requests.post(request_url1, data=params, headers=headers)

        # Parse the reply and look for either marker in any recognised line.
        result_json = response.json()
        needCopy = any(
            u'他营' in words_result["words"] or u'联营' in words_result["words"]
            for words_result in result_json["words_result"]
        )

        if needCopy:
            print('get one %s' % i)
            shutil.copyfile(path, copydir + u'/' + filename)
    except Exception as e:
        # Best effort: one bad file or failed request must not abort the
        # batch, but report it so failures are not silently lost. %r keeps
        # the message ASCII-safe on Python 2 consoles.
        print('skip %r: %r' % (filename, e))
        continue

# NOTE(review): nothing in the visible code appends to resultList, so this
# loop prints nothing as written.
resultList = list(set(resultList))
for result in resultList:
    print(result)

# Signal completion.
print('OK')