xingxing_pic_2_stationname.py 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183
  1. # -*- coding: utf-8 -*-
  2. # !/usr/bin/env python
  3. import os, sys,time,datetime
  4. import urllib
  5. import requests
  6. from mongoengine import register_connection, PointField, DynamicDocument, StringField
  7. import simplejson as json
  8. import base64
  9. import sys
  10. import json
  11. import base64
  12. import urllib2
  13. from urllib import quote_plus
  14. from urllib2 import urlopen
  15. from urllib2 import Request
  16. from urllib2 import URLError
  17. from urllib import urlencode
  18. from django.db.models.fields import DateTimeField
  19. import shutil
  20. PROJECT_ROOT = os.path.join(os.path.abspath(os.path.split(os.path.realpath(__file__))[0] + "/.."), '..')
  21. sys.path.insert(0, PROJECT_ROOT)
  22. os.environ.setdefault("DJANGO_SETTINGS_MODULE", "configs.testing")
  23. from script.base import init_env
  24. init_env(interactive = False)
  25. from apps.web.core.db import Searchable
  26. # 防止https证书校验不正确
  27. import ssl
  28. register_connection(alias = 'spider',
  29. name = 'spider',
  30. host = '211.159.224.10',
  31. port = 27017,
  32. username = 'dba',
  33. password = 'dayuan@2020..',
  34. authentication_source = 'admin')
  35. class xingxingStation(Searchable):
  36. name = StringField(default = '')
  37. servicePhone = StringField(default = '')
  38. company = StringField(default = '')
  39. meta = {
  40. 'collection': 'xingxing_station',
  41. 'db_alias': 'spider',
  42. 'unique_together': {'name'}
  43. }
  44. IS_PY3 = sys.version_info.major == 3
  45. if IS_PY3:
  46. from urllib.request import urlopen
  47. from urllib.request import Request
  48. from urllib.error import URLError
  49. from urllib.parse import urlencode
  50. from urllib.parse import quote_plus
  51. else:
  52. import urllib2
  53. from urllib import quote_plus
  54. from urllib2 import urlopen
  55. from urllib2 import Request
  56. from urllib2 import URLError
  57. from urllib import urlencode
  58. ssl._create_default_https_context = ssl._create_unverified_context
  59. # 利用百度APP,直接解析截图中的地址,以及端口编号。
  60. API_KEY = 'OVcN78LP40CBEwWk5REF2Hyu'
  61. SECRET_KEY = 'a7luZBdbzjsfU9oE2GD3yPeTBgPty03t'
  62. OCR_URL = "https://aip.baidubce.com/rest/2.0/ocr/v1/accurate_basic"
  63. TOKEN_URL = 'https://aip.baidubce.com/oauth/2.0/token'
  64. def fetch_token():
  65. params = {'grant_type': 'client_credentials',
  66. 'client_id': API_KEY,
  67. 'client_secret': SECRET_KEY}
  68. post_data = urlencode(params)
  69. if (IS_PY3):
  70. post_data = post_data.encode('utf-8')
  71. req = Request(TOKEN_URL, post_data)
  72. try:
  73. f = urlopen(req, timeout=5)
  74. result_str = f.read()
  75. except URLError as err:
  76. print(err)
  77. if (IS_PY3):
  78. result_str = result_str.decode()
  79. result = json.loads(result_str)
  80. if ('access_token' in result.keys() and 'scope' in result.keys()):
  81. if not 'brain_all_scope' in result['scope'].split(' '):
  82. print ('please ensure has check the ability')
  83. exit()
  84. return result['access_token']
  85. else:
  86. print ('please overwrite the correct API_KEY and SECRET_KEY')
  87. exit()
  88. request_url = "https://aip.baidubce.com/rest/2.0/ocr/v1/general_basic" # accurate_basic
  89. def read_file(image_path):
  90. f = None
  91. try:
  92. f = open(image_path, 'rb')
  93. return f.read()
  94. except:
  95. print('read image file fail')
  96. return None
  97. finally:
  98. if f:
  99. f.close()
  100. def request(url, data):
  101. req = Request(url, data.encode('utf-8'))
  102. has_error = False
  103. try:
  104. f = urlopen(req)
  105. result_str = f.read()
  106. if (IS_PY3):
  107. result_str = result_str.decode()
  108. return result_str
  109. except URLError as err:
  110. print(err)
  111. # 二进制方式打开图片文件
  112. # 获取access token
  113. token = fetch_token()
  114. # 拼接通用文字识别高精度url
  115. #
  116. rootdir = u'Q:/友商信息/汽车桩/星星充电/站列表截图'
  117. listFile = os.listdir(rootdir) # 列出文件夹下所有的目录与文件
  118. resultList = []
  119. for i in range(0, len(listFile)):
  120. print i
  121. try:
  122. path = os.path.join(rootdir, listFile[i])
  123. f = open(path, 'rb')
  124. img = base64.b64encode(f.read())
  125. if f:
  126. f.close()
  127. # 调用文字识别服务
  128. params = {"image":img}
  129. access_token = token
  130. request_url1 = request_url + "?access_token=" + token
  131. headers = {'content-type': 'application/x-www-form-urlencoded'}
  132. response = requests.post(request_url1, data=params, headers=headers)
  133. # if response:
  134. # print (response.json())
  135. # 解析返回结果
  136. result_json = response.json()
  137. needCopy = False
  138. for words_result in result_json["words_result"]:
  139. text = words_result["words"]
  140. if u'他营' in text or u'联营' in text:
  141. needCopy = True
  142. if needCopy:
  143. print 'get one',i
  144. shutil.copyfile(rootdir+'/' + listFile[i], 'Q:/友商信息/汽车桩/星星充电/他营/%s' % listFile[i])
  145. except Exception,e:
  146. continue
  147. resultList = list(set(resultList))
  148. for result in resultList:
  149. print result
  150. # 打印文字
  151. print('OK')