weijingyun_pic_2_stationname.py 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179
  1. # -*- coding: utf-8 -*-
  2. # !/usr/bin/env python
  3. import os, sys,time,datetime
  4. import urllib
  5. import requests
  6. from mongoengine import register_connection, PointField, DynamicDocument, StringField
  7. import simplejson as json
  8. import base64
  9. import sys
  10. import json
  11. import base64
  12. import urllib2
  13. from urllib import quote_plus
  14. from urllib2 import urlopen
  15. from urllib2 import Request
  16. from urllib2 import URLError
  17. from urllib import urlencode
  18. from django.db.models.fields import DateTimeField
  19. PROJECT_ROOT = os.path.join(os.path.abspath(os.path.split(os.path.realpath(__file__))[0] + "/.."), '..')
  20. sys.path.insert(0, PROJECT_ROOT)
  21. os.environ.setdefault("DJANGO_SETTINGS_MODULE", "configs.testing")
  22. from script.base import init_env
  23. init_env(interactive = False)
  24. from apps.web.core.db import Searchable
  25. # 防止https证书校验不正确
  26. import ssl
  27. register_connection(alias = 'spider',
  28. name = 'spider',
  29. host = '116.62.228.194',
  30. port = 27017,
  31. username = 'dba',
  32. password = 'dayuan@2020..',
  33. authentication_source = 'admin')
  34. class tiantianPort(Searchable):
  35. addr = StringField(default = '')
  36. portId = StringField(default = '')
  37. meta = {
  38. 'collection': 'tiantian_port',
  39. 'db_alias': 'spider',
  40. 'unique_together': {'portId'}
  41. }
  42. IS_PY3 = sys.version_info.major == 3
  43. if IS_PY3:
  44. from urllib.request import urlopen
  45. from urllib.request import Request
  46. from urllib.error import URLError
  47. from urllib.parse import urlencode
  48. from urllib.parse import quote_plus
  49. else:
  50. import urllib2
  51. from urllib import quote_plus
  52. from urllib2 import urlopen
  53. from urllib2 import Request
  54. from urllib2 import URLError
  55. from urllib import urlencode
  56. ssl._create_default_https_context = ssl._create_unverified_context
  57. # 利用百度APP,直接解析截图中的地址,以及端口编号。
  58. API_KEY = 'OVcN78LP40CBEwWk5REF2Hyu'
  59. SECRET_KEY = 'a7luZBdbzjsfU9oE2GD3yPeTBgPty03t'
  60. OCR_URL = "https://aip.baidubce.com/rest/2.0/ocr/v1/accurate_basic"
  61. TOKEN_URL = 'https://aip.baidubce.com/oauth/2.0/token'
  62. def fetch_token():
  63. params = {'grant_type': 'client_credentials',
  64. 'client_id': API_KEY,
  65. 'client_secret': SECRET_KEY}
  66. post_data = urlencode(params)
  67. if (IS_PY3):
  68. post_data = post_data.encode('utf-8')
  69. req = Request(TOKEN_URL, post_data)
  70. try:
  71. f = urlopen(req, timeout=5)
  72. result_str = f.read()
  73. except URLError as err:
  74. print(err)
  75. if (IS_PY3):
  76. result_str = result_str.decode()
  77. result = json.loads(result_str)
  78. if ('access_token' in result.keys() and 'scope' in result.keys()):
  79. if not 'brain_all_scope' in result['scope'].split(' '):
  80. print ('please ensure has check the ability')
  81. exit()
  82. return result['access_token']
  83. else:
  84. print ('please overwrite the correct API_KEY and SECRET_KEY')
  85. exit()
  86. request_url = "https://aip.baidubce.com/rest/2.0/ocr/v1/general_basic" # accurate_basic
  87. def read_file(image_path):
  88. f = None
  89. try:
  90. f = open(image_path, 'rb')
  91. return f.read()
  92. except:
  93. print('read image file fail')
  94. return None
  95. finally:
  96. if f:
  97. f.close()
  98. def request(url, data):
  99. req = Request(url, data.encode('utf-8'))
  100. has_error = False
  101. try:
  102. f = urlopen(req)
  103. result_str = f.read()
  104. if (IS_PY3):
  105. result_str = result_str.decode()
  106. return result_str
  107. except URLError as err:
  108. print(err)
  109. # 二进制方式打开图片文件
  110. # 获取access token
  111. token = fetch_token()
  112. # 拼接通用文字识别高精度url
  113. #
  114. rootdir = u'F:/蔚景云截图/'
  115. listFile = os.listdir(rootdir) # 列出文件夹下所有的目录与文件
  116. resultList = []
  117. for i in range(0, len(listFile)):
  118. print i
  119. try:
  120. path = os.path.join(rootdir, listFile[i])
  121. f = open(path, 'rb')
  122. img = base64.b64encode(f.read())
  123. if f:
  124. f.close()
  125. # 调用文字识别服务
  126. params = {"image":img}
  127. access_token = token
  128. request_url = request_url + "?access_token=" + token
  129. headers = {'content-type': 'application/x-www-form-urlencoded'}
  130. response = requests.post(request_url, data=params, headers=headers)
  131. # if response:
  132. # print (response.json())
  133. # 解析返回结果
  134. result_json = response.json()
  135. addr = ''
  136. for words_result in result_json["words_result"]:
  137. text = words_result["words"]
  138. if len(text) < 7:
  139. continue
  140. if u'度起' in text or u'共' in text or 'km' in text or u'搜索' in text or u'闲' in text or u'VIP' in text or u'停车' in text or u'度' in text or '.' in text:
  141. continue
  142. resultList.append(text)
  143. except Exception:
  144. continue
  145. resultList = list(set(resultList))
  146. for result in resultList:
  147. print result
  148. # 打印文字
  149. print('OK')