58.py 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131
  1. # -*- coding: utf-8 -*-
  2. # !/usr/bin/env python
  3. import os, sys
  4. import urllib
  5. import requests
  6. from mongoengine import register_connection, PointField, DynamicDocument, StringField
  7. import simplejson as json
  # Repository root: two directory levels above the directory that holds this
  # script. It is put first on sys.path so project packages can be imported.
  PROJECT_ROOT = os.path.join(
      os.path.abspath(os.path.dirname(os.path.realpath(__file__)) + "/.."),
      '..',
  )
  sys.path.insert(0, PROJECT_ROOT)

  # Only applies when DJANGO_SETTINGS_MODULE is not already set in the environment.
  os.environ.setdefault("DJANGO_SETTINGS_MODULE", "configs.testing")

  # Must stay below the sys.path change above, which makes `script` importable.
  from script.base import init_env

  init_env(interactive=False)

  # Presumably maps a 58.com city code to the city's Chinese name -- TODO confirm;
  # nothing in the visible part of this file reads it.
  CITY_MAP = dict(hf=u'合肥')

  # MongoDB connection used by the Village document (db_alias 'spider').
  register_connection(alias='spider', name='spider', host='127.0.0.1', port=27017)
  class Village(DynamicDocument):
      """One community record taken from the 58.com "xiaoqu" list.

      Stored in the 'village' collection of the 'spider' connection. The
      combination (city, area, name) identifies a record and is enforced by a
      unique compound index.
      """

      city = StringField(default="")
      area = StringField(default="")
      name = StringField(default='')
      # Field name is part of the stored schema, so it keeps shadowing the builtin.
      type = StringField(default="")
      address = StringField(default="")
      # GeoJSON point; stays None until the record has been geocoded.
      location = PointField(default=None)
      alias = StringField(default='')

      meta = {
          'collection': 'village',
          'db_alias': 'spider',
          # mongoengine has no 'unique_together' meta option (that is Django ORM
          # syntax), so the original declaration enforced nothing. Uniqueness
          # has to be declared as a unique index instead.
          'indexes': [
              {'fields': ('city', 'area', 'name'), 'unique': True},
          ],
      }
  def get_location(name):
      """Geocode an address with the Tencent (QQ) Maps `qt=geoc` endpoint.

      :param name: address text, either a unicode string or UTF-8 encoded bytes.
      :return: GeoJSON-style dict ``{'type': 'Point', 'coordinates': [lon, lat]}``
          built from ``detail.pointx`` / ``detail.pointy`` of the response.
      :raises KeyError: when the response lacks ``detail`` or its coordinates.
      :raises ValueError: when the body is not JSON or a coordinate is not numeric.
      """
      try:
          from urllib import quote  # Python 2
      except ImportError:
          from urllib.parse import quote  # Python 3

      # NOTE(review): the API key is hard-coded in the URL -- consider moving it
      # to configuration.
      # NOTE(review): the request asks for output=jsonp but the body is parsed as
      # plain JSON -- confirm the endpoint really returns bare JSON here.
      url_template = 'https://apis.map.qq.com/jsapi?qt=geoc&addr={}&key=FBOBZ-VODWU-C7SVF-B2BDI-UK3JE-YBFUS&output=jsonp&pf=jsapi&ref=jsapi'

      # Python 2's urllib.quote raises KeyError on non-ASCII unicode input, so
      # always hand it UTF-8 bytes.
      if not isinstance(name, bytes):
          name = name.encode('utf-8')
      url = url_template.format(quote(name))
      print(url)

      strhtml = requests.get(url, timeout=15).text
      result = json.loads(strhtml)
      print(result)

      detail = result['detail']
      longitude = float(detail['pointx'])
      latitude = float(detail['pointy'])
      return {'type': 'Point', 'coordinates': [longitude, latitude]}
  def get_location2(name):
      """Find a place by keyword with the Tencent (QQ) Maps `qt=poi` search.

      :param name: search keyword, either a unicode string or UTF-8 encoded bytes.
      :return: GeoJSON-style dict ``{'type': 'Point', 'coordinates': [lon, lat]}``
          for the first POI in the response, or ``None`` when the response holds
          no usable POI (the problem, the response and the URL are printed).
      :raises ValueError: when the response body is not JSON.
      """
      try:
          from urllib import quote  # Python 2
      except ImportError:
          from urllib.parse import quote  # Python 3

      # NOTE(review): the API key is hard-coded in the URL -- consider moving it
      # to configuration.
      url_template = 'https://apis.map.qq.com/jsapi?qt=poi&wd={}&pn=0&rn=10&rich_source=qipao&rich=web&nj=0&c=1&key=FBOBZ-VODWU-C7SVF-B2BDI-UK3JE-YBFUS&pf=jsapi&ref=jsapi'

      # Percent-encode the keyword (as get_location does) so characters such as
      # '&' or '#' inside a name cannot corrupt the query string.
      if not isinstance(name, bytes):
          name = name.encode('utf-8')
      url = url_template.format(quote(name))

      strhtml = requests.get(url, timeout=15).text
      result = json.loads(strhtml)

      try:
          poi = result['detail']['pois'][0]
          longitude = float(poi['pointx'])
          latitude = float(poi['pointy'])
      except (KeyError, IndexError, TypeError, ValueError) as e:
          # repr() keeps the exception type and is always printable;
          # `e.message` is deprecated and empty for many exceptions.
          print(repr(e))
          print(result)
          # The URL is pure ASCII after quoting. The former url.encode('utf-8')
          # raised UnicodeDecodeError on Python 2 byte strings, which escaped
          # this handler.
          print(url)
          return None

      return {'type': 'Point', 'coordinates': [longitude, latitude]}
  def spider_one_city(city, local_name):
      """Download every page of the 58.com community list for one city and store it.

      Each entry of ``data.infoList`` is saved as a Village document. A record
      that fails to save (for example a duplicate) is reported and skipped.

      :param city: city code placed in the ``city`` query parameter (e.g. 'su').
      :param local_name: value stored in the ``city`` field of every Village saved.
      :raises KeyError: when a response or an entry lacks an expected key.
      """
      url_template = ('https://m.58.com/xiaoquweb/getXiaoquList/?city={city}&key=&page={page}'
                      '&price=&sort=&completiontime=&latlon=&stationid=')

      curr_page = 1
      total_page = None  # unknown until the first response has been read

      # `<=` so that the final page is fetched too; the former `<` comparison
      # stopped one page early. Assumes pages are numbered from 1, as the
      # starting value suggests.
      while total_page is None or curr_page <= total_page:
          print('curr = {}, total = {}'.format(curr_page, total_page))
          url = url_template.format(city=city, page=curr_page)
          strhtml = requests.get(url, timeout=15).text
          result = json.loads(strhtml)

          # Take the page count from the first response and only ever grow it.
          reported_total = int(result['data']['pageDTO']['totalPage'])
          if total_page is None or reported_total > total_page:
              total_page = reported_total

          for item in result['data']['infoList']:
              address = item['address']
              alias = item['alias']
              area = item['areaName']
              village_type = item['infoParamEntity']['map']['propertytype']
              name = item['name']
              try:
                  village = Village(city=local_name, area=area, name=name,
                                    type=village_type, address=address, alias=alias)
                  village.save()
              except Exception as e:
                  # Best effort per record: report and continue. repr() is
                  # used because `e.message` is deprecated and often empty.
                  print(repr(e))

          curr_page += 1
  # Step 1: crawl the community list for Suzhou. A failure is reported but does
  # not stop the geocoding step below.
  try:
      spider_one_city('su', u'苏州')
  except Exception as e:
      # repr() keeps the exception type; `e.message` is deprecated and is empty
      # for exceptions built with more than one argument.
      print(repr(e))

  # Step 2: geocode every stored Suzhou record that has no location yet.
  items = Village.objects.filter(city=u'苏州', location=None)
  for item in items:
      try:
          find_name = u'{} {} {}'.format(item.city, item.area, item.name)
          location = get_location2(find_name.encode('utf8'))
          if location is None:
              # Nothing was found; leave the record untouched instead of
              # writing back an unchanged document.
              continue
          item.location = location
          item.save()
      except Exception as e:
          print(repr(e))