123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131 |
- # -*- coding: utf-8 -*-
- # !/usr/bin/env python
- import os, sys
- import urllib
- import requests
- from mongoengine import register_connection, PointField, DynamicDocument, StringField
- import simplejson as json
# Make the project importable: take the (symlink-resolved) directory that
# contains this script, step one level up and absolutise it, then append a
# further '..' component. The result is intentionally left un-normalised,
# exactly as before.
PROJECT_ROOT = os.path.join(
    os.path.abspath(os.path.dirname(os.path.realpath(__file__)) + "/.."),
    '..',
)
sys.path.insert(0, PROJECT_ROOT)

# Only applies when the variable is not already set in the environment.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "configs.testing")

# This import has to come after the sys.path tweak above, otherwise the
# project-local ``script`` package cannot be found.
from script.base import init_env  # noqa: E402

# NOTE(review): presumably boots the Django/project environment without
# prompting -- confirm against script.base.init_env.
init_env(interactive=False)
# Short city code -> Chinese display name of the city.
# NOTE(review): the keys look like the 58.com city codes passed to
# spider_one_city() (e.g. 'su') -- confirm; the mapping is not referenced in
# the visible part of this script.
CITY_MAP = {'hf': u'合肥'}
# Register the MongoDB connection used by the documents in this script under
# the alias 'spider' (database 'spider' on the local mongod, default port).
register_connection(
    alias='spider',
    name='spider',
    host='127.0.0.1',
    port=27017,
)
class Village(DynamicDocument):
    """A residential community record stored in the ``village`` collection.

    Documents are written through the ``spider`` connection alias. Being a
    ``DynamicDocument``, instances may also carry fields that are not
    declared below.
    """

    # City display name (spider_one_city() stores its ``local_name`` here).
    city = StringField(default="")
    # District name (taken from the crawler's ``areaName``).
    area = StringField(default="")
    # Community name.
    name = StringField(default='')
    # Property type (taken from the crawler's ``propertytype``).
    # The attribute name shadows the ``type`` builtin inside the class body,
    # but it is the persisted field name and therefore must not be renamed.
    type = StringField(default="")
    address = StringField(default="")
    # GeoJSON point ``[longitude, latitude]``; None until it has been geocoded.
    location = PointField(default=None)
    alias = StringField(default='')

    meta = {
        'collection': 'village',
        'db_alias': 'spider',
        # The previous 'unique_together' key is a Django option that
        # mongoengine does not recognise, so duplicates were never rejected.
        # A unique compound index is the mongoengine way to enforce that a
        # (city, area, name) triple is stored only once.
        # NOTE: if the existing collection already contains duplicate
        # triples, MongoDB will refuse to build this index until they are
        # removed.
        'indexes': [
            {'fields': ['city', 'area', 'name'], 'unique': True},
        ],
    }
def get_location(name):
    """Geocode an address through the apis.map.qq.com ``geoc`` endpoint.

    Args:
        name: Address text to look up, as a byte string or a unicode string.
            Unicode input is encoded as UTF-8 before it is URL-quoted.

    Returns:
        A GeoJSON-style dict
        ``{'type': 'Point', 'coordinates': [longitude, latitude]}``.

    Raises:
        requests.RequestException: On network failure or a non-2xx response.
        ValueError: If the response body is not valid JSON or a coordinate
            cannot be converted to float.
        KeyError: If the response lacks ``detail.pointx`` / ``detail.pointy``.
    """
    # NOTE(review): the API key is embedded in the source; consider moving it
    # to configuration.
    url_template = 'https://apis.map.qq.com/jsapi?qt=geoc&addr={}&key=FBOBZ-VODWU-C7SVF-B2BDI-UK3JE-YBFUS&output=jsonp&pf=jsapi&ref=jsapi'

    # On Python 2, urllib.quote() raises KeyError for non-ASCII unicode
    # input, so always hand it UTF-8 encoded bytes.
    if not isinstance(name, bytes):
        name = name.encode('utf-8')
    url = url_template.format(urllib.quote(name))
    print(url)

    response = requests.get(url, timeout=15)
    # Fail with a clear HTTP error instead of a confusing JSON parse error
    # when the service answers with an error page.
    response.raise_for_status()
    result = json.loads(response.text)
    print(result)

    detail = result['detail']
    longitude = float(detail['pointx'])
    latitude = float(detail['pointy'])
    return {
        'type': 'Point', 'coordinates': [longitude, latitude]
    }
def get_location2(name):
    """Find a place via the apis.map.qq.com POI search and return its point.

    Only the first POI in the response is used.

    Args:
        name: Search keyword, as a byte string or a unicode string. Unicode
            input is encoded as UTF-8 before it is URL-quoted.

    Returns:
        A GeoJSON-style dict
        ``{'type': 'Point', 'coordinates': [longitude, latitude]}``, or
        ``None`` when the response holds no usable POI (diagnostics are
        printed in that case).

    Raises:
        requests.RequestException: On network failure or a non-2xx response.
        ValueError: If the response body is not valid JSON.
    """
    # NOTE(review): the API key is embedded in the source; consider moving it
    # to configuration.
    url_template = 'https://apis.map.qq.com/jsapi?qt=poi&wd={}&pn=0&rn=10&rich_source=qipao&rich=web&nj=0&c=1&key=FBOBZ-VODWU-C7SVF-B2BDI-UK3JE-YBFUS&pf=jsapi&ref=jsapi'

    # Quote the keyword the same way get_location() does. This keeps the URL
    # pure ASCII, so it can be printed safely below; the old code called
    # url.encode('utf-8') on a non-ASCII byte string, which raised
    # UnicodeDecodeError inside the error handler.
    if not isinstance(name, bytes):
        name = name.encode('utf-8')
    url = url_template.format(urllib.quote(name))

    response = requests.get(url, timeout=15)
    response.raise_for_status()
    result = json.loads(response.text)

    try:
        poi = result['detail']['pois'][0]
        longitude = float(poi['pointx'])
        latitude = float(poi['pointy'])
    except (KeyError, IndexError, TypeError, ValueError) as e:
        # No usable hit: report what came back and signal "not found".
        # repr() is used because e.message is frequently empty and is not
        # safe to print when it contains non-ASCII text.
        print(repr(e))
        print(result)
        print(url)
        return None

    return {
        'type': 'Point', 'coordinates': [longitude, latitude]
    }
def spider_one_city(city, local_name):
    """Crawl the 58.com community list of one city and store every entry.

    Pages are requested one after another starting at page 1. Each listed
    community is saved as a ``Village`` document; a failure to save one entry
    is reported and does not stop the crawl.

    Args:
        city: City code placed in the ``city`` query parameter (e.g. 'su').
        local_name: City name stored in ``Village.city``.

    Raises:
        requests.RequestException: On network failure.
        ValueError: If a response body is not valid JSON.
        KeyError: If a response lacks the expected ``data`` structure.
    """
    url_template = 'https://m.58.com/xiaoquweb/getXiaoquList/?city={city}&key=&page={page}&price=&sort=&completiontime=&latlon=&stationid='

    curr_page = 1
    # Unknown until the first response has been read.
    total_page = None

    # Pages are numbered from 1, so the last page is number ``total_page``
    # and must be included (the previous ``<`` comparison skipped it).
    while total_page is None or curr_page <= total_page:
        print('curr = {}, total = {}'.format(curr_page, total_page))
        url = url_template.format(city=city, page=curr_page)
        strhtml = requests.get(url, timeout=15).text
        result = json.loads(strhtml)

        # Take the page count from the first response; afterwards only ever
        # raise it, as the original logic did.
        reported_total = int(result['data']['pageDTO']['totalPage'])
        if total_page is None or reported_total > total_page:
            total_page = reported_total

        for item in result['data']['infoList']:
            address = item['address']
            alias = item['alias']
            area = item['areaName']
            village_type = item['infoParamEntity']['map']['propertytype']
            name = item['name']
            try:
                village = Village(city=local_name, area=area, name=name,
                                  type=village_type, address=address,
                                  alias=alias)
                village.save()
            except Exception as e:
                # Best-effort: one entry that cannot be stored must not abort
                # the crawl. repr() is used because e.message is frequently
                # empty and unsafe to print with non-ASCII text.
                print(repr(e))

        curr_page += 1
# Step 1: crawl the community list for Suzhou. A crawl failure is reported
# but must not prevent geocoding of the rows that are already stored.
try:
    spider_one_city('su', u'苏州')
except Exception as e:
    # repr() keeps the exception type and is safe for non-ASCII messages,
    # unlike printing e.message.
    print(repr(e))

# Step 2: geocode every stored Suzhou community that has no location yet.
items = Village.objects.filter(city=u'苏州', location=None)
for item in items:
    try:
        find_name = u'{} {} {}'.format(item.city, item.area, item.name)
        location = get_location2(find_name.encode('utf8'))
        if location is None:
            # Nothing found: leave the document untouched rather than
            # writing back an unchanged record.
            continue
        item.location = location
        item.save()
    except Exception as e:
        # Best-effort per item: report and move on to the next community.
        print(repr(e))
|