123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293 |
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import datetime
import os
import sys
import time
import urllib

import requests
import simplejson as json
from django.db.models.fields import DateTimeField
from mongoengine import DateTimeField as MongoDateTimeField
from mongoengine import register_connection, PointField, DynamicDocument, StringField
# Project root: the grandparent of the directory holding this script (the
# trailing '..' is left un-normalised). It is pushed to the front of sys.path
# so that the project packages imported below can be resolved.
PROJECT_ROOT = os.path.join(
    os.path.abspath(os.path.dirname(os.path.realpath(__file__)) + "/.."),
    '..',
)
sys.path.insert(0, PROJECT_ROOT)

# Only applies when DJANGO_SETTINGS_MODULE is not already set by the caller.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "configs.testing")

# NOTE(review): init_env presumably bootstraps the project/Django environment
# and must run before importing from ``apps`` -- confirm in script.base.
from script.base import init_env

init_env(interactive=False)

from apps.web.core.db import Searchable
# Register the MongoDB connection used by every document bound to the
# 'spider' alias below.
#
# NOTE(review): host and credentials used to be hard-coded only. They can now
# be overridden through SPIDER_MONGO_* environment variables; the previous
# values remain as defaults so existing deployments keep working. The default
# password is committed to source control and should be rotated, after which
# the defaults here can be removed.
register_connection(
    alias='spider',
    name='spider',
    host=os.environ.get('SPIDER_MONGO_HOST', '116.62.228.194'),
    port=int(os.environ.get('SPIDER_MONGO_PORT', 27017)),
    username=os.environ.get('SPIDER_MONGO_USERNAME', 'dba'),
    password=os.environ.get('SPIDER_MONGO_PASSWORD', 'dayuan@2020..'),
    authentication_source='admin',
)
def get_shops_info(lat, lng, skip=0, limit=10):
    """Query the store-locator API for stores near a coordinate.

    :param lat: latitude sent to the API.
    :param lng: longitude sent to the API.
    :param skip: pagination offset sent as ``skip`` (default 0, the value
        that was previously hard-coded).
    :param limit: page size sent as ``limit`` (default 10, the value that
        was previously hard-coded).
    :return: the decoded JSON response body.
    :raises requests.RequestException: on connection/proxy failure, timeout,
        or a non-2xx HTTP status.
    :raises ValueError: if the response body is not valid JSON.
    """
    url = 'https://u.zhinengxiyifang.cn/api/Stores/findNearStoreInc'
    # The longitude key is sent as "lont", exactly as the original request did
    # (presumably the API's own spelling -- do not "correct" it).
    payload = {"lat": lat, "lont": lng, "skip": skip, "limit": limit}
    proxies = {
        'http': 'http://140.246.89.239:19382',
        'https': 'https://140.246.89.239:19382',
    }
    response = requests.post(url, json=payload, proxies=proxies, timeout=15)
    # Fail with the real HTTP error instead of an opaque JSON decode error
    # when the proxy or server answers with an error page.
    response.raise_for_status()
    return json.loads(response.text)
class School(Searchable):
    """School location document.

    Stored in the ``School`` collection through the ``spider`` connection
    alias. All fields are strings defaulting to the empty string.
    """

    province = StringField(default='')
    city = StringField(default='')
    name = StringField(default='')
    # Coordinates are declared as strings, not numeric or geo fields.
    lat = StringField(default='')
    lng = StringField(default='')

    # 'unique_together' is passed through as-is; how it is interpreted is up
    # to Searchable / the ODM, which is not visible from this file.
    meta = dict(
        collection='School',
        db_alias='spider',
        unique_together={'lat', 'lng'},
    )
class ujing(Searchable):
    """Store document kept in the ``ujing`` collection of the ``spider`` alias.

    No fields are declared here; this script writes to it only through the
    raw collection returned by ``get_collection()``.
    """

    # 'unique_together' is passed through as-is; its handling depends on
    # Searchable / the ODM, which is not visible from this file.
    meta = dict(
        collection='ujing',
        db_alias='spider',
        unique_together={'_id'},
    )
class spiderLog(Searchable):
    """Marker document for a coordinate pair that has already been crawled.

    Stored in the ``spiderLog`` collection through the ``spider`` connection
    alias. The crawl loop in this script looks entries up by ``lat``/``lng``
    to skip locations it has already processed.
    """

    lat = StringField(default='')
    lng = StringField(default='')
    # Must be the mongoengine field type: the Django DateTimeField used before
    # is not a mongoengine field, so it was not handled as a document field.
    # The default is the callable itself so that each new document gets its
    # own creation time; calling now() here would freeze a single timestamp
    # at import time.
    addedTime = MongoDateTimeField(default=datetime.datetime.now)

    # 'unique_together' is passed through as-is; its handling depends on
    # Searchable / the ODM, which is not visible from this file.
    meta = {
        'collection': 'spiderLog',
        'db_alias': 'spider',
        'unique_together': {'lat', 'lng'},
    }
-
def upsert_shops(shopsInfo):
    """Upsert every store of an API response into the ``ujing`` collection.

    The stores are read from ``shopsInfo['data']['storeList']``. Each store
    is written with ``$set`` keyed on its ``_id`` (``upsert=True``), so
    re-crawled stores are updated in place.

    Writing is best-effort: a store that fails to be written is reported on
    stderr and the remaining stores are still processed.

    :param shopsInfo: decoded API response. Anything that is not a dict, or
        that lacks a non-empty ``data.storeList``, is ignored.
    :return: None
    """
    data = shopsInfo.get('data') if isinstance(shopsInfo, dict) else None
    stores = data.get('storeList') if isinstance(data, dict) else None
    if not stores:
        return

    for shop in stores:
        try:
            ujing.get_collection().update(
                {'_id': shop['_id']}, {'$set': shop}, upsert=True)
        except Exception as e:
            # Keep going with the other stores, but do not hide the failure.
            sys.stderr.write('failed to upsert shop %r: %r\n' % (shop, e))
-
# Crawl the stores around every school location that has no spiderLog entry.
#
# Only the coordinates are fetched, and the cursor is drained up front: the
# loop below is slow (network call plus a one-second pause per location), and
# holding a live server-side cursor for that long risks it expiring mid-run.
locations = list(School.get_collection().find({}, {'lat': 1, 'lng': 1}))
for loc in locations:
    try:
        lat, lng = loc['lat'], loc['lng']

        # Already crawled on a previous run: skip without hitting the API.
        if spiderLog.objects.filter(lat=lat, lng=lng).count() > 0:
            continue

        shopsInfo = get_shops_info(lat, lng)
        print(shopsInfo)
        upsert_shops(shopsInfo)

        # Mark the location as done only after its stores were handed to the
        # database, so an interrupted run retries it instead of losing data.
        spiderLog(lat=lat, lng=lng).save()

        # Throttle successive API requests.
        time.sleep(1)
    except Exception as e:
        # One bad location must not abort the whole crawl.
        print('error!!!!!!!!!!!!!!!!!!! %r' % (e,))
|