# -*- coding: utf-8 -*-
# !/usr/bin/env python
"""Spider: for every School coordinate stored in MongoDB, query the ujing
"findNearStoreInc" API for nearby stores, upsert the results into the
`ujing` collection, and record each queried (lat, lng) in `spiderLog` so
a re-run skips coordinates that were already crawled."""
import os, sys, time, datetime
import urllib
import requests
from mongoengine import register_connection, PointField, DynamicDocument, StringField
import simplejson as json
from django.db.models.fields import DateTimeField

# Make the project root importable before Django settings are loaded.
PROJECT_ROOT = os.path.join(os.path.abspath(os.path.split(os.path.realpath(__file__))[0] + "/.."), '..')
sys.path.insert(0, PROJECT_ROOT)
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "configs.testing")

from script.base import init_env
init_env(interactive = False)

from apps.web.core.db import Searchable

# NOTE(security): database credentials are hard-coded in source; they should
# be moved to a config file or environment variables.
register_connection(alias = 'spider', name = 'spider', host = '116.62.228.194',
                    port = 27017, username = 'dba', password = 'dayuan@2020..',
                    authentication_source = 'admin')


def get_shops_info(lat, lng):
    """Query the ujing API for stores near (lat, lng).

    Returns the decoded JSON response as a dict.  Note: the API expects the
    longitude under the (misspelled) key "lont" -- do not "fix" it.
    """
    url = 'https://u.zhinengxiyifang.cn/api/Stores/findNearStoreInc'
    payload = {"lat": lat, "lont": lng, "skip": 0, "limit": 10}
    # Requests are routed through an external proxy; 15s timeout guards
    # against a hung connection stalling the whole crawl.
    proxies = {'http': 'http://140.246.89.239:19382',
               'https': 'https://140.246.89.239:19382'}
    strhtml = requests.post(url, json = payload, proxies = proxies, timeout = 15).text
    return json.loads(strhtml)


class School(Searchable):
    # Source collection: coordinates to crawl, one document per school.
    province = StringField(default = "")
    city = StringField(default = "")
    name = StringField(default = '')
    lat = StringField(default = '')
    lng = StringField(default = '')

    meta = {
        'collection': 'School',
        'db_alias': 'spider',
        'unique_together': {'lat', 'lng'}
    }


class ujing(Searchable):
    # Destination collection: raw store documents from the API (dynamic schema).
    meta = {
        'collection': 'ujing',
        'db_alias': 'spider',
        'unique_together': {'_id'}
    }


class spiderLog(Searchable):
    # Records each (lat, lng) already crawled so re-runs can skip it.
    lat = StringField(default = '')
    lng = StringField(default = '')
    # BUG FIX: pass the callable, not its result.  The original
    # `default = datetime.datetime.now()` was evaluated once at import time,
    # so every log document received the same frozen timestamp.
    addedTime = DateTimeField(default = datetime.datetime.now)

    meta = {
        'collection': 'spiderLog',
        'db_alias': 'spider',
        'unique_together': {'lat', 'lng'}
    }


def upsert_shops(shopsInfo):
    """Upsert every store from an API response into the `ujing` collection.

    Best-effort by design: a failure on one store document skips only that
    store and continues with the rest.
    """
    store_list = (shopsInfo.get('data') or {}).get('storeList') or []
    for shop in store_list:
        try:
            ujing.get_collection().update({'_id': shop['_id']},
                                          {'$set': shop}, upsert = True)
        except Exception as e:
            # Deliberate best-effort: skip the bad document, keep crawling.
            continue


locations = School.get_collection().find()
for loc in locations:
    try:
        # Skip coordinates already crawled on a previous run.
        if spiderLog.objects.filter(lat = loc['lat'], lng = loc['lng']).count() > 0:
            continue
        shopsInfo = get_shops_info(loc['lat'], loc['lng'])
        spiderLog(lat = loc['lat'], lng = loc['lng']).save()
        print(shopsInfo)
        time.sleep(1)  # throttle: be polite to the remote API
        upsert_shops(shopsInfo)
    except Exception as e:
        print('error!!!!!!!!!!!!!!!!!!!', e)
        continue