#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Export, per contact mobile number, how many `weitiandi_device` records exist.

The script reads the ``weitiandi_device`` collection through the ``spider``
MongoDB connection, counts the records tagged ``spiderData == '0421'`` per
``mobile`` value and writes the result to an Excel report.

NOTE: this is Python 2 code (``urllib2``); keep edits Python 2 compatible.
"""
import os
import sys
import time
import datetime
import urllib
import urllib2

import requests
from mongoengine import register_connection, PointField, DynamicDocument, StringField
import simplejson as json
import ssl
import xlrd
from xlrd import xldate_as_tuple
from collections import OrderedDict

# The project root is two directories above the directory containing this
# file; it must be on sys.path before any project-local import below.
PROJECT_ROOT = os.path.join(os.path.abspath(os.path.split(os.path.realpath(__file__))[0] + "/.."), '..')
sys.path.insert(0, PROJECT_ROOT)
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "configs.testing")

from script.base import init_env

init_env(interactive = False)

from apps.web.core.db import Searchable
from apps.web.core.utils import generate_excel_report

# NOTE(review): host and credentials are hard-coded in source control; they
# should be moved to configuration / environment variables and rotated.
register_connection(alias = 'spider',
                    name = 'spider',
                    host = '211.159.224.10',
                    port = 27119,
                    username = 'service',
                    password = 'oOzjoQcO5DyyiN97AY0NpzJ6vztjNpx5',
                    authentication_source = 'admin')


class weitiandiDevice(Searchable):
    """Document stored in the `weitiandi_device` collection of the `spider` connection."""

    meta = {
        'collection': 'weitiandi_device',
        'db_alias': 'spider',
    }


# ---------------------------------------------------------------------------
# Disabled legacy step (kept verbatim): first crawl the QR-code ids of all
# devices.
# ---------------------------------------------------------------------------
# page=0
# while True:
#     url = 'https://trade.api3.sdaascloud.com/trade/nearbyChargers?key=&page=%s&size=10&latitude=30.48029&longitude=114.42073&raidus=3000000000' % page
#     strhtml = requests.get(url,timeout = 15).text
#     result = json.loads(strhtml)
#     if result.has_key('data') and result['data'].has_key('content') and len(result['data']['content']) == 0:
#         break
#     for dev in result['data']['content']:
#         xzzDevice.get_collection().update({'qrId':dev['qrId']},{'$set':dev},upsert = True)
#     page += 1

# Next step: fetch more detailed device information by QR-code id.


def get_tel_zone(tel):
    """Look up the `prov` and `city` fields for a phone number via an HTTP API.

    Dashes and spaces are stripped from ``tel`` before it is sent as the
    ``mobile`` query parameter.

    :param tel: phone number string; falsy values or strings of length <= 7
                (measured before stripping) are rejected without a request.
    :return: ``(prov, city)`` taken from the ``data`` object of the JSON
             response, or ``('', '')`` when the input is rejected, the
             response body is empty, ``msg`` is not ``'success'``, or any
             exception occurs (best-effort lookup, errors are swallowed).
    """
    if not tel or len(tel) <= 7:
        return '', ''

    tel = tel.replace('-', '').replace(' ', '')
    print(tel)

    host = 'https://api04.aliyun.venuscn.com'
    path = '/mobile'
    appcode = '8296fcd952e34713ba91fbdceb13e915'
    querys = 'mobile=%s' % tel
    url = host + path + '?' + querys

    try:
        request = urllib2.Request(url)
        request.add_header('Authorization', 'APPCODE ' + appcode)

        # NOTE(review): certificate and hostname verification are switched
        # off here, so the connection is open to man-in-the-middle attacks.
        # Left unchanged to preserve behaviour; confirm whether still needed.
        ctx = ssl.create_default_context()
        ctx.check_hostname = False
        ctx.verify_mode = ssl.CERT_NONE

        response = urllib2.urlopen(request, timeout = 15, context = ctx)
        try:
            content = response.read()
        finally:
            # Always release the connection, even if reading fails.
            response.close()

        if not content:
            # Previously this path fell off the end and returned None,
            # which breaks callers that unpack the result into two names.
            return '', ''

        result = json.loads(content)
        if result['msg'] != 'success':
            return '', ''
        return result['data']['prov'], result['data']['city']
    except Exception:
        # Deliberate best-effort: any network/parse/schema problem yields
        # the empty result instead of aborting the caller.
        return '', ''


# ---------------------------------------------------------------------------
# Disabled legacy code (kept verbatim): per-manager count for another
# collection.
# ---------------------------------------------------------------------------
# ownerDict = {}
# for shopInfo in pianyichongDevice.get_collection().find():
#     if not shopInfo.has_key('manager'):
#         continue
#     if ownerDict.has_key(shopInfo['manager']):
#         ownerDict[shopInfo['manager']] += 1
#     else:
#         ownerDict[shopInfo['manager']] = 1

# ---------------------------------------------------------------------------
# Disabled legacy code (kept verbatim): Excel export for another collection.
# ---------------------------------------------------------------------------
# records = []
# ii = 0
# for rcd in shankailaidianDevice.get_collection().find({}):
#     ii += 1
#     if ii % 100 == 0:
#         print ii
#     dataList = [
#         (u'编号', str(rcd['vStationNo'])),
#         (u'总收入', float(rcd['dTotalIncome'])),
#         (u'地域ID', str(rcd['iAreaId'])),
#         (u'businessType', str(rcd['iBusinessType'])),
#         (u'iManagerId', str(rcd['iManagerId'])),
#         (u'添加时间', str(rcd['tAddTime'])),
#         (u'打开时间', str(rcd['tOpenTime'])),
#         (u'地址', str(rcd['vAddress'])),
#         (u'地域', str(rcd['vAreaName'])),
#         (u'电话', rcd['vHelpMobile'] if rcd['vHelpMobile'] != '' else rcd['vTelephone']),
#         # (u'充电站名称', str(rcd['vStationName'])),
#     ]
#
#     records.append(OrderedDict(dataList))
#
# generate_excel_report('F:/shankailaidian1.xlsx', records,True)

# ---------------------------------------------------------------------------
# Disabled legacy code (kept verbatim): per-device export of sn/mobile for
# records with sn >= 'GD1B90000'.
# ---------------------------------------------------------------------------
# for rcd in weitiandiDevice.get_collection().find({'sn':{'$gte':'GD1B90000'}}):
#     ii += 1
#     if ii % 100 == 0:
#         print ii
#     dataList = [
#         (u'编号', rcd['sn']),
#         (u'联系方式', rcd['mobile']),
#     ]
#
#     records.append(OrderedDict(dataList))
# generate_excel_report('F:/weitiandi_4G.xlsx', records,True)


def _count_devices_per_mobile(query):
    """Return a dict mapping each `mobile` value to the number of matching records.

    :param query: MongoDB filter passed to ``find`` on the device collection.
    :raises KeyError: if a matching record has no ``mobile`` field (same as
                      the original inline loop).
    """
    counts = {}
    for rcd in weitiandiDevice.get_collection().find(query):
        mobile = rcd['mobile']
        counts[mobile] = counts.get(mobile, 0) + 1
    return counts


def main():
    """Count devices per contact number and write the Excel report."""
    owerDict = _count_devices_per_mobile({'spiderData': '0421'})

    records = []
    for ii, (mobile, count) in enumerate(owerDict.items(), 1):
        # Progress indicator every 100 rows.
        if ii % 100 == 0:
            print(ii)
        dataList = [
            (u'联系方式', mobile),
            (u'个数', count),
        ]
        records.append(OrderedDict(dataList))

    generate_excel_report('F:/weitiandi_4G_count2.xlsx', records, True)
    print('OK')


if __name__ == '__main__':
    main()