123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657 |
- # -*- coding: utf-8 -*-
- # !/usr/bin/env python
- import os, sys,time,datetime
- import urllib
- import requests
- from mongoengine import register_connection, PointField, DynamicDocument, StringField
- import simplejson as json
- from django.db.models.fields import DateTimeField
- PROJECT_ROOT = os.path.join(os.path.abspath(os.path.split(os.path.realpath(__file__))[0] + "/.."), '..')
- sys.path.insert(0, PROJECT_ROOT)
- os.environ.setdefault("DJANGO_SETTINGS_MODULE", "configs.testing")
- from script.base import init_env
- init_env(interactive = False)
- from apps.web.core.db import Searchable
- import pyautogui
# Register the MongoDB connection under the 'spider' alias (the alias that
# xujiuDevice's meta refers to via 'db_alias').
#
# SECURITY: the host and credentials were previously available only as
# hard-coded literals. They can now be supplied through the SPIDER_MONGO_*
# environment variables; the literals remain solely as backward-compatible
# fallbacks. Rotate this password and delete the fallbacks once every
# environment exports the variables.
register_connection(
    alias='spider',
    name='spider',
    host=os.environ.get('SPIDER_MONGO_HOST', '116.62.228.194'),
    # Environment values are strings; the port must be an integer.
    port=int(os.environ.get('SPIDER_MONGO_PORT', 27017)),
    username=os.environ.get('SPIDER_MONGO_USERNAME', 'dba'),
    password=os.environ.get('SPIDER_MONGO_PASSWORD', 'dayuan@2020..'),
    authentication_source='admin',
)
class xujiuDevice(Searchable):
    """Model bound to the 'xujiu_device' collection via the 'spider' connection alias."""

    # NOTE(review): 'unique_together' is not a stock mongoengine meta option;
    # presumably the Searchable base class interprets it -- confirm.
    meta = dict(
        collection='xujiu_device',
        db_alias='spider',
        unique_together=set(['device_id']),
    )
def _report_failure(context, error):
    """Write a one-line failure notice to stderr without aborting the import."""
    # `context` is pre-formatted with %r by the callers, so the message stays
    # ASCII-safe even for non-ASCII paths on Python 2.
    sys.stderr.write('xujiu import: %s failed: %r\n' % (context, error))


def _import_device_file(path):
    """Upsert every record listed under the 'data' key of one JSON file.

    Each record is matched on its 'device_id' and written with ``$set`` and
    ``upsert=True``. A failure on one record is reported and the remaining
    records are still processed. Errors while opening or parsing the file,
    or a missing 'data' key, propagate to the caller.
    """
    with open(path, 'rt') as f:
        payload = json.load(f)

    # Resolve the collection once per file instead of once per record.
    collection = xujiuDevice.get_collection()
    for position, dev in enumerate(payload['data']):
        try:
            # NOTE: Collection.update is deprecated since PyMongo 3.0 in favour
            # of update_one; kept because the installed driver version is not
            # visible from this script.
            collection.update(
                {'device_id': dev['device_id']}, {'$set': dev}, upsert=True)
        except Exception as e:
            # Best effort: report the bad record and carry on with the rest.
            _report_failure('%r record #%d' % (path, position), e)


rootdir = u'F:/爬虫相关/蓄久分析/all2'
listFile = os.listdir(rootdir)  # every file and directory directly under rootdir
for i, entry_name in enumerate(listFile):
    print(i)  # progress indicator: index of the entry being processed
    try:
        _import_device_file(os.path.join(rootdir, entry_name))
    except Exception as e:
        # Unreadable entries (sub-directories, invalid JSON, missing 'data')
        # are skipped as before, but no longer silently.
        _report_failure('%r' % (entry_name,), e)
print('OK')
|