#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Load scraped device JSON dumps into the ``xujiu_device`` collection.

Every file found in ``rootdir`` is parsed as JSON; each entry of its
top-level ``data`` list is upserted into the collection behind
``xujiuDevice``, keyed by the entry's ``device_id``.  The import is
best-effort: a file or record that cannot be processed is reported on
stderr and skipped, and the run continues with the next one.
"""
import os
import sys
import time
import datetime
import urllib

import requests
from mongoengine import register_connection, PointField, DynamicDocument, StringField
import simplejson as json
from django.db.models.fields import DateTimeField

# The project root is two directories above the one containing this script.
# It must be on sys.path before the project-local imports below can resolve.
PROJECT_ROOT = os.path.join(os.path.abspath(os.path.split(os.path.realpath(__file__))[0] + "/.."), '..')
sys.path.insert(0, PROJECT_ROOT)
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "configs.testing")

from script.base import init_env

# The environment is initialised before importing anything from ``apps``;
# this ordering is preserved from the original script.
init_env(interactive=False)

from apps.web.core.db import Searchable
import pyautogui

# NOTE(review): the host and credentials are hard-coded in source control.
# They are left untouched here so behaviour does not change, but they should
# be moved to configuration or environment variables and the password rotated.
register_connection(
    alias='spider',
    name='spider',
    host='116.62.228.194',
    port=27017,
    username='dba',
    password='dayuan@2020..',
    authentication_source='admin',
)


class xujiuDevice(Searchable):
    """Document stored in ``xujiu_device`` through the ``spider`` connection."""

    # NOTE(review): 'unique_together' is passed through unchanged; whether
    # ``Searchable`` acts on this key is not visible from this file - confirm.
    meta = {
        'collection': 'xujiu_device',
        'db_alias': 'spider',
        'unique_together': {'device_id'},
    }


def _report(message):
    """Write one diagnostic line to stderr, keeping stdout output unchanged."""
    sys.stderr.write(message + '\n')


def _import_device_file(path):
    """Upsert every record from the ``data`` list of the JSON file at *path*.

    A record that cannot be upserted (for example one lacking ``device_id``,
    or one the database rejects) is reported and skipped so the remaining
    records of the file are still processed.

    Raises whatever ``open``, ``json.loads`` or the ``data`` lookup raise when
    the file as a whole is unusable; the caller decides how to handle that.
    """
    with open(path, 'rt') as handle:
        payload = json.loads(handle.read())

    for position, dev in enumerate(payload['data']):
        try:
            xujiuDevice.get_collection().update(
                {'device_id': dev['device_id']},
                {'$set': dev},
                upsert=True,
            )
        except Exception as exc:
            # Best-effort import: one bad record must not abort the file.
            # %r keeps the message ASCII-safe for non-ASCII paths on Python 2.
            _report('%r: record %d not upserted: %r' % (path, position, exc))


rootdir = u'F:/爬虫相关/蓄久分析/all2'
listFile = os.listdir(rootdir)  # every entry (files and directories) under rootdir

for i, entry_name in enumerate(listFile):
    # Progress indicator; the parenthesised single-argument form prints the
    # same text under Python 2's print statement and Python 3's function.
    print(i)
    path = os.path.join(rootdir, entry_name)
    try:
        _import_device_file(path)
    except Exception as exc:
        # Unreadable entries (sub-directories, invalid JSON, missing 'data')
        # are skipped as before, but no longer silently.
        _report('%r: file skipped: %r' % (path, exc))
        continue

print('OK')