xujiu_batch_dev.py 1.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657
  1. # -*- coding: utf-8 -*-
  2. # !/usr/bin/env python
  3. import os, sys,time,datetime
  4. import urllib
  5. import requests
  6. from mongoengine import register_connection, PointField, DynamicDocument, StringField
  7. import simplejson as json
  8. from django.db.models.fields import DateTimeField
  9. PROJECT_ROOT = os.path.join(os.path.abspath(os.path.split(os.path.realpath(__file__))[0] + "/.."), '..')
  10. sys.path.insert(0, PROJECT_ROOT)
  11. os.environ.setdefault("DJANGO_SETTINGS_MODULE", "configs.testing")
  12. from script.base import init_env
  13. init_env(interactive = False)
  14. from apps.web.core.db import Searchable
  15. import pyautogui
  16. register_connection(alias = 'spider',
  17. name = 'spider',
  18. host = '116.62.228.194',
  19. port = 27017,
  20. username = 'dba',
  21. password = 'dayuan@2020..',
  22. authentication_source = 'admin')
  23. class xujiuDevice(Searchable):
  24. meta = {
  25. 'collection': 'xujiu_device',
  26. 'db_alias': 'spider',
  27. 'unique_together': {'device_id'}
  28. }
  29. rootdir = u'F:/爬虫相关/蓄久分析/all2'
  30. listFile = os.listdir(rootdir) # 列出文件夹下所有的目录与文件
  31. for i in range(0, len(listFile)):
  32. print i
  33. try:
  34. path = os.path.join(rootdir, listFile[i])
  35. with open(path, 'rt') as f:
  36. raw_data = ''.join(f.readlines())
  37. data = json.loads(raw_data)
  38. dataList = data['data']
  39. for dev in dataList:
  40. try:
  41. xujiuDevice.get_collection().update({'device_id':dev['device_id']},{'$set':dev},upsert = True)
  42. except Exception,e:
  43. continue
  44. except Exception,e:
  45. continue
  46. print 'OK'