123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118 |
# -*- coding: utf-8 -*-
# !/usr/bin/env python
"""
Delete data that has already been migrated to the history database.

Documents of one model are selected by their ``date`` field, one calendar
month per run, and removed with bulk delete requests.

Options:
    -l, --log    log file path (optional)
    -e, --env    settings module name under ``configs.`` (default: testing)
    -y, --year   year of the month to delete (required)
    -m, --month  month to delete, 1-12 (required)
    -o, --model  key into ``script.db.MODEL_MAP`` (required)
    -c, --check  'y' / 'Y' keeps the archive-boundary check enabled, any
                 other value disables it (default: enabled)
"""
import getopt
import os
import sys

# Cursor tuning for the id scan and the size of each bulk delete request.
FIND_BATCH_SIZE = 50000
MAX_DELETE_COUNT = 1000000
BULK_FLUSH_SIZE = 2000


def parse_options(argv):
    """Parse the command line arguments (without the program name).

    Returns a dict with the keys ``log_file``, ``platform_env``, ``year``,
    ``month``, ``model_name`` and ``check``. Options that were not given keep
    their defaults (``None``, except ``platform_env='testing'`` and
    ``check=True``).

    Prints a message and exits with status 2 when an option is unknown or
    when ``--year`` / ``--month`` is not an integer.
    """
    try:
        options, _ = getopt.getopt(
            argv, 'l:e:y:m:o:c:',
            ['log=', 'env=', 'year=', 'month=', 'model=', 'check='])
    except getopt.GetoptError as e:
        print(str(e))
        # Non-zero status so that callers (cron, shell) notice the failure.
        sys.exit(2)

    parsed = {
        'log_file': None,
        'platform_env': 'testing',
        'year': None,
        'month': None,
        'model_name': None,
        'check': True,
    }

    for name, value in options:
        try:
            if name in ('-l', '--log'):
                parsed['log_file'] = value
            elif name in ('-e', '--env'):
                parsed['platform_env'] = value
            elif name in ('-y', '--year'):
                parsed['year'] = int(value)
            elif name in ('-m', '--month'):
                parsed['month'] = int(value)
            elif name in ('-o', '--model'):
                parsed['model_name'] = value
            elif name in ('-c', '--check'):
                parsed['check'] = value in ('y', 'Y')
        except ValueError:
            print('option {} expects an integer, got {!r}'.format(name, value))
            sys.exit(2)

    return parsed


def month_range(year, month):
    """Return ``(start_day, end_day)`` as ``'YYYY-MM-DD'`` strings.

    ``start_day`` is the first day of the given month and ``end_day`` is the
    first day of the following month, so the pair describes the half-open
    interval ``[start_day, end_day)``.

    Raises ValueError when ``month`` is outside 1..12 or ``year`` is < 1.
    """
    if year < 1 or not 1 <= month <= 12:
        raise ValueError('invalid year/month: {}-{}'.format(year, month))

    if month < 12:
        next_year, next_month = year, month + 1
    else:
        # December rolls over into January of the next year.
        next_year, next_month = year + 1, 1

    start_day = '%04d-%02d-%02d' % (year, month, 1)
    end_day = '%04d-%02d-%02d' % (next_year, next_month, 1)
    return start_day, end_day


def main(argv=None):
    """Run the deletion for the month/model given on the command line.

    ``argv`` defaults to ``sys.argv[1:]``. Exits with status 1 when the
    arguments are incomplete, the model or its archive configuration cannot
    be found, or the archive-boundary check rejects the requested month.
    """
    opts = parse_options(sys.argv[1:] if argv is None else argv)
    log_file = opts['log_file']
    year = opts['year']
    month = opts['month']
    model_name = opts['model_name']
    check = opts['check']

    # The settings module and the import path must be configured before any
    # project module is imported, hence the function-level imports below.
    os.environ.setdefault(
        'DJANGO_SETTINGS_MODULE',
        'configs.{env}'.format(env=opts['platform_env']))
    project_root = os.path.join(
        os.path.abspath(os.path.split(os.path.realpath(__file__))[0] + "/.."),
        '..')
    sys.path.insert(0, project_root)

    from script.base import init_env, setup_logger, get_logger

    init_env(interactive=False)

    if log_file:
        logger = setup_logger(filename=log_file, namespace=__name__)
    else:
        logger = get_logger(__name__)

    # Validate the arguments before touching the database.
    if not year or not month or not model_name:
        logger.error('year or month or model is not valid.')
        sys.exit(1)

    try:
        start_day, end_day = month_range(year, month)
    except ValueError as e:
        logger.error(str(e))
        sys.exit(1)

    from apps.web.models import ArchivedModelProxyConfig
    from apilib.utils_mongo import BulkHandlerEx
    from script.db import MODEL_MAP

    model_class = MODEL_MAP.get(model_name)
    if model_class is None:
        logger.error('unknown model: {}'.format(model_name))
        sys.exit(1)

    # Model-specific archive configuration, falling back to the 'default' one.
    archive_conf = ArchivedModelProxyConfig.objects(model=model_name).first()  # type: ArchivedModelProxyConfig
    if not archive_conf:
        archive_conf = ArchivedModelProxyConfig.objects(model='default').first()  # type: ArchivedModelProxyConfig
    if not archive_conf:
        logger.error('no archive config found for model {} or default.'.format(model_name))
        sys.exit(1)

    # First two '-'-separated fields of startDay, compared against 'YYYY-MM'
    # below (presumably startDay is 'YYYY-MM-DD' -- confirm against the model).
    his_data_line = '-'.join(archive_conf.startDay.split('-')[0:2])
    print(his_data_line)

    print(start_day)
    print(end_day)

    # Refuse to delete a month that is not strictly before the boundary,
    # unless the check was explicitly disabled with --check.
    target_month = '%04d-%02d' % (year, month)
    if check and target_month >= his_data_line:
        logger.error('{} >= {}'.format(target_month, his_data_line))
        sys.exit(1)

    # Collect the ids first, then delete; at most MAX_DELETE_COUNT documents
    # are handled per run because of the cursor limit.
    cursor = model_class.get_collection().find({
        'date': {'$gte': start_day, '$lt': end_day}
    }, {'_id': 1}).batch_size(FIND_BATCH_SIZE).limit(MAX_DELETE_COUNT)

    delete_id_list = []
    for doc in cursor:
        print(doc['_id'])
        delete_id_list.append(doc['_id'])

    logger.debug('try to delete {} items.'.format(len(delete_id_list)))

    bulker = BulkHandlerEx(model_class.get_collection())  # type: BulkHandlerEx
    for _id in delete_id_list:
        bulker.delete(query_dict={'_id': _id})

        # Flush in chunks and start a fresh handler for the next chunk.
        if len(bulker.requests) >= BULK_FLUSH_SIZE:
            bulker.execute()
            bulker = BulkHandlerEx(model_class.get_collection())  # type: BulkHandlerEx

    # Flush whatever is left over from the last partial chunk.
    if len(bulker.requests) > 0:
        bulker.execute()


if __name__ == '__main__':
    main()
|