# -*- coding: utf-8 -*-
# !/usr/bin/env python

"""
Delete records that have already been migrated to the history database.

Records of one model are selected by their ``date`` field, which must fall
inside the requested calendar month: ``start_day <= date < end_day``, where
both bounds are ``YYYY-MM-DD`` strings for the first day of that month and of
the following month.

Command line options:
    -l, --log    log file handed to ``setup_logger``; without it the logger
                 comes from ``get_logger``.
    -e, --env    name of the settings module below ``configs``
                 (default: ``testing``).
    -y, --year   year of the month to delete (integer).
    -m, --month  month to delete (integer, 1-12).
    -o, --model  model name; selects the archive configuration and, through
                 ``script.db.MODEL_MAP``, the collection to delete from.
    -c, --check  ``y``/``Y`` keeps the safety check enabled (the default);
                 any other value disables it.

With the safety check enabled the script refuses to delete a month that is
not strictly before the ``YYYY-MM`` prefix of the archive configuration's
``startDay``.

Exit status: 2 for an unparsable command line, 1 for invalid arguments,
missing configuration or a refused month.
"""

import getopt
import os
import sys

# At most this many ids are collected per run; if more documents match, the
# script has to be run again to remove the remainder.
MAX_DELETE_COUNT = 1000000
# Cursor batch size used while collecting the ids.
FETCH_BATCH_SIZE = 50000
# Number of queued delete requests after which the bulk handler is executed.
BULK_FLUSH_SIZE = 2000

try:
    options, args = getopt.getopt(
        sys.argv[1:],
        'l:e:y:m:o:c:',
        ['log=', 'env=', 'year=', 'month=', 'model=', 'check='])
except getopt.GetoptError as e:
    print(str(e))
    # A bad command line is a failure; a bare sys.exit() would report success.
    sys.exit(2)

log_file = None
platform_env = 'testing'
year = None
month = None
model_name = None
check = True

for name, value in options:
    if name in ('-l', '--log'):
        log_file = value
    elif name in ('-e', '--env'):
        platform_env = value
    elif name in ('-y', '--year'):
        year = int(value)
    elif name in ('-m', '--month'):
        month = int(value)
    elif name in ('-o', '--model'):
        model_name = value
    elif name in ('-c', '--check'):
        check = value in ('y', 'Y')

# The settings module and the import path must be in place before any project
# module is imported, which is why the project imports below are not at the
# top of the file.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'configs.{env}'.format(env=platform_env))

# Two directory levels above the directory that contains this script.
PROJECT_ROOT = os.path.join(os.path.abspath(os.path.split(os.path.realpath(__file__))[0] + "/.."), '..')
sys.path.insert(0, PROJECT_ROOT)

from script.base import init_env, setup_logger, get_logger

init_env(interactive=False)

if log_file:
    logger = setup_logger(filename=log_file, namespace=__name__)
else:
    logger = get_logger(__name__)

# Validate the arguments before touching the database. A month outside 1-12
# would produce date bounds that are not real calendar dates.
if not year or not month or not model_name or not 1 <= month <= 12:
    logger.error('year or month or model is not valid.')
    sys.exit(1)

from apps.web.models import ArchivedModelProxyConfig
from apilib.utils_mongo import BulkHandlerEx

# Prefer the configuration of the requested model, fall back to 'default'.
archive_config = ArchivedModelProxyConfig.objects(model=model_name).first()  # type: ArchivedModelProxyConfig
if not archive_config:
    archive_config = ArchivedModelProxyConfig.objects(model='default').first()  # type: ArchivedModelProxyConfig
if not archive_config:
    logger.error('no ArchivedModelProxyConfig found for model {} or default.'.format(model_name))
    sys.exit(1)

# First two '-'-separated fields of startDay; expected to look like 'YYYY-MM'
# because it is compared with a '%04d-%02d' string below.
his_data_line = '-'.join(archive_config.startDay.split('-')[0:2])
print(his_data_line)

# Half-open date range [start_day, end_day) covering the requested month.
if month < 12:
    next_year, next_month = year, month + 1
else:
    next_year, next_month = year + 1, 1

start_day = '%04d-%02d-%02d' % (year, month, 1)
end_day = '%04d-%02d-%02d' % (next_year, next_month, 1)

print(start_day)
print(end_day)

# Safety check: only months strictly before his_data_line may be deleted.
# Zero-padded 'YYYY-MM' strings order the same way as the dates they denote.
requested_month = '%04d-%02d' % (year, month)
if check and requested_month >= his_data_line:
    logger.error('{} >= {}'.format(requested_month, his_data_line))
    sys.exit(1)

from script.db import MODEL_MAP

model_class = MODEL_MAP.get(model_name)
if model_class is None:
    logger.error('model {} is not registered in MODEL_MAP.'.format(model_name))
    sys.exit(1)

# Phase 1: collect the ids of every matching document (only _id is fetched).
cursor = model_class.get_collection().find(
    {'date': {'$gte': start_day, '$lt': end_day}},
    {'_id': 1}
).batch_size(FETCH_BATCH_SIZE).limit(MAX_DELETE_COUNT)

delete_id_list = []
for doc in cursor:
    print(doc['_id'])
    delete_id_list.append(doc['_id'])

logger.debug('try to delete {} items.'.format(len(delete_id_list)))

# Phase 2: delete by _id in bulk, executing every BULK_FLUSH_SIZE requests and
# starting a fresh handler after each execution.
bulker = BulkHandlerEx(model_class.get_collection())  # type: BulkHandlerEx
for _id in delete_id_list:
    bulker.delete(query_dict={'_id': _id})

    if len(bulker.requests) >= BULK_FLUSH_SIZE:
        bulker.execute()
        bulker = BulkHandlerEx(model_class.get_collection())  # type: BulkHandlerEx

# Execute whatever is left in the last, partially filled handler.
if len(bulker.requests) > 0:
    bulker.execute()