#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Delete data that has already been migrated to the history database.

Documents are selected by their ``date`` field: everything whose ``date``
falls inside the requested calendar month is removed from the live
collection of the requested model.

Command line options:
    -l, --log    log file name; when omitted the default logger is used
    -e, --env    settings environment, used as ``configs.<env>``
                 (default: ``testing``)
    -y, --year   year of the month to delete (integer, required)
    -m, --month  month to delete, 1-12 (integer, required)
    -o, --model  model name, looked up in ``script.db.MODEL_MAP`` (required)
    -c, --check  ``y``/``Y`` enables the safety check against the archive
                 boundary, any other value disables it (default: enabled)

Exit status: 0 on success, 1 when validation or the safety check fails,
2 when the command line cannot be parsed.
"""

import getopt
import os
import sys

PROJECT_ROOT = os.path.join(os.path.abspath(os.path.split(os.path.realpath(__file__))[0] + "/.."), '..')

# Batch size requested from the server while collecting the ids to delete.
FIND_BATCH_SIZE = 50000
# Upper bound on the number of documents removed by a single run; if more
# documents match, the script has to be run again for the same month.
MAX_DELETE_PER_RUN = 1000000
# Number of queued delete requests after which the bulk handler is flushed.
BULK_FLUSH_SIZE = 2000


def parse_options(argv):
    """Parse the command line arguments in *argv* (without the program name).

    Returns a dict with the keys ``log_file``, ``platform_env``, ``year``,
    ``month``, ``model_name`` and ``check``. Options that are not given keep
    their defaults (``None``, except ``platform_env='testing'`` and
    ``check=True``). When an option is repeated, the last occurrence wins.

    Raises:
        getopt.GetoptError: on an unknown option or a missing option value.
        ValueError: when ``--year`` or ``--month`` is not an integer.
    """
    options, _ = getopt.getopt(argv, 'l:e:y:m:o:c:',
                               ['log=', 'env=', 'year=', 'month=', 'model=', 'check='])

    parsed = {
        'log_file': None,
        'platform_env': 'testing',
        'year': None,
        'month': None,
        'model_name': None,
        'check': True,
    }

    for name, value in options:
        if name in ('-l', '--log'):
            parsed['log_file'] = value
        elif name in ('-e', '--env'):
            parsed['platform_env'] = value
        elif name in ('-y', '--year'):
            parsed['year'] = int(value)
        elif name in ('-m', '--month'):
            parsed['month'] = int(value)
        elif name in ('-o', '--model'):
            parsed['model_name'] = value
        elif name in ('-c', '--check'):
            parsed['check'] = value in ('y', 'Y')

    return parsed


def is_valid_target(year, month, model_name):
    """Return True when year, month and model name describe a usable target.

    All three values must be present (non-zero / non-empty) and the month
    must lie in the range 1-12.
    """
    if not year or not month or not model_name:
        return False
    return 1 <= month <= 12


def month_range(year, month):
    """Return ``(start_day, end_day)`` for the given calendar month.

    Both values are ``YYYY-MM-DD`` strings: ``start_day`` is the first day of
    the month and ``end_day`` the first day of the following month, so the
    pair describes the half-open interval ``[start_day, end_day)``.
    """
    start_day = '%04d-%02d-%02d' % (year, month, 1)

    if month < 12:
        next_year, next_month = year, month + 1
    else:
        # December rolls over into January of the next year.
        next_year, next_month = year + 1, 1

    end_day = '%04d-%02d-%02d' % (next_year, next_month, 1)
    return start_day, end_day


def archive_boundary(start_day):
    """Reduce a ``YYYY-MM-DD`` string to its ``YYYY-MM`` prefix."""
    return '-'.join(start_day.split('-')[0:2])


def main(argv=None):
    """Run the deletion for the month and model given on the command line.

    *argv* defaults to ``sys.argv[1:]``. The function terminates the process
    through ``sys.exit`` with a non-zero status when the arguments are
    invalid, when no archive configuration or model class can be found, or
    when the safety check refuses the requested month.
    """
    if argv is None:
        argv = sys.argv[1:]

    try:
        opts = parse_options(argv)
    except (getopt.GetoptError, ValueError) as e:
        print(str(e))
        # A malformed command line is a failure and must not exit with 0.
        sys.exit(2)

    year = opts['year']
    month = opts['month']
    model_name = opts['model_name']
    check = opts['check']

    # The settings module has to be chosen and the project root has to be on
    # sys.path before any project module is imported.
    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'configs.{env}'.format(env=opts['platform_env']))
    sys.path.insert(0, PROJECT_ROOT)

    from script.base import init_env, setup_logger, get_logger

    init_env(interactive=False)

    if opts['log_file']:
        logger = setup_logger(filename=opts['log_file'], namespace=__name__)
    else:
        logger = get_logger(__name__)

    # Validate the target before touching the database at all.
    if not is_valid_target(year, month, model_name):
        logger.error('year or month or model is not valid.')
        sys.exit(1)

    from apps.web.models import ArchivedModelProxyConfig
    from apilib.utils_mongo import BulkHandlerEx

    # Prefer the configuration of the requested model, fall back to 'default'.
    config = ArchivedModelProxyConfig.objects(model=model_name).first()  # type: ArchivedModelProxyConfig
    if not config:
        config = ArchivedModelProxyConfig.objects(model='default').first()  # type: ArchivedModelProxyConfig
    if not config:
        # Without a configuration the archive boundary is unknown; refuse to
        # delete anything instead of failing with an AttributeError.
        logger.error('no ArchivedModelProxyConfig found for model {} or default.'.format(model_name))
        sys.exit(1)

    his_data_line = archive_boundary(config.startDay)
    print(his_data_line)

    start_day, end_day = month_range(year, month)
    print(start_day)
    print(end_day)

    # Safety check: only months strictly before the boundary month taken from
    # the configuration may be deleted. Both sides are zero-padded 'YYYY-MM'
    # strings, so a plain string comparison orders them chronologically.
    target_month = '%04d-%02d' % (year, month)
    if check and target_month >= his_data_line:
        logger.error('{} >= {}'.format(target_month, his_data_line))
        sys.exit(1)

    from script.db import MODEL_MAP

    model_class = MODEL_MAP.get(model_name)
    if model_class is None:
        logger.error('model {} is not registered in MODEL_MAP.'.format(model_name))
        sys.exit(1)

    # Collect the ids first so the number of affected documents can be logged
    # before anything is removed.
    cursor = model_class.get_collection().find({
        'date': {'$gte': start_day, '$lt': end_day}
    }, {'_id': 1}).batch_size(FIND_BATCH_SIZE).limit(MAX_DELETE_PER_RUN)

    delete_id_list = []
    for doc in cursor:
        print(doc['_id'])
        delete_id_list.append(doc['_id'])

    logger.debug('try to delete {} items.'.format(len(delete_id_list)))

    bulker = BulkHandlerEx(model_class.get_collection())  # type: BulkHandlerEx
    for _id in delete_id_list:
        bulker.delete(query_dict={'_id': _id})
        if len(bulker.requests) >= BULK_FLUSH_SIZE:
            # Flush the full batch and start a fresh handler for the next one.
            bulker.execute()
            bulker = BulkHandlerEx(model_class.get_collection())  # type: BulkHandlerEx

    # Flush whatever is left over from the last, partially filled batch.
    if len(bulker.requests) > 0:
        bulker.execute()


if __name__ == '__main__':
    main()