# remove_old_data_by_date.py (3.1 KB)
  # -*- coding: utf-8 -*-
#!/usr/bin/env python
"""
Delete data that has already been migrated to the history database.

The ``date`` field of each document is used as the filter condition: every
document of the selected model whose ``date`` falls inside the requested
calendar month is removed from the model's collection.

Command line options:
    -l, --log    log file path (optional)
    -e, --env    settings module name under ``configs.`` (default: testing)
    -y, --year   year of the month to delete (required)
    -m, --month  month to delete, 1-12 (required)
    -o, --model  key of the model in ``script.db.MODEL_MAP`` (required)
    -c, --check  'y'/'Y' keeps the archive-boundary safety check enabled,
                 any other value disables it (default: enabled)
"""
import getopt
import os
import sys

# Directory two levels above the directory that contains this script; it is
# put on sys.path so that the project packages (script, apps, apilib) import.
PROJECT_ROOT = os.path.join(os.path.abspath(os.path.split(os.path.realpath(__file__))[0] + "/.."), '..')

# Batch size requested from the find() cursor while collecting ids.
FIND_BATCH_SIZE = 50000
# Upper bound on the number of documents collected (and deleted) in one run.
MAX_DELETE_PER_RUN = 1000000
# Number of queued delete requests after which a bulk operation is executed.
BULK_FLUSH_SIZE = 2000


def parse_options(argv):
    """Parse the command line arguments of this script.

    :param argv: list of argument strings, without the program name.
    :return: dict with the keys ``log_file``, ``platform_env``, ``year``,
        ``month``, ``model_name`` and ``check``.
    :raises getopt.GetoptError: on an unknown option or a missing value.
    :raises ValueError: when the year or month value is not an integer.
    """
    options, _ = getopt.getopt(
        argv, 'l:e:y:m:o:c:',
        ['log=', 'env=', 'year=', 'month=', 'model=', 'check='])

    parsed = {
        'log_file': None,
        'platform_env': 'testing',
        'year': None,
        'month': None,
        'model_name': None,
        'check': True,
    }
    for name, value in options:
        if name in ('-l', '--log'):
            parsed['log_file'] = value
        elif name in ('-e', '--env'):
            parsed['platform_env'] = value
        elif name in ('-y', '--year'):
            parsed['year'] = int(value)
        elif name in ('-m', '--month'):
            parsed['month'] = int(value)
        elif name in ('-o', '--model'):
            parsed['model_name'] = value
        elif name in ('-c', '--check'):
            # Only an explicit 'y'/'Y' keeps the safety check on.
            parsed['check'] = value in ('y', 'Y')
    return parsed


def month_range(year, month):
    """Return the half-open day range covering one calendar month.

    :param year: positive integer year.
    :param month: integer month, 1-12.
    :return: tuple ``(start_day, end_day)`` of 'YYYY-MM-DD' strings, where
        ``start_day`` is the first day of the month and ``end_day`` is the
        first day of the following month (exclusive bound).
    :raises ValueError: when year or month is out of range.
    """
    if year < 1:
        raise ValueError('year is not valid: {}'.format(year))
    if month < 1 or month > 12:
        raise ValueError('month is not valid: {}'.format(month))

    if month < 12:
        next_year, next_month = year, month + 1
    else:
        # December rolls over to January of the next year.
        next_year, next_month = year + 1, 1

    start_day = '%04d-%02d-%02d' % (year, month, 1)
    end_day = '%04d-%02d-%02d' % (next_year, next_month, 1)
    return start_day, end_day


def main(argv=None):
    """Run the deletion for the month and model given on the command line.

    Exits with status 1 on any usage, configuration or safety-check error;
    in all of those cases nothing is deleted.

    :param argv: argument list without the program name; defaults to
        ``sys.argv[1:]``.
    """
    if argv is None:
        argv = sys.argv[1:]

    try:
        opts = parse_options(argv)
    except (getopt.GetoptError, ValueError) as e:
        print(str(e))
        # Non-zero status so that callers can detect the usage error.
        sys.exit(1)

    # The settings module and sys.path must be in place before any project
    # import below, which is why those imports live inside this function.
    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'configs.{env}'.format(env=opts['platform_env']))
    sys.path.insert(0, PROJECT_ROOT)

    from script.base import init_env, setup_logger, get_logger
    init_env(interactive=False)

    if opts['log_file']:
        logger = setup_logger(filename=opts['log_file'], namespace=__name__)
    else:
        logger = get_logger(__name__)

    year = opts['year']
    month = opts['month']
    model_name = opts['model_name']
    check = opts['check']

    # Validate the arguments before touching the database.
    if not year or not month or not model_name:
        logger.error('year or month or model is not valid.')
        sys.exit(1)
    try:
        start_day, end_day = month_range(year, month)
    except ValueError as e:
        logger.error(str(e))
        sys.exit(1)
    target_month = '%04d-%02d' % (year, month)

    from apps.web.models import ArchivedModelProxyConfig
    from apilib.utils_mongo import BulkHandlerEx

    # Use the model's own archive config, falling back to the 'default' one.
    config = ArchivedModelProxyConfig.objects(model=model_name).first()  # type: ArchivedModelProxyConfig
    if not config:
        config = ArchivedModelProxyConfig.objects(model='default').first()  # type: ArchivedModelProxyConfig
    if not config:
        # Without a config there is no boundary to check against; abort
        # instead of failing with an AttributeError on None.
        logger.error('no ArchivedModelProxyConfig found for model {} or default.'.format(model_name))
        sys.exit(1)

    # First two '-'-separated parts of startDay
    # (presumably 'YYYY-MM' out of 'YYYY-MM-DD' - TODO confirm).
    his_data_line = '-'.join(config.startDay.split('-')[0:2])
    print(his_data_line)
    print(start_day)
    print(end_day)

    # Safety check: refuse to delete a month that is not strictly before
    # the boundary month taken from the archive config.
    if check and target_month >= his_data_line:
        logger.error('{} >= {}'.format(target_month, his_data_line))
        sys.exit(1)

    from script.db import MODEL_MAP
    model_class = MODEL_MAP.get(model_name)
    if model_class is None:
        logger.error('model {} is not in MODEL_MAP.'.format(model_name))
        sys.exit(1)

    # Collect the ids first (only _id is projected), then delete them in
    # bulk; `date` is compared against the 'YYYY-MM-DD' range strings.
    cursor = model_class.get_collection().find(
        {'date': {'$gte': start_day, '$lt': end_day}},
        {'_id': 1}
    ).batch_size(FIND_BATCH_SIZE).limit(MAX_DELETE_PER_RUN)

    delete_id_list = []
    for doc in cursor:
        print(doc['_id'])
        delete_id_list.append(doc['_id'])

    logger.debug('try to delete {} items.'.format(len(delete_id_list)))
    if len(delete_id_list) >= MAX_DELETE_PER_RUN:
        # The cursor was capped, so matching documents may remain.
        logger.debug('reached the limit of {} items; run again to delete the rest.'.format(MAX_DELETE_PER_RUN))

    bulker = BulkHandlerEx(model_class.get_collection())  # type: BulkHandlerEx
    for _id in delete_id_list:
        bulker.delete(query_dict={'_id': _id})
        if len(bulker.requests) >= BULK_FLUSH_SIZE:
            bulker.execute()
            # Start a fresh handler for the next batch of requests.
            bulker = BulkHandlerEx(model_class.get_collection())  # type: BulkHandlerEx

    # Flush whatever is left in the last, partially filled batch.
    if len(bulker.requests) > 0:
        bulker.execute()


if __name__ == '__main__':
    main()