unreferenced_files.py 1.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748
  1. # -*- coding: utf-8 -*-
  2. import os
  3. from collections import defaultdict
  4. from django.apps import apps
  5. from django.conf import settings
  6. from django.core.management.base import BaseCommand, CommandError
  7. from django.db import models
  8. from django_extensions.management.utils import signalcommand
  9. class Command(BaseCommand):
  10. help = "Prints a list of all files in MEDIA_ROOT that are not referenced in the database."
  11. @signalcommand
  12. def handle(self, *args, **options):
  13. if not getattr(settings, 'MEDIA_ROOT'):
  14. raise CommandError("MEDIA_ROOT is not set, nothing to do")
  15. # Get a list of all files under MEDIA_ROOT
  16. media = set()
  17. for root, dirs, files in os.walk(settings.MEDIA_ROOT):
  18. for f in files:
  19. media.add(os.path.abspath(os.path.join(root, f)))
  20. # Get list of all fields (value) for each model (key)
  21. # that is a FileField or subclass of a FileField
  22. model_dict = defaultdict(list)
  23. for model in apps.get_models():
  24. for field in model._meta.fields:
  25. if issubclass(field.__class__, models.FileField):
  26. model_dict[model].append(field)
  27. # Get a list of all files referenced in the database
  28. referenced = set()
  29. for model in model_dict:
  30. all = model.objects.all().iterator()
  31. for object in all:
  32. for field in model_dict[model]:
  33. target_file = getattr(object, field.name)
  34. if target_file:
  35. referenced.add(os.path.abspath(target_file.path))
  36. # Print each file in MEDIA_ROOT that is not referenced in the database
  37. not_referenced = media - referenced
  38. for f in not_referenced:
  39. print(f)