pipeline.py 1.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748
  1. # -*- coding: utf-8 -*-
  2. #!/usr/bin/env python
  3. import os
  4. import fnmatch
  5. import gzip
  6. import bz2
  7. import re
  8. def gen_find(filepat, top):
  9. """
  10. Find all filenames in a directory tree that match a shell wildcard pattern
  11. """
  12. for path, dirlist, filelist in os.walk(top):
  13. for name in fnmatch.filter(filelist, filepat):
  14. yield os.path.join(path,name)
  15. def gen_opener(filenames):
  16. """
  17. Open a sequence of filenames one at a time producing a file object.
  18. The file is closed immediately when proceeding to the next iteration.
  19. """
  20. for filename in filenames:
  21. if filename.endswith('.gz'):
  22. f = gzip.open(filename, 'rt')
  23. elif filename.endswith('.bz2'):
  24. f = bz2.open(filename, 'rt')
  25. else:
  26. f = open(filename, 'rt')
  27. yield f
  28. f.close()
  29. def gen_concatenate(iterators):
  30. """
  31. Chain a sequence of iterators together into a single sequence.
  32. """
  33. for it in iterators:
  34. for _ in it:
  35. yield _
  36. def gen_grep(pattern, lines):
  37. """
  38. Look for a regex pattern in a sequence of lines
  39. """
  40. pat = re.compile(pattern)
  41. for line in lines:
  42. if pat.search(line):
  43. yield line