# _diffcommand.py (2.0 KB)
  1. import optparse
  2. import sys
  3. import re
  4. import os
  5. from lxml.html.diff import htmldiff
  6. description = """\
  7. """
  8. parser = optparse.OptionParser(
  9. usage="%prog [OPTIONS] FILE1 FILE2\n"
  10. "%prog --annotate [OPTIONS] INFO1 FILE1 INFO2 FILE2 ...",
  11. description=description,
  12. )
  13. parser.add_option(
  14. '-o', '--output',
  15. metavar="FILE",
  16. dest="output",
  17. default="-",
  18. help="File to write the difference to",
  19. )
  20. parser.add_option(
  21. '-a', '--annotation',
  22. action="store_true",
  23. dest="annotation",
  24. help="Do an annotation")
  25. def main(args=None):
  26. if args is None:
  27. args = sys.argv[1:]
  28. options, args = parser.parse_args(args)
  29. if options.annotation:
  30. return annotate(options, args)
  31. if len(args) != 2:
  32. print('Error: you must give two files')
  33. parser.print_help()
  34. sys.exit(1)
  35. file1, file2 = args
  36. input1 = read_file(file1)
  37. input2 = read_file(file2)
  38. body1 = split_body(input1)[1]
  39. pre, body2, post = split_body(input2)
  40. result = htmldiff(body1, body2)
  41. result = pre + result + post
  42. if options.output == '-':
  43. if not result.endswith('\n'):
  44. result += '\n'
  45. sys.stdout.write(result)
  46. else:
  47. f = open(options.output, 'wb')
  48. f.write(result)
  49. f.close()
  50. def read_file(filename):
  51. if filename == '-':
  52. c = sys.stdin.read()
  53. elif not os.path.exists(filename):
  54. raise OSError(
  55. "Input file %s does not exist" % filename)
  56. else:
  57. f = open(filename, 'rb')
  58. c = f.read()
  59. f.close()
  60. return c
  61. body_start_re = re.compile(
  62. r"<body.*?>", re.I|re.S)
  63. body_end_re = re.compile(
  64. r"</body.*?>", re.I|re.S)
  65. def split_body(html):
  66. match = body_start_re.search(html)
  67. if match:
  68. pre = html[:match.end()]
  69. html = html[match.end():]
  70. match = body_end_re.search(html)
  71. if match:
  72. post = html[match.start():]
  73. html = html[:match.start()]
  74. return pre, html, post
  75. def annotate(options, args):
  76. print("Not yet implemented")
  77. sys.exit(1)