ascii85.py 2.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384
  1. #!/usr/bin/env python
  2. """ Python implementation of ASCII85/ASCIIHex decoder (Adobe version).
  3. This code is in the public domain.
  4. """
  5. import re
  6. import struct
  7. # ascii85decode(data)
  8. def ascii85decode(data):
  9. """
  10. In ASCII85 encoding, every four bytes are encoded with five ASCII
  11. letters, using 85 different types of characters (as 256**4 < 85**5).
  12. When the length of the original bytes is not a multiple of 4, a special
  13. rule is used for round up.
  14. The Adobe's ASCII85 implementation is slightly different from
  15. its original in handling the last characters.
  16. The sample string is taken from:
  17. http://en.wikipedia.org/w/index.php?title=Ascii85
  18. >>> ascii85decode('9jqo^BlbD-BleB1DJ+*+F(f,q')
  19. 'Man is distinguished'
  20. >>> ascii85decode('E,9)oF*2M7/c~>')
  21. 'pleasure.'
  22. """
  23. n = b = 0
  24. out = ''
  25. for c in data:
  26. if '!' <= c and c <= 'u':
  27. n += 1
  28. b = b*85+(ord(c)-33)
  29. if n == 5:
  30. out += struct.pack('>L', b)
  31. n = b = 0
  32. elif c == 'z':
  33. assert n == 0
  34. out += '\0\0\0\0'
  35. elif c == '~':
  36. if n:
  37. for _ in range(5-n):
  38. b = b*85+84
  39. out += struct.pack('>L', b)[:n-1]
  40. break
  41. return out
  42. # asciihexdecode(data)
  43. hex_re = re.compile(r'([a-f\d]{2})', re.IGNORECASE)
  44. trail_re = re.compile(r'^(?:[a-f\d]{2}|\s)*([a-f\d])[\s>]*$', re.IGNORECASE)
  45. def asciihexdecode(data):
  46. """
  47. ASCIIHexDecode filter: PDFReference v1.4 section 3.3.1
  48. For each pair of ASCII hexadecimal digits (0-9 and A-F or a-f), the
  49. ASCIIHexDecode filter produces one byte of binary data. All white-space
  50. characters are ignored. A right angle bracket character (>) indicates
  51. EOD. Any other characters will cause an error. If the filter encounters
  52. the EOD marker after reading an odd number of hexadecimal digits, it
  53. will behave as if a 0 followed the last digit.
  54. >>> asciihexdecode('61 62 2e6364 65')
  55. 'ab.cde'
  56. >>> asciihexdecode('61 62 2e6364 657>')
  57. 'ab.cdep'
  58. >>> asciihexdecode('7>')
  59. 'p'
  60. """
  61. decode = (lambda hx: chr(int(hx, 16)))
  62. out = map(decode, hex_re.findall(data))
  63. m = trail_re.search(data)
  64. if m:
  65. out.append(decode("%c0" % m.group(1)))
  66. return ''.join(out)
  67. if __name__ == '__main__':
  68. import doctest
  69. doctest.testmod()