encodingdb.py 1.6 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465
  1. #!/usr/bin/env python
  2. import re
  3. from psparser import PSLiteral
  4. from glyphlist import glyphname2unicode
  5. from latin_enc import ENCODING
# Matches a run of one or more ASCII decimal digits.  name2unicode() uses
# .search() with it to pull a numeric code out of a glyph name that is not
# listed in glyphname2unicode.
STRIP_NAME = re.compile(r'[0-9]+')
  7. ## name2unicode
  8. ##
  9. def name2unicode(name):
  10. """Converts Adobe glyph names to Unicode numbers."""
  11. if name in glyphname2unicode:
  12. return glyphname2unicode[name]
  13. m = STRIP_NAME.search(name)
  14. if not m:
  15. raise KeyError(name)
  16. return unichr(int(m.group(0)))
  17. ## EncodingDB
  18. ##
  19. class EncodingDB(object):
  20. std2unicode = {}
  21. mac2unicode = {}
  22. win2unicode = {}
  23. pdf2unicode = {}
  24. for (name, std, mac, win, pdf) in ENCODING:
  25. c = name2unicode(name)
  26. if std:
  27. std2unicode[std] = c
  28. if mac:
  29. mac2unicode[mac] = c
  30. if win:
  31. win2unicode[win] = c
  32. if pdf:
  33. pdf2unicode[pdf] = c
  34. encodings = {
  35. 'StandardEncoding': std2unicode,
  36. 'MacRomanEncoding': mac2unicode,
  37. 'WinAnsiEncoding': win2unicode,
  38. 'PDFDocEncoding': pdf2unicode,
  39. }
  40. @classmethod
  41. def get_encoding(klass, name, diff=None):
  42. cid2unicode = klass.encodings.get(name, klass.std2unicode)
  43. if diff:
  44. cid2unicode = cid2unicode.copy()
  45. cid = 0
  46. for x in diff:
  47. if isinstance(x, int):
  48. cid = x
  49. elif isinstance(x, PSLiteral):
  50. try:
  51. cid2unicode[cid] = name2unicode(x.name)
  52. except KeyError:
  53. pass
  54. cid += 1
  55. return cid2unicode