# pdfinterp.py
  1. #!/usr/bin/env python
  2. import sys
  3. import re
  4. try:
  5. from cStringIO import StringIO
  6. except ImportError:
  7. from StringIO import StringIO
  8. from cmapdb import CMapDB, CMap
  9. from psparser import PSTypeError, PSEOF
  10. from psparser import PSKeyword, literal_name, keyword_name
  11. from psparser import PSStackParser
  12. from psparser import LIT, KWD, STRICT
  13. from pdftypes import PDFException, PDFStream, PDFObjRef
  14. from pdftypes import resolve1
  15. from pdftypes import list_value, dict_value, stream_value
  16. from pdffont import PDFFontError
  17. from pdffont import PDFType1Font, PDFTrueTypeFont, PDFType3Font
  18. from pdffont import PDFCIDFont
  19. from pdfcolor import PDFColorSpace
  20. from pdfcolor import PREDEFINED_COLORSPACE
  21. from pdfcolor import LITERAL_DEVICE_GRAY, LITERAL_DEVICE_RGB
  22. from pdfcolor import LITERAL_DEVICE_CMYK
  23. from utils import choplist
  24. from utils import mult_matrix, MATRIX_IDENTITY
  25. ## Exceptions
  26. ##
  27. class PDFResourceError(PDFException):
  28. pass
  29. class PDFInterpreterError(PDFException):
  30. pass
  31. ## Constants
  32. ##
  33. LITERAL_PDF = LIT('PDF')
  34. LITERAL_TEXT = LIT('Text')
  35. LITERAL_FONT = LIT('Font')
  36. LITERAL_FORM = LIT('Form')
  37. LITERAL_IMAGE = LIT('Image')
  38. ## PDFTextState
  39. ##
  40. class PDFTextState(object):
  41. def __init__(self):
  42. self.font = None
  43. self.fontsize = 0
  44. self.charspace = 0
  45. self.wordspace = 0
  46. self.scaling = 100
  47. self.leading = 0
  48. self.render = 0
  49. self.rise = 0
  50. self.reset()
  51. # self.matrix is set
  52. # self.linematrix is set
  53. return
  54. def __repr__(self):
  55. return ('<PDFTextState: font=%r, fontsize=%r, charspace=%r, wordspace=%r, '
  56. ' scaling=%r, leading=%r, render=%r, rise=%r, '
  57. ' matrix=%r, linematrix=%r>' %
  58. (self.font, self.fontsize, self.charspace, self.wordspace,
  59. self.scaling, self.leading, self.render, self.rise,
  60. self.matrix, self.linematrix))
  61. def copy(self):
  62. obj = PDFTextState()
  63. obj.font = self.font
  64. obj.fontsize = self.fontsize
  65. obj.charspace = self.charspace
  66. obj.wordspace = self.wordspace
  67. obj.scaling = self.scaling
  68. obj.leading = self.leading
  69. obj.render = self.render
  70. obj.rise = self.rise
  71. obj.matrix = self.matrix
  72. obj.linematrix = self.linematrix
  73. return obj
  74. def reset(self):
  75. self.matrix = MATRIX_IDENTITY
  76. self.linematrix = (0, 0)
  77. return
  78. ## PDFGraphicState
  79. ##
  80. class PDFGraphicState(object):
  81. def __init__(self):
  82. self.linewidth = 0
  83. self.linecap = None
  84. self.linejoin = None
  85. self.miterlimit = None
  86. self.dash = None
  87. self.intent = None
  88. self.flatness = None
  89. return
  90. def copy(self):
  91. obj = PDFGraphicState()
  92. obj.linewidth = self.linewidth
  93. obj.linecap = self.linecap
  94. obj.linejoin = self.linejoin
  95. obj.miterlimit = self.miterlimit
  96. obj.dash = self.dash
  97. obj.intent = self.intent
  98. obj.flatness = self.flatness
  99. return obj
  100. def __repr__(self):
  101. return ('<PDFGraphicState: linewidth=%r, linecap=%r, linejoin=%r, '
  102. ' miterlimit=%r, dash=%r, intent=%r, flatness=%r>' %
  103. (self.linewidth, self.linecap, self.linejoin,
  104. self.miterlimit, self.dash, self.intent, self.flatness))
  105. ## Resource Manager
  106. ##
  107. class PDFResourceManager(object):
  108. """Repository of shared resources.
  109. ResourceManager facilitates reuse of shared resources
  110. such as fonts and images so that large objects are not
  111. allocated multiple times.
  112. """
  113. debug = 0
  114. def __init__(self, caching=True):
  115. self.caching = caching
  116. self._cached_fonts = {}
  117. return
  118. def get_procset(self, procs):
  119. for proc in procs:
  120. if proc is LITERAL_PDF:
  121. pass
  122. elif proc is LITERAL_TEXT:
  123. pass
  124. else:
  125. #raise PDFResourceError('ProcSet %r is not supported.' % proc)
  126. pass
  127. return
  128. def get_cmap(self, cmapname, strict=False):
  129. try:
  130. return CMapDB.get_cmap(cmapname)
  131. except CMapDB.CMapNotFound:
  132. if strict:
  133. raise
  134. return CMap()
  135. def get_font(self, objid, spec):
  136. if objid and objid in self._cached_fonts:
  137. font = self._cached_fonts[objid]
  138. else:
  139. if 2 <= self.debug:
  140. print >>sys.stderr, 'get_font: create: objid=%r, spec=%r' % (objid, spec)
  141. if STRICT:
  142. if spec['Type'] is not LITERAL_FONT:
  143. raise PDFFontError('Type is not /Font')
  144. # Create a Font object.
  145. if 'Subtype' in spec:
  146. subtype = literal_name(spec['Subtype'])
  147. else:
  148. if STRICT:
  149. raise PDFFontError('Font Subtype is not specified.')
  150. subtype = 'Type1'
  151. if subtype in ('Type1', 'MMType1'):
  152. # Type1 Font
  153. font = PDFType1Font(self, spec)
  154. elif subtype == 'TrueType':
  155. # TrueType Font
  156. font = PDFTrueTypeFont(self, spec)
  157. elif subtype == 'Type3':
  158. # Type3 Font
  159. font = PDFType3Font(self, spec)
  160. elif subtype in ('CIDFontType0', 'CIDFontType2'):
  161. # CID Font
  162. font = PDFCIDFont(self, spec)
  163. elif subtype == 'Type0':
  164. # Type0 Font
  165. dfonts = list_value(spec['DescendantFonts'])
  166. assert dfonts
  167. subspec = dict_value(dfonts[0]).copy()
  168. for k in ('Encoding', 'ToUnicode'):
  169. if k in spec:
  170. subspec[k] = resolve1(spec[k])
  171. font = self.get_font(None, subspec)
  172. else:
  173. if STRICT:
  174. raise PDFFontError('Invalid Font spec: %r' % spec)
  175. font = PDFType1Font(self, spec) # this is so wrong!
  176. if objid and self.caching:
  177. self._cached_fonts[objid] = font
  178. return font
  179. ## PDFContentParser
  180. ##
  181. class PDFContentParser(PSStackParser):
  182. def __init__(self, streams):
  183. self.streams = streams
  184. self.istream = 0
  185. PSStackParser.__init__(self, None)
  186. return
  187. def fillfp(self):
  188. if not self.fp:
  189. if self.istream < len(self.streams):
  190. strm = stream_value(self.streams[self.istream])
  191. self.istream += 1
  192. else:
  193. raise PSEOF('Unexpected EOF, file truncated?')
  194. self.fp = StringIO(strm.get_data())
  195. return
  196. def seek(self, pos):
  197. self.fillfp()
  198. PSStackParser.seek(self, pos)
  199. return
  200. def fillbuf(self):
  201. if self.charpos < len(self.buf):
  202. return
  203. while 1:
  204. self.fillfp()
  205. self.bufpos = self.fp.tell()
  206. self.buf = self.fp.read(self.BUFSIZ)
  207. if self.buf:
  208. break
  209. self.fp = None
  210. self.charpos = 0
  211. return
  212. def get_inline_data(self, pos, target='EI'):
  213. self.seek(pos)
  214. i = 0
  215. data = ''
  216. while i <= len(target):
  217. self.fillbuf()
  218. if i:
  219. c = self.buf[self.charpos]
  220. data += c
  221. self.charpos += 1
  222. if len(target) <= i and c.isspace():
  223. i += 1
  224. elif i < len(target) and c == target[i]:
  225. i += 1
  226. else:
  227. i = 0
  228. else:
  229. try:
  230. j = self.buf.index(target[0], self.charpos)
  231. #print 'found', (0, self.buf[j:j+10])
  232. data += self.buf[self.charpos:j+1]
  233. self.charpos = j+1
  234. i = 1
  235. except ValueError:
  236. data += self.buf[self.charpos:]
  237. self.charpos = len(self.buf)
  238. data = data[:-(len(target)+1)] # strip the last part
  239. data = re.sub(r'(\x0d\x0a|[\x0d\x0a])$', '', data)
  240. return (pos, data)
  241. def flush(self):
  242. self.add_results(*self.popall())
  243. return
  244. KEYWORD_BI = KWD('BI')
  245. KEYWORD_ID = KWD('ID')
  246. KEYWORD_EI = KWD('EI')
  247. def do_keyword(self, pos, token):
  248. if token is self.KEYWORD_BI:
  249. # inline image within a content stream
  250. self.start_type(pos, 'inline')
  251. elif token is self.KEYWORD_ID:
  252. try:
  253. (_, objs) = self.end_type('inline')
  254. if len(objs) % 2 != 0:
  255. raise PSTypeError('Invalid dictionary construct: %r' % objs)
  256. d = dict((literal_name(k), v) for (k, v) in choplist(2, objs))
  257. (pos, data) = self.get_inline_data(pos+len('ID '))
  258. obj = PDFStream(d, data)
  259. self.push((pos, obj))
  260. self.push((pos, self.KEYWORD_EI))
  261. except PSTypeError:
  262. if STRICT:
  263. raise
  264. else:
  265. self.push((pos, token))
  266. return
  267. ## Interpreter
  268. ##
  269. class PDFPageInterpreter(object):
  270. debug = 0
  271. def __init__(self, rsrcmgr, device):
  272. self.rsrcmgr = rsrcmgr
  273. self.device = device
  274. return
  275. def dup(self):
  276. return self.__class__(self.rsrcmgr, self.device)
  277. # init_resources(resources):
  278. # Prepare the fonts and XObjects listed in the Resource attribute.
  279. def init_resources(self, resources):
  280. self.resources = resources
  281. self.fontmap = {}
  282. self.xobjmap = {}
  283. self.csmap = PREDEFINED_COLORSPACE.copy()
  284. if not resources:
  285. return
  286. def get_colorspace(spec):
  287. if isinstance(spec, list):
  288. name = literal_name(spec[0])
  289. else:
  290. name = literal_name(spec)
  291. if name == 'ICCBased' and isinstance(spec, list) and 2 <= len(spec):
  292. return PDFColorSpace(name, stream_value(spec[1])['N'])
  293. elif name == 'DeviceN' and isinstance(spec, list) and 2 <= len(spec):
  294. return PDFColorSpace(name, len(list_value(spec[1])))
  295. else:
  296. return PREDEFINED_COLORSPACE.get(name)
  297. for (k, v) in dict_value(resources).iteritems():
  298. if 2 <= self.debug:
  299. print >>sys.stderr, 'Resource: %r: %r' % (k, v)
  300. if k == 'Font':
  301. for (fontid, spec) in dict_value(v).iteritems():
  302. objid = None
  303. if isinstance(spec, PDFObjRef):
  304. objid = spec.objid
  305. spec = dict_value(spec)
  306. self.fontmap[fontid] = self.rsrcmgr.get_font(objid, spec)
  307. elif k == 'ColorSpace':
  308. for (csid, spec) in dict_value(v).iteritems():
  309. self.csmap[csid] = get_colorspace(resolve1(spec))
  310. elif k == 'ProcSet':
  311. self.rsrcmgr.get_procset(list_value(v))
  312. elif k == 'XObject':
  313. for (xobjid, xobjstrm) in dict_value(v).iteritems():
  314. self.xobjmap[xobjid] = xobjstrm
  315. return
  316. # init_state(ctm)
  317. # Initialize the text and graphic states for rendering a page.
  318. def init_state(self, ctm):
  319. # gstack: stack for graphical states.
  320. self.gstack = []
  321. self.ctm = ctm
  322. self.device.set_ctm(self.ctm)
  323. self.textstate = PDFTextState()
  324. self.graphicstate = PDFGraphicState()
  325. self.curpath = []
  326. # argstack: stack for command arguments.
  327. self.argstack = []
  328. # set some global states.
  329. self.scs = self.ncs = None
  330. if self.csmap:
  331. self.scs = self.ncs = self.csmap.values()[0]
  332. return
  333. def push(self, obj):
  334. self.argstack.append(obj)
  335. return
  336. def pop(self, n):
  337. if n == 0:
  338. return []
  339. x = self.argstack[-n:]
  340. self.argstack = self.argstack[:-n]
  341. return x
  342. def get_current_state(self):
  343. return (self.ctm, self.textstate.copy(), self.graphicstate.copy())
  344. def set_current_state(self, state):
  345. (self.ctm, self.textstate, self.graphicstate) = state
  346. self.device.set_ctm(self.ctm)
  347. return
  348. # gsave
  349. def do_q(self):
  350. self.gstack.append(self.get_current_state())
  351. return
  352. # grestore
  353. def do_Q(self):
  354. if self.gstack:
  355. self.set_current_state(self.gstack.pop())
  356. return
  357. # concat-matrix
  358. def do_cm(self, a1, b1, c1, d1, e1, f1):
  359. self.ctm = mult_matrix((a1, b1, c1, d1, e1, f1), self.ctm)
  360. self.device.set_ctm(self.ctm)
  361. return
  362. # setlinewidth
  363. def do_w(self, linewidth):
  364. self.graphicstate.linewidth = linewidth
  365. return
  366. # setlinecap
  367. def do_J(self, linecap):
  368. self.graphicstate.linecap = linecap
  369. return
  370. # setlinejoin
  371. def do_j(self, linejoin):
  372. self.graphicstate.linejoin = linejoin
  373. return
  374. # setmiterlimit
  375. def do_M(self, miterlimit):
  376. self.graphicstate.miterlimit = miterlimit
  377. return
  378. # setdash
  379. def do_d(self, dash, phase):
  380. self.graphicstate.dash = (dash, phase)
  381. return
  382. # setintent
  383. def do_ri(self, intent):
  384. self.graphicstate.intent = intent
  385. return
  386. # setflatness
  387. def do_i(self, flatness):
  388. self.graphicstate.flatness = flatness
  389. return
  390. # load-gstate
  391. def do_gs(self, name):
  392. #XXX
  393. return
  394. # moveto
  395. def do_m(self, x, y):
  396. self.curpath.append(('m', x, y))
  397. return
  398. # lineto
  399. def do_l(self, x, y):
  400. self.curpath.append(('l', x, y))
  401. return
  402. # curveto
  403. def do_c(self, x1, y1, x2, y2, x3, y3):
  404. self.curpath.append(('c', x1, y1, x2, y2, x3, y3))
  405. return
  406. # urveto
  407. def do_v(self, x2, y2, x3, y3):
  408. self.curpath.append(('v', x2, y2, x3, y3))
  409. return
  410. # rveto
  411. def do_y(self, x1, y1, x3, y3):
  412. self.curpath.append(('y', x1, y1, x3, y3))
  413. return
  414. # closepath
  415. def do_h(self):
  416. self.curpath.append(('h',))
  417. return
  418. # rectangle
  419. def do_re(self, x, y, w, h):
  420. self.curpath.append(('m', x, y))
  421. self.curpath.append(('l', x+w, y))
  422. self.curpath.append(('l', x+w, y+h))
  423. self.curpath.append(('l', x, y+h))
  424. self.curpath.append(('h',))
  425. return
  426. # stroke
  427. def do_S(self):
  428. self.device.paint_path(self.graphicstate, True, False, False, self.curpath)
  429. self.curpath = []
  430. return
  431. # close-and-stroke
  432. def do_s(self):
  433. self.do_h()
  434. self.do_S()
  435. return
  436. # fill
  437. def do_f(self):
  438. self.device.paint_path(self.graphicstate, False, True, False, self.curpath)
  439. self.curpath = []
  440. return
  441. # fill (obsolete)
  442. do_F = do_f
  443. # fill-even-odd
  444. def do_f_a(self):
  445. self.device.paint_path(self.graphicstate, False, True, True, self.curpath)
  446. self.curpath = []
  447. return
  448. # fill-and-stroke
  449. def do_B(self):
  450. self.device.paint_path(self.graphicstate, True, True, False, self.curpath)
  451. self.curpath = []
  452. return
  453. # fill-and-stroke-even-odd
  454. def do_B_a(self):
  455. self.device.paint_path(self.graphicstate, True, True, True, self.curpath)
  456. self.curpath = []
  457. return
  458. # close-fill-and-stroke
  459. def do_b(self):
  460. self.do_h()
  461. self.do_B()
  462. return
  463. # close-fill-and-stroke-even-odd
  464. def do_b_a(self):
  465. self.do_h()
  466. self.do_B_a()
  467. return
  468. # close-only
  469. def do_n(self):
  470. self.curpath = []
  471. return
  472. # clip
  473. def do_W(self):
  474. return
  475. # clip-even-odd
  476. def do_W_a(self):
  477. return
  478. # setcolorspace-stroking
  479. def do_CS(self, name):
  480. try:
  481. self.scs = self.csmap[literal_name(name)]
  482. except KeyError:
  483. if STRICT:
  484. raise PDFInterpreterError('Undefined ColorSpace: %r' % name)
  485. return
  486. # setcolorspace-non-strokine
  487. def do_cs(self, name):
  488. try:
  489. self.ncs = self.csmap[literal_name(name)]
  490. except KeyError:
  491. if STRICT:
  492. raise PDFInterpreterError('Undefined ColorSpace: %r' % name)
  493. return
  494. # setgray-stroking
  495. def do_G(self, gray):
  496. #self.do_CS(LITERAL_DEVICE_GRAY)
  497. return
  498. # setgray-non-stroking
  499. def do_g(self, gray):
  500. #self.do_cs(LITERAL_DEVICE_GRAY)
  501. return
  502. # setrgb-stroking
  503. def do_RG(self, r, g, b):
  504. #self.do_CS(LITERAL_DEVICE_RGB)
  505. return
  506. # setrgb-non-stroking
  507. def do_rg(self, r, g, b):
  508. #self.do_cs(LITERAL_DEVICE_RGB)
  509. return
  510. # setcmyk-stroking
  511. def do_K(self, c, m, y, k):
  512. #self.do_CS(LITERAL_DEVICE_CMYK)
  513. return
  514. # setcmyk-non-stroking
  515. def do_k(self, c, m, y, k):
  516. #self.do_cs(LITERAL_DEVICE_CMYK)
  517. return
  518. # setcolor
  519. def do_SCN(self):
  520. if self.scs:
  521. n = self.scs.ncomponents
  522. else:
  523. if STRICT:
  524. raise PDFInterpreterError('No colorspace specified!')
  525. n = 1
  526. self.pop(n)
  527. return
  528. def do_scn(self):
  529. if self.ncs:
  530. n = self.ncs.ncomponents
  531. else:
  532. if STRICT:
  533. raise PDFInterpreterError('No colorspace specified!')
  534. n = 1
  535. self.pop(n)
  536. return
  537. def do_SC(self):
  538. self.do_SCN()
  539. return
  540. def do_sc(self):
  541. self.do_scn()
  542. return
  543. # sharing-name
  544. def do_sh(self, name):
  545. return
  546. # begin-text
  547. def do_BT(self):
  548. self.textstate.reset()
  549. return
  550. # end-text
  551. def do_ET(self):
  552. return
  553. # begin-compat
  554. def do_BX(self):
  555. return
  556. # end-compat
  557. def do_EX(self):
  558. return
  559. # marked content operators
  560. def do_MP(self, tag):
  561. self.device.do_tag(tag)
  562. return
  563. def do_DP(self, tag, props):
  564. self.device.do_tag(tag, props)
  565. return
  566. def do_BMC(self, tag):
  567. self.device.begin_tag(tag)
  568. return
  569. def do_BDC(self, tag, props):
  570. self.device.begin_tag(tag, props)
  571. return
  572. def do_EMC(self):
  573. self.device.end_tag()
  574. return
  575. # setcharspace
  576. def do_Tc(self, space):
  577. self.textstate.charspace = space
  578. return
  579. # setwordspace
  580. def do_Tw(self, space):
  581. self.textstate.wordspace = space
  582. return
  583. # textscale
  584. def do_Tz(self, scale):
  585. self.textstate.scaling = scale
  586. return
  587. # setleading
  588. def do_TL(self, leading):
  589. self.textstate.leading = -leading
  590. return
  591. # selectfont
  592. def do_Tf(self, fontid, fontsize):
  593. try:
  594. self.textstate.font = self.fontmap[literal_name(fontid)]
  595. except KeyError:
  596. if STRICT:
  597. raise PDFInterpreterError('Undefined Font id: %r' % fontid)
  598. self.textstate.font = self.rsrcmgr.get_font(None, {})
  599. self.textstate.fontsize = fontsize
  600. return
  601. # setrendering
  602. def do_Tr(self, render):
  603. self.textstate.render = render
  604. return
  605. # settextrise
  606. def do_Ts(self, rise):
  607. self.textstate.rise = rise
  608. return
  609. # text-move
  610. def do_Td(self, tx, ty):
  611. (a, b, c, d, e, f) = self.textstate.matrix
  612. self.textstate.matrix = (a, b, c, d, tx*a+ty*c+e, tx*b+ty*d+f)
  613. self.textstate.linematrix = (0, 0)
  614. #print >>sys.stderr, 'Td(%r,%r): %r' % (tx, ty, self.textstate)
  615. return
  616. # text-move
  617. def do_TD(self, tx, ty):
  618. (a, b, c, d, e, f) = self.textstate.matrix
  619. self.textstate.matrix = (a, b, c, d, tx*a+ty*c+e, tx*b+ty*d+f)
  620. self.textstate.leading = ty
  621. self.textstate.linematrix = (0, 0)
  622. #print >>sys.stderr, 'TD(%r,%r): %r' % (tx, ty, self.textstate)
  623. return
  624. # textmatrix
  625. def do_Tm(self, a, b, c, d, e, f):
  626. self.textstate.matrix = (a, b, c, d, e, f)
  627. self.textstate.linematrix = (0, 0)
  628. return
  629. # nextline
  630. def do_T_a(self):
  631. (a, b, c, d, e, f) = self.textstate.matrix
  632. self.textstate.matrix = (a, b, c, d, self.textstate.leading*c+e, self.textstate.leading*d+f)
  633. self.textstate.linematrix = (0, 0)
  634. return
  635. # show-pos
  636. def do_TJ(self, seq):
  637. #print >>sys.stderr, 'TJ(%r): %r' % (seq, self.textstate)
  638. if self.textstate.font is None:
  639. if STRICT:
  640. raise PDFInterpreterError('No font specified!')
  641. return
  642. self.device.render_string(self.textstate, seq)
  643. return
  644. # show
  645. def do_Tj(self, s):
  646. self.do_TJ([s])
  647. return
  648. # quote
  649. def do__q(self, s):
  650. self.do_T_a()
  651. self.do_TJ([s])
  652. return
  653. # doublequote
  654. def do__w(self, aw, ac, s):
  655. self.do_Tw(aw)
  656. self.do_Tc(ac)
  657. self.do_TJ([s])
  658. return
  659. # inline image
  660. def do_BI(self): # never called
  661. return
  662. def do_ID(self): # never called
  663. return
  664. def do_EI(self, obj):
  665. if 'W' in obj and 'H' in obj:
  666. iobjid = str(id(obj))
  667. self.device.begin_figure(iobjid, (0, 0, 1, 1), MATRIX_IDENTITY)
  668. self.device.render_image(iobjid, obj)
  669. self.device.end_figure(iobjid)
  670. return
  671. # invoke an XObject
  672. def do_Do(self, xobjid):
  673. xobjid = literal_name(xobjid)
  674. try:
  675. xobj = stream_value(self.xobjmap[xobjid])
  676. except KeyError:
  677. if STRICT:
  678. raise PDFInterpreterError('Undefined xobject id: %r' % xobjid)
  679. return
  680. if 1 <= self.debug:
  681. print >>sys.stderr, 'Processing xobj: %r' % xobj
  682. subtype = xobj.get('Subtype')
  683. if subtype is LITERAL_FORM and 'BBox' in xobj:
  684. interpreter = self.dup()
  685. bbox = list_value(xobj['BBox'])
  686. matrix = list_value(xobj.get('Matrix', MATRIX_IDENTITY))
  687. # According to PDF reference 1.7 section 4.9.1, XObjects in
  688. # earlier PDFs (prior to v1.2) use the page's Resources entry
  689. # instead of having their own Resources entry.
  690. resources = dict_value(xobj.get('Resources')) or self.resources.copy()
  691. self.device.begin_figure(xobjid, bbox, matrix)
  692. interpreter.render_contents(resources, [xobj], ctm=mult_matrix(matrix, self.ctm))
  693. self.device.end_figure(xobjid)
  694. elif subtype is LITERAL_IMAGE and 'Width' in xobj and 'Height' in xobj:
  695. self.device.begin_figure(xobjid, (0, 0, 1, 1), MATRIX_IDENTITY)
  696. self.device.render_image(xobjid, xobj)
  697. self.device.end_figure(xobjid)
  698. else:
  699. # unsupported xobject type.
  700. pass
  701. return
  702. def process_page(self, page):
  703. if 1 <= self.debug:
  704. print >>sys.stderr, 'Processing page: %r' % page
  705. (x0, y0, x1, y1) = page.mediabox
  706. if page.rotate == 90:
  707. ctm = (0, -1, 1, 0, -y0, x1)
  708. elif page.rotate == 180:
  709. ctm = (-1, 0, 0, -1, x1, y1)
  710. elif page.rotate == 270:
  711. ctm = (0, 1, -1, 0, y1, -x0)
  712. else:
  713. ctm = (1, 0, 0, 1, -x0, -y0)
  714. self.device.begin_page(page, ctm)
  715. self.render_contents(page.resources, page.contents, ctm=ctm)
  716. self.device.end_page(page)
  717. return
  718. # render_contents(resources, streams, ctm)
  719. # Render the content streams.
  720. # This method may be called recursively.
  721. def render_contents(self, resources, streams, ctm=MATRIX_IDENTITY):
  722. if 1 <= self.debug:
  723. print >>sys.stderr, ('render_contents: resources=%r, streams=%r, ctm=%r' %
  724. (resources, streams, ctm))
  725. self.init_resources(resources)
  726. self.init_state(ctm)
  727. self.execute(list_value(streams))
  728. return
  729. def execute(self, streams):
  730. try:
  731. parser = PDFContentParser(streams)
  732. except PSEOF:
  733. # empty page
  734. return
  735. while 1:
  736. try:
  737. (_, obj) = parser.nextobject()
  738. except PSEOF:
  739. break
  740. if isinstance(obj, PSKeyword):
  741. name = keyword_name(obj)
  742. method = 'do_%s' % name.replace('*', '_a').replace('"', '_w').replace("'", '_q')
  743. if hasattr(self, method):
  744. func = getattr(self, method)
  745. nargs = func.func_code.co_argcount-1
  746. if nargs:
  747. args = self.pop(nargs)
  748. if 2 <= self.debug:
  749. print >>sys.stderr, 'exec: %s %r' % (name, args)
  750. if len(args) == nargs:
  751. func(*args)
  752. else:
  753. if 2 <= self.debug:
  754. print >>sys.stderr, 'exec: %s' % (name)
  755. func()
  756. else:
  757. if STRICT:
  758. raise PDFInterpreterError('Unknown operator: %r' % name)
  759. else:
  760. self.push(obj)
  761. return