catalog.py 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822
  1. # -*- coding: utf-8 -*-
  2. """
  3. babel.messages.catalog
  4. ~~~~~~~~~~~~~~~~~~~~~~
  5. Data structures for message catalogs.
  6. :copyright: (c) 2013 by the Babel Team.
  7. :license: BSD, see LICENSE for more details.
  8. """
  9. import re
  10. import time
  11. from cgi import parse_header
  12. from datetime import datetime, time as time_
  13. from difflib import get_close_matches
  14. from email import message_from_string
  15. from copy import copy
  16. from babel import __version__ as VERSION
  17. from babel.core import Locale
  18. from babel.dates import format_datetime
  19. from babel.messages.plurals import get_plural
  20. from babel.util import odict, distinct, LOCALTZ, FixedOffsetTimezone
  21. from babel._compat import string_types, number_types, PY2, cmp
# Public names of this module.
__all__ = ['Message', 'Catalog', 'TranslationError']

# Matches a single printf-style conversion specifier.  The pattern is
# compiled with re.VERBOSE, so the line breaks inside it are insignificant.
#   group 1: the optional ``(name)`` mapping key
#   group 2: the optional flag, width, precision and length modifier
#   group 3: the conversion type character
PYTHON_FORMAT = re.compile(r'''
\%
(?:\(([\w]*)\))?
(
[-#0\ +]?(?:\*|[\d]+)?
(?:\.(?:\*|[\d]+))?
[hlL]?
)
([diouxXeEfFgGcrs%])
''', re.VERBOSE)
  33. def _parse_datetime_header(value):
  34. match = re.match(r'^(?P<datetime>.*?)(?P<tzoffset>[+-]\d{4})?$', value)
  35. tt = time.strptime(match.group('datetime'), '%Y-%m-%d %H:%M')
  36. ts = time.mktime(tt)
  37. dt = datetime.fromtimestamp(ts)
  38. # Separate the offset into a sign component, hours, and # minutes
  39. tzoffset = match.group('tzoffset')
  40. if tzoffset is not None:
  41. plus_minus_s, rest = tzoffset[0], tzoffset[1:]
  42. hours_offset_s, mins_offset_s = rest[:2], rest[2:]
  43. # Make them all integers
  44. plus_minus = int(plus_minus_s + '1')
  45. hours_offset = int(hours_offset_s)
  46. mins_offset = int(mins_offset_s)
  47. # Calculate net offset
  48. net_mins_offset = hours_offset * 60
  49. net_mins_offset += mins_offset
  50. net_mins_offset *= plus_minus
  51. # Create an offset object
  52. tzoffset = FixedOffsetTimezone(net_mins_offset)
  53. # Store the offset in a datetime object
  54. dt = dt.replace(tzinfo=tzoffset)
  55. return dt
  56. class Message(object):
  57. """Representation of a single message in a catalog."""
  58. def __init__(self, id, string=u'', locations=(), flags=(), auto_comments=(),
  59. user_comments=(), previous_id=(), lineno=None, context=None):
  60. """Create the message object.
  61. :param id: the message ID, or a ``(singular, plural)`` tuple for
  62. pluralizable messages
  63. :param string: the translated message string, or a
  64. ``(singular, plural)`` tuple for pluralizable messages
  65. :param locations: a sequence of ``(filename, lineno)`` tuples
  66. :param flags: a set or sequence of flags
  67. :param auto_comments: a sequence of automatic comments for the message
  68. :param user_comments: a sequence of user comments for the message
  69. :param previous_id: the previous message ID, or a ``(singular, plural)``
  70. tuple for pluralizable messages
  71. :param lineno: the line number on which the msgid line was found in the
  72. PO file, if any
  73. :param context: the message context
  74. """
  75. self.id = id
  76. if not string and self.pluralizable:
  77. string = (u'', u'')
  78. self.string = string
  79. self.locations = list(distinct(locations))
  80. self.flags = set(flags)
  81. if id and self.python_format:
  82. self.flags.add('python-format')
  83. else:
  84. self.flags.discard('python-format')
  85. self.auto_comments = list(distinct(auto_comments))
  86. self.user_comments = list(distinct(user_comments))
  87. if isinstance(previous_id, string_types):
  88. self.previous_id = [previous_id]
  89. else:
  90. self.previous_id = list(previous_id)
  91. self.lineno = lineno
  92. self.context = context
  93. def __repr__(self):
  94. return '<%s %r (flags: %r)>' % (type(self).__name__, self.id,
  95. list(self.flags))
  96. def __cmp__(self, obj):
  97. """Compare Messages, taking into account plural ids"""
  98. def values_to_compare():
  99. if isinstance(obj, Message):
  100. plural = self.pluralizable
  101. obj_plural = obj.pluralizable
  102. if plural and obj_plural:
  103. return self.id[0], obj.id[0]
  104. elif plural:
  105. return self.id[0], obj.id
  106. elif obj_plural:
  107. return self.id, obj.id[0]
  108. return self.id, obj.id
  109. this, other = values_to_compare()
  110. return cmp(this, other)
  111. def __gt__(self, other):
  112. return self.__cmp__(other) > 0
  113. def __lt__(self, other):
  114. return self.__cmp__(other) < 0
  115. def __ge__(self, other):
  116. return self.__cmp__(other) >= 0
  117. def __le__(self, other):
  118. return self.__cmp__(other) <= 0
  119. def __eq__(self, other):
  120. return self.__cmp__(other) == 0
  121. def __ne__(self, other):
  122. return self.__cmp__(other) != 0
  123. def clone(self):
  124. return Message(*map(copy, (self.id, self.string, self.locations,
  125. self.flags, self.auto_comments,
  126. self.user_comments, self.previous_id,
  127. self.lineno, self.context)))
  128. def check(self, catalog=None):
  129. """Run various validation checks on the message. Some validations
  130. are only performed if the catalog is provided. This method returns
  131. a sequence of `TranslationError` objects.
  132. :rtype: ``iterator``
  133. :param catalog: A catalog instance that is passed to the checkers
  134. :see: `Catalog.check` for a way to perform checks for all messages
  135. in a catalog.
  136. """
  137. from babel.messages.checkers import checkers
  138. errors = []
  139. for checker in checkers:
  140. try:
  141. checker(catalog, self)
  142. except TranslationError as e:
  143. errors.append(e)
  144. return errors
  145. @property
  146. def fuzzy(self):
  147. """Whether the translation is fuzzy.
  148. >>> Message('foo').fuzzy
  149. False
  150. >>> msg = Message('foo', 'foo', flags=['fuzzy'])
  151. >>> msg.fuzzy
  152. True
  153. >>> msg
  154. <Message 'foo' (flags: ['fuzzy'])>
  155. :type: `bool`"""
  156. return 'fuzzy' in self.flags
  157. @property
  158. def pluralizable(self):
  159. """Whether the message is plurizable.
  160. >>> Message('foo').pluralizable
  161. False
  162. >>> Message(('foo', 'bar')).pluralizable
  163. True
  164. :type: `bool`"""
  165. return isinstance(self.id, (list, tuple))
  166. @property
  167. def python_format(self):
  168. """Whether the message contains Python-style parameters.
  169. >>> Message('foo %(name)s bar').python_format
  170. True
  171. >>> Message(('foo %(name)s', 'foo %(name)s')).python_format
  172. True
  173. :type: `bool`"""
  174. ids = self.id
  175. if not isinstance(ids, (list, tuple)):
  176. ids = [ids]
  177. return any(PYTHON_FORMAT.search(id) for id in ids)
  178. class TranslationError(Exception):
  179. """Exception thrown by translation checkers when invalid message
  180. translations are encountered."""
# Header comment used for catalogs that do not supply their own.  The
# upper-case words PROJECT, YEAR and ORGANIZATION are placeholders that
# ``Catalog._get_header_comment`` substitutes when the header is read.
DEFAULT_HEADER = u"""\
# Translations template for PROJECT.
# Copyright (C) YEAR ORGANIZATION
# This file is distributed under the same license as the PROJECT project.
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#"""
  187. if PY2:
  188. def _parse_header(header_string):
  189. # message_from_string only works for str, not for unicode
  190. headers = message_from_string(header_string.encode('utf8'))
  191. decoded_headers = {}
  192. for name, value in headers.items():
  193. name = name.decode('utf8')
  194. value = value.decode('utf8')
  195. decoded_headers[name] = value
  196. return decoded_headers
  197. else:
  198. _parse_header = message_from_string
  199. class Catalog(object):
  200. """Representation of a message catalog."""
  201. def __init__(self, locale=None, domain=None, header_comment=DEFAULT_HEADER,
  202. project=None, version=None, copyright_holder=None,
  203. msgid_bugs_address=None, creation_date=None,
  204. revision_date=None, last_translator=None, language_team=None,
  205. charset=None, fuzzy=True):
  206. """Initialize the catalog object.
  207. :param locale: the locale identifier or `Locale` object, or `None`
  208. if the catalog is not bound to a locale (which basically
  209. means it's a template)
  210. :param domain: the message domain
  211. :param header_comment: the header comment as string, or `None` for the
  212. default header
  213. :param project: the project's name
  214. :param version: the project's version
  215. :param copyright_holder: the copyright holder of the catalog
  216. :param msgid_bugs_address: the email address or URL to submit bug
  217. reports to
  218. :param creation_date: the date the catalog was created
  219. :param revision_date: the date the catalog was revised
  220. :param last_translator: the name and email of the last translator
  221. :param language_team: the name and email of the language team
  222. :param charset: the encoding to use in the output (defaults to utf-8)
  223. :param fuzzy: the fuzzy bit on the catalog header
  224. """
  225. self.domain = domain
  226. if locale:
  227. locale = Locale.parse(locale)
  228. self.locale = locale
  229. self._header_comment = header_comment
  230. self._messages = odict()
  231. self.project = project or 'PROJECT'
  232. self.version = version or 'VERSION'
  233. self.copyright_holder = copyright_holder or 'ORGANIZATION'
  234. self.msgid_bugs_address = msgid_bugs_address or 'EMAIL@ADDRESS'
  235. self.last_translator = last_translator or 'FULL NAME <EMAIL@ADDRESS>'
  236. """Name and email address of the last translator."""
  237. self.language_team = language_team or 'LANGUAGE <LL@li.org>'
  238. """Name and email address of the language team."""
  239. self.charset = charset or 'utf-8'
  240. if creation_date is None:
  241. creation_date = datetime.now(LOCALTZ)
  242. elif isinstance(creation_date, datetime) and not creation_date.tzinfo:
  243. creation_date = creation_date.replace(tzinfo=LOCALTZ)
  244. self.creation_date = creation_date
  245. if revision_date is None:
  246. revision_date = 'YEAR-MO-DA HO:MI+ZONE'
  247. elif isinstance(revision_date, datetime) and not revision_date.tzinfo:
  248. revision_date = revision_date.replace(tzinfo=LOCALTZ)
  249. self.revision_date = revision_date
  250. self.fuzzy = fuzzy
  251. self.obsolete = odict() # Dictionary of obsolete messages
  252. self._num_plurals = None
  253. self._plural_expr = None
  254. def _get_header_comment(self):
  255. comment = self._header_comment
  256. year = datetime.now(LOCALTZ).strftime('%Y')
  257. if hasattr(self.revision_date, 'strftime'):
  258. year = self.revision_date.strftime('%Y')
  259. comment = comment.replace('PROJECT', self.project) \
  260. .replace('VERSION', self.version) \
  261. .replace('YEAR', year) \
  262. .replace('ORGANIZATION', self.copyright_holder)
  263. if self.locale:
  264. comment = comment.replace('Translations template', '%s translations'
  265. % self.locale.english_name)
  266. return comment
  267. def _set_header_comment(self, string):
  268. self._header_comment = string
  269. header_comment = property(_get_header_comment, _set_header_comment, doc="""\
  270. The header comment for the catalog.
  271. >>> catalog = Catalog(project='Foobar', version='1.0',
  272. ... copyright_holder='Foo Company')
  273. >>> print(catalog.header_comment) #doctest: +ELLIPSIS
  274. # Translations template for Foobar.
  275. # Copyright (C) ... Foo Company
  276. # This file is distributed under the same license as the Foobar project.
  277. # FIRST AUTHOR <EMAIL@ADDRESS>, ....
  278. #
  279. The header can also be set from a string. Any known upper-case variables
  280. will be replaced when the header is retrieved again:
  281. >>> catalog = Catalog(project='Foobar', version='1.0',
  282. ... copyright_holder='Foo Company')
  283. >>> catalog.header_comment = '''\\
  284. ... # The POT for my really cool PROJECT project.
  285. ... # Copyright (C) 1990-2003 ORGANIZATION
  286. ... # This file is distributed under the same license as the PROJECT
  287. ... # project.
  288. ... #'''
  289. >>> print(catalog.header_comment)
  290. # The POT for my really cool Foobar project.
  291. # Copyright (C) 1990-2003 Foo Company
  292. # This file is distributed under the same license as the Foobar
  293. # project.
  294. #
  295. :type: `unicode`
  296. """)
  297. def _get_mime_headers(self):
  298. headers = []
  299. headers.append(('Project-Id-Version',
  300. '%s %s' % (self.project, self.version)))
  301. headers.append(('Report-Msgid-Bugs-To', self.msgid_bugs_address))
  302. headers.append(('POT-Creation-Date',
  303. format_datetime(self.creation_date, 'yyyy-MM-dd HH:mmZ',
  304. locale='en')))
  305. if isinstance(self.revision_date, (datetime, time_) + number_types):
  306. headers.append(('PO-Revision-Date',
  307. format_datetime(self.revision_date,
  308. 'yyyy-MM-dd HH:mmZ', locale='en')))
  309. else:
  310. headers.append(('PO-Revision-Date', self.revision_date))
  311. headers.append(('Last-Translator', self.last_translator))
  312. if self.locale is not None:
  313. headers.append(('Language', str(self.locale)))
  314. if (self.locale is not None) and ('LANGUAGE' in self.language_team):
  315. headers.append(('Language-Team',
  316. self.language_team.replace('LANGUAGE',
  317. str(self.locale))))
  318. else:
  319. headers.append(('Language-Team', self.language_team))
  320. if self.locale is not None:
  321. headers.append(('Plural-Forms', self.plural_forms))
  322. headers.append(('MIME-Version', '1.0'))
  323. headers.append(('Content-Type',
  324. 'text/plain; charset=%s' % self.charset))
  325. headers.append(('Content-Transfer-Encoding', '8bit'))
  326. headers.append(('Generated-By', 'Babel %s\n' % VERSION))
  327. return headers
  328. def _set_mime_headers(self, headers):
  329. for name, value in headers:
  330. name = name.lower()
  331. if name == 'project-id-version':
  332. parts = value.split(' ')
  333. self.project = u' '.join(parts[:-1])
  334. self.version = parts[-1]
  335. elif name == 'report-msgid-bugs-to':
  336. self.msgid_bugs_address = value
  337. elif name == 'last-translator':
  338. self.last_translator = value
  339. elif name == 'language':
  340. self.locale = Locale.parse(value)
  341. elif name == 'language-team':
  342. self.language_team = value
  343. elif name == 'content-type':
  344. mimetype, params = parse_header(value)
  345. if 'charset' in params:
  346. self.charset = params['charset'].lower()
  347. elif name == 'plural-forms':
  348. _, params = parse_header(' ;' + value)
  349. self._num_plurals = int(params.get('nplurals', 2))
  350. self._plural_expr = params.get('plural', '(n != 1)')
  351. elif name == 'pot-creation-date':
  352. self.creation_date = _parse_datetime_header(value)
  353. elif name == 'po-revision-date':
  354. # Keep the value if it's not the default one
  355. if 'YEAR' not in value:
  356. self.revision_date = _parse_datetime_header(value)
  357. mime_headers = property(_get_mime_headers, _set_mime_headers, doc="""\
  358. The MIME headers of the catalog, used for the special ``msgid ""`` entry.
  359. The behavior of this property changes slightly depending on whether a locale
  360. is set or not, the latter indicating that the catalog is actually a template
  361. for actual translations.
  362. Here's an example of the output for such a catalog template:
  363. >>> from babel.dates import UTC
  364. >>> created = datetime(1990, 4, 1, 15, 30, tzinfo=UTC)
  365. >>> catalog = Catalog(project='Foobar', version='1.0',
  366. ... creation_date=created)
  367. >>> for name, value in catalog.mime_headers:
  368. ... print('%s: %s' % (name, value))
  369. Project-Id-Version: Foobar 1.0
  370. Report-Msgid-Bugs-To: EMAIL@ADDRESS
  371. POT-Creation-Date: 1990-04-01 15:30+0000
  372. PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE
  373. Last-Translator: FULL NAME <EMAIL@ADDRESS>
  374. Language-Team: LANGUAGE <LL@li.org>
  375. MIME-Version: 1.0
  376. Content-Type: text/plain; charset=utf-8
  377. Content-Transfer-Encoding: 8bit
  378. Generated-By: Babel ...
  379. And here's an example of the output when the locale is set:
  380. >>> revised = datetime(1990, 8, 3, 12, 0, tzinfo=UTC)
  381. >>> catalog = Catalog(locale='de_DE', project='Foobar', version='1.0',
  382. ... creation_date=created, revision_date=revised,
  383. ... last_translator='John Doe <jd@example.com>',
  384. ... language_team='de_DE <de@example.com>')
  385. >>> for name, value in catalog.mime_headers:
  386. ... print('%s: %s' % (name, value))
  387. Project-Id-Version: Foobar 1.0
  388. Report-Msgid-Bugs-To: EMAIL@ADDRESS
  389. POT-Creation-Date: 1990-04-01 15:30+0000
  390. PO-Revision-Date: 1990-08-03 12:00+0000
  391. Last-Translator: John Doe <jd@example.com>
  392. Language: de_DE
  393. Language-Team: de_DE <de@example.com>
  394. Plural-Forms: nplurals=2; plural=(n != 1)
  395. MIME-Version: 1.0
  396. Content-Type: text/plain; charset=utf-8
  397. Content-Transfer-Encoding: 8bit
  398. Generated-By: Babel ...
  399. :type: `list`
  400. """)
  401. @property
  402. def num_plurals(self):
  403. """The number of plurals used by the catalog or locale.
  404. >>> Catalog(locale='en').num_plurals
  405. 2
  406. >>> Catalog(locale='ga').num_plurals
  407. 5
  408. :type: `int`"""
  409. if self._num_plurals is None:
  410. num = 2
  411. if self.locale:
  412. num = get_plural(self.locale)[0]
  413. self._num_plurals = num
  414. return self._num_plurals
  415. @property
  416. def plural_expr(self):
  417. """The plural expression used by the catalog or locale.
  418. >>> Catalog(locale='en').plural_expr
  419. '(n != 1)'
  420. >>> Catalog(locale='ga').plural_expr
  421. '(n==1 ? 0 : n==2 ? 1 : n>=3 && n<=6 ? 2 : n>=7 && n<=10 ? 3 : 4)'
  422. :type: `string_types`"""
  423. if self._plural_expr is None:
  424. expr = '(n != 1)'
  425. if self.locale:
  426. expr = get_plural(self.locale)[1]
  427. self._plural_expr = expr
  428. return self._plural_expr
  429. @property
  430. def plural_forms(self):
  431. """Return the plural forms declaration for the locale.
  432. >>> Catalog(locale='en').plural_forms
  433. 'nplurals=2; plural=(n != 1)'
  434. >>> Catalog(locale='pt_BR').plural_forms
  435. 'nplurals=2; plural=(n > 1)'
  436. :type: `str`"""
  437. return 'nplurals=%s; plural=%s' % (self.num_plurals, self.plural_expr)
  438. def __contains__(self, id):
  439. """Return whether the catalog has a message with the specified ID."""
  440. return self._key_for(id) in self._messages
  441. def __len__(self):
  442. """The number of messages in the catalog.
  443. This does not include the special ``msgid ""`` entry."""
  444. return len(self._messages)
  445. def __iter__(self):
  446. """Iterates through all the entries in the catalog, in the order they
  447. were added, yielding a `Message` object for every entry.
  448. :rtype: ``iterator``"""
  449. buf = []
  450. for name, value in self.mime_headers:
  451. buf.append('%s: %s' % (name, value))
  452. flags = set()
  453. if self.fuzzy:
  454. flags |= set(['fuzzy'])
  455. yield Message(u'', '\n'.join(buf), flags=flags)
  456. for key in self._messages:
  457. yield self._messages[key]
  458. def __repr__(self):
  459. locale = ''
  460. if self.locale:
  461. locale = ' %s' % self.locale
  462. return '<%s %r%s>' % (type(self).__name__, self.domain, locale)
  463. def __delitem__(self, id):
  464. """Delete the message with the specified ID."""
  465. self.delete(id)
  466. def __getitem__(self, id):
  467. """Return the message with the specified ID.
  468. :param id: the message ID
  469. """
  470. return self.get(id)
  471. def __setitem__(self, id, message):
  472. """Add or update the message with the specified ID.
  473. >>> catalog = Catalog()
  474. >>> catalog[u'foo'] = Message(u'foo')
  475. >>> catalog[u'foo']
  476. <Message u'foo' (flags: [])>
  477. If a message with that ID is already in the catalog, it is updated
  478. to include the locations and flags of the new message.
  479. >>> catalog = Catalog()
  480. >>> catalog[u'foo'] = Message(u'foo', locations=[('main.py', 1)])
  481. >>> catalog[u'foo'].locations
  482. [('main.py', 1)]
  483. >>> catalog[u'foo'] = Message(u'foo', locations=[('utils.py', 5)])
  484. >>> catalog[u'foo'].locations
  485. [('main.py', 1), ('utils.py', 5)]
  486. :param id: the message ID
  487. :param message: the `Message` object
  488. """
  489. assert isinstance(message, Message), 'expected a Message object'
  490. key = self._key_for(id, message.context)
  491. current = self._messages.get(key)
  492. if current:
  493. if message.pluralizable and not current.pluralizable:
  494. # The new message adds pluralization
  495. current.id = message.id
  496. current.string = message.string
  497. current.locations = list(distinct(current.locations +
  498. message.locations))
  499. current.auto_comments = list(distinct(current.auto_comments +
  500. message.auto_comments))
  501. current.user_comments = list(distinct(current.user_comments +
  502. message.user_comments))
  503. current.flags |= message.flags
  504. message = current
  505. elif id == '':
  506. # special treatment for the header message
  507. self.mime_headers = _parse_header(message.string).items()
  508. self.header_comment = '\n'.join([('# %s' % c).rstrip() for c
  509. in message.user_comments])
  510. self.fuzzy = message.fuzzy
  511. else:
  512. if isinstance(id, (list, tuple)):
  513. assert isinstance(message.string, (list, tuple)), \
  514. 'Expected sequence but got %s' % type(message.string)
  515. self._messages[key] = message
  516. def add(self, id, string=None, locations=(), flags=(), auto_comments=(),
  517. user_comments=(), previous_id=(), lineno=None, context=None):
  518. """Add or update the message with the specified ID.
  519. >>> catalog = Catalog()
  520. >>> catalog.add(u'foo')
  521. <Message ...>
  522. >>> catalog[u'foo']
  523. <Message u'foo' (flags: [])>
  524. This method simply constructs a `Message` object with the given
  525. arguments and invokes `__setitem__` with that object.
  526. :param id: the message ID, or a ``(singular, plural)`` tuple for
  527. pluralizable messages
  528. :param string: the translated message string, or a
  529. ``(singular, plural)`` tuple for pluralizable messages
  530. :param locations: a sequence of ``(filename, lineno)`` tuples
  531. :param flags: a set or sequence of flags
  532. :param auto_comments: a sequence of automatic comments
  533. :param user_comments: a sequence of user comments
  534. :param previous_id: the previous message ID, or a ``(singular, plural)``
  535. tuple for pluralizable messages
  536. :param lineno: the line number on which the msgid line was found in the
  537. PO file, if any
  538. :param context: the message context
  539. """
  540. message = Message(id, string, list(locations), flags, auto_comments,
  541. user_comments, previous_id, lineno=lineno,
  542. context=context)
  543. self[id] = message
  544. return message
  545. def check(self):
  546. """Run various validation checks on the translations in the catalog.
  547. For every message which fails validation, this method yield a
  548. ``(message, errors)`` tuple, where ``message`` is the `Message` object
  549. and ``errors`` is a sequence of `TranslationError` objects.
  550. :rtype: ``iterator``
  551. """
  552. for message in self._messages.values():
  553. errors = message.check(catalog=self)
  554. if errors:
  555. yield message, errors
  556. def get(self, id, context=None):
  557. """Return the message with the specified ID and context.
  558. :param id: the message ID
  559. :param context: the message context, or ``None`` for no context
  560. """
  561. return self._messages.get(self._key_for(id, context))
  562. def delete(self, id, context=None):
  563. """Delete the message with the specified ID and context.
  564. :param id: the message ID
  565. :param context: the message context, or ``None`` for no context
  566. """
  567. key = self._key_for(id, context)
  568. if key in self._messages:
  569. del self._messages[key]
  570. def update(self, template, no_fuzzy_matching=False, update_header_comment=False):
  571. """Update the catalog based on the given template catalog.
  572. >>> from babel.messages import Catalog
  573. >>> template = Catalog()
  574. >>> template.add('green', locations=[('main.py', 99)])
  575. <Message ...>
  576. >>> template.add('blue', locations=[('main.py', 100)])
  577. <Message ...>
  578. >>> template.add(('salad', 'salads'), locations=[('util.py', 42)])
  579. <Message ...>
  580. >>> catalog = Catalog(locale='de_DE')
  581. >>> catalog.add('blue', u'blau', locations=[('main.py', 98)])
  582. <Message ...>
  583. >>> catalog.add('head', u'Kopf', locations=[('util.py', 33)])
  584. <Message ...>
  585. >>> catalog.add(('salad', 'salads'), (u'Salat', u'Salate'),
  586. ... locations=[('util.py', 38)])
  587. <Message ...>
  588. >>> catalog.update(template)
  589. >>> len(catalog)
  590. 3
  591. >>> msg1 = catalog['green']
  592. >>> msg1.string
  593. >>> msg1.locations
  594. [('main.py', 99)]
  595. >>> msg2 = catalog['blue']
  596. >>> msg2.string
  597. u'blau'
  598. >>> msg2.locations
  599. [('main.py', 100)]
  600. >>> msg3 = catalog['salad']
  601. >>> msg3.string
  602. (u'Salat', u'Salate')
  603. >>> msg3.locations
  604. [('util.py', 42)]
  605. Messages that are in the catalog but not in the template are removed
  606. from the main collection, but can still be accessed via the `obsolete`
  607. member:
  608. >>> 'head' in catalog
  609. False
  610. >>> list(catalog.obsolete.values())
  611. [<Message 'head' (flags: [])>]
  612. :param template: the reference catalog, usually read from a POT file
  613. :param no_fuzzy_matching: whether to use fuzzy matching of message IDs
  614. """
  615. messages = self._messages
  616. remaining = messages.copy()
  617. self._messages = odict()
  618. # Prepare for fuzzy matching
  619. fuzzy_candidates = []
  620. if not no_fuzzy_matching:
  621. fuzzy_candidates = dict([
  622. (self._key_for(msgid), messages[msgid].context)
  623. for msgid in messages if msgid and messages[msgid].string
  624. ])
  625. fuzzy_matches = set()
  626. def _merge(message, oldkey, newkey):
  627. message = message.clone()
  628. fuzzy = False
  629. if oldkey != newkey:
  630. fuzzy = True
  631. fuzzy_matches.add(oldkey)
  632. oldmsg = messages.get(oldkey)
  633. if isinstance(oldmsg.id, string_types):
  634. message.previous_id = [oldmsg.id]
  635. else:
  636. message.previous_id = list(oldmsg.id)
  637. else:
  638. oldmsg = remaining.pop(oldkey, None)
  639. message.string = oldmsg.string
  640. if isinstance(message.id, (list, tuple)):
  641. if not isinstance(message.string, (list, tuple)):
  642. fuzzy = True
  643. message.string = tuple(
  644. [message.string] + ([u''] * (len(message.id) - 1))
  645. )
  646. elif len(message.string) != self.num_plurals:
  647. fuzzy = True
  648. message.string = tuple(message.string[:len(oldmsg.string)])
  649. elif isinstance(message.string, (list, tuple)):
  650. fuzzy = True
  651. message.string = message.string[0]
  652. message.flags |= oldmsg.flags
  653. if fuzzy:
  654. message.flags |= set([u'fuzzy'])
  655. self[message.id] = message
  656. for message in template:
  657. if message.id:
  658. key = self._key_for(message.id, message.context)
  659. if key in messages:
  660. _merge(message, key, key)
  661. else:
  662. if no_fuzzy_matching is False:
  663. # do some fuzzy matching with difflib
  664. if isinstance(key, tuple):
  665. matchkey = key[0] # just the msgid, no context
  666. else:
  667. matchkey = key
  668. matches = get_close_matches(matchkey.lower().strip(),
  669. fuzzy_candidates.keys(), 1)
  670. if matches:
  671. newkey = matches[0]
  672. newctxt = fuzzy_candidates[newkey]
  673. if newctxt is not None:
  674. newkey = newkey, newctxt
  675. _merge(message, newkey, key)
  676. continue
  677. self[message.id] = message
  678. for msgid in remaining:
  679. if no_fuzzy_matching or msgid not in fuzzy_matches:
  680. self.obsolete[msgid] = remaining[msgid]
  681. if update_header_comment:
  682. # Allow the updated catalog's header to be rewritten based on the
  683. # template's header
  684. self.header_comment = template.header_comment
  685. # Make updated catalog's POT-Creation-Date equal to the template
  686. # used to update the catalog
  687. self.creation_date = template.creation_date
  688. def _key_for(self, id, context=None):
  689. """The key for a message is just the singular ID even for pluralizable
  690. messages, but is a ``(msgid, msgctxt)`` tuple for context-specific
  691. messages.
  692. """
  693. key = id
  694. if isinstance(key, (list, tuple)):
  695. key = id[0]
  696. if context is not None:
  697. key = (key, context)
  698. return key