test_builder_registry.py 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141
  1. """Tests of the builder registry."""
  2. import unittest
  3. from bs4 import BeautifulSoup
  4. from bs4.builder import (
  5. builder_registry as registry,
  6. HTMLParserTreeBuilder,
  7. TreeBuilderRegistry,
  8. )
  9. try:
  10. from bs4.builder import HTML5TreeBuilder
  11. HTML5LIB_PRESENT = True
  12. except ImportError:
  13. HTML5LIB_PRESENT = False
  14. try:
  15. from bs4.builder import (
  16. LXMLTreeBuilderForXML,
  17. LXMLTreeBuilder,
  18. )
  19. LXML_PRESENT = True
  20. except ImportError:
  21. LXML_PRESENT = False
  class BuiltInRegistryTest(unittest.TestCase):
      """Test the built-in registry with the default builders registered.

      Expectations that involve the lxml or html5lib builders are guarded by
      the module-level LXML_PRESENT / HTML5LIB_PRESENT flags, which record
      whether those builder classes could be imported.
      """

      def test_combination(self):
          # Look up a builder by a descriptive feature plus a markup type.
          if LXML_PRESENT:
              self.assertEqual(registry.lookup('fast', 'html'),
                               LXMLTreeBuilder)
              self.assertEqual(registry.lookup('permissive', 'xml'),
                               LXMLTreeBuilderForXML)

          # This expectation does not depend on any optional import.
          self.assertEqual(registry.lookup('strict', 'html'),
                           HTMLParserTreeBuilder)

          if HTML5LIB_PRESENT:
              self.assertEqual(registry.lookup('html5lib', 'html'),
                               HTML5TreeBuilder)

      def test_lookup_by_markup_type(self):
          # Look up a builder by markup type alone.
          if LXML_PRESENT:
              self.assertEqual(registry.lookup('html'), LXMLTreeBuilder)
              self.assertEqual(registry.lookup('xml'), LXMLTreeBuilderForXML)
          else:
              # Without lxml, no builder is expected for 'xml'.
              self.assertIsNone(registry.lookup('xml'))
              # The fallbacks for 'html' are only checked here: when lxml is
              # importable, 'html' is asserted to be LXMLTreeBuilder above.
              if HTML5LIB_PRESENT:
                  self.assertEqual(registry.lookup('html'), HTML5TreeBuilder)
              else:
                  self.assertEqual(registry.lookup('html'),
                                   HTMLParserTreeBuilder)

      def test_named_library(self):
          # Look up a builder by the name of the parser library.
          if LXML_PRESENT:
              self.assertEqual(registry.lookup('lxml', 'xml'),
                               LXMLTreeBuilderForXML)
              self.assertEqual(registry.lookup('lxml', 'html'),
                               LXMLTreeBuilder)

          if HTML5LIB_PRESENT:
              self.assertEqual(registry.lookup('html5lib'),
                               HTML5TreeBuilder)

          self.assertEqual(registry.lookup('html.parser'),
                           HTMLParserTreeBuilder)

      def test_beautifulsoup_constructor_does_lookup(self):
          # You can pass in a string.
          BeautifulSoup("", features="html")
          # Or a list of strings.
          BeautifulSoup("", features=["html", "fast"])

          # You'll get an exception if BS can't find an appropriate
          # builder.
          self.assertRaises(ValueError, BeautifulSoup,
                            "", features="no-such-feature")
  class RegistryTest(unittest.TestCase):
      """Test the TreeBuilderRegistry class in general."""

      def setUp(self):
          # Every test starts from a fresh, empty registry.
          self.registry = TreeBuilderRegistry()

      def builder_for_features(self, *feature_list):
          """Create, register and return a stand-in builder class.

          The class is named after the given features, exposes them as a
          tuple in its ``features`` attribute, and is registered with
          ``self.registry`` before being returned.
          """
          builder_class = type('Builder_' + '_'.join(feature_list),
                               (object,), {'features': feature_list})
          self.registry.register(builder_class)
          return builder_class

      def test_register_with_no_features(self):
          builder = self.builder_for_features()

          # Since the builder advertises no features, you can't find it
          # by looking up features.
          self.assertIsNone(self.registry.lookup('foo'))

          # But you can find it by doing a lookup with no features, if
          # this happens to be the only registered builder.
          self.assertEqual(self.registry.lookup(), builder)

      def test_register_with_features_makes_lookup_succeed(self):
          builder = self.builder_for_features('foo', 'bar')
          self.assertEqual(self.registry.lookup('foo'), builder)
          self.assertEqual(self.registry.lookup('bar'), builder)

      def test_lookup_fails_when_no_builder_implements_feature(self):
          # Only the registration matters here, not the returned class.
          self.builder_for_features('foo', 'bar')
          self.assertIsNone(self.registry.lookup('baz'))

      def test_lookup_gets_most_recent_registration_when_no_feature_specified(self):
          self.builder_for_features('foo')
          most_recent = self.builder_for_features('bar')
          self.assertEqual(self.registry.lookup(), most_recent)

      def test_lookup_fails_when_no_tree_builders_registered(self):
          self.assertIsNone(self.registry.lookup())

      def test_lookup_gets_most_recent_builder_supporting_all_features(self):
          # Registration order is significant; keep it exactly as listed.
          self.builder_for_features('foo')
          self.builder_for_features('bar')
          has_both_early = self.builder_for_features('foo', 'bar', 'baz')
          has_both_late = self.builder_for_features('foo', 'bar', 'quux')
          # Registered after both candidates, each with only one of the
          # two requested features.
          self.builder_for_features('bar')
          self.builder_for_features('foo')

          # There are two builders featuring 'foo' and 'bar', but
          # the one that also features 'quux' was registered later.
          self.assertEqual(self.registry.lookup('foo', 'bar'),
                           has_both_late)

          # There is only one builder featuring 'foo', 'bar', and 'baz'.
          self.assertEqual(self.registry.lookup('foo', 'bar', 'baz'),
                           has_both_early)

      def test_lookup_fails_when_cannot_reconcile_requested_features(self):
          # Each requested feature is advertised by some builder, but no
          # single builder advertises both.
          self.builder_for_features('foo', 'bar')
          self.builder_for_features('foo', 'baz')
          self.assertIsNone(self.registry.lookup('bar', 'baz'))