# -*- test-case-name: twisted.python.test.test_url -*-
# Copyright (c) Twisted Matrix Laboratories.
# See LICENSE for details.

"""
Tests for L{twisted.python.url}.
"""

from __future__ import unicode_literals

from ..url import URL

# The native text type: C{unicode} on Python 2, C{str} on Python 3.  The
# assertions below use it to check that URL components are text, not bytes.
unicode = type(u'')

from twisted.trial.unittest import SynchronousTestCase

  11. theurl = "http://www.foo.com/a/nice/path/?zot=23&zut"
  12. # Examples from RFC 3986 section 5.4, Reference Resolution Examples
  13. relativeLinkBaseForRFC3986 = 'http://a/b/c/d;p?q'
  14. relativeLinkTestsForRFC3986 = [
  15. # "Normal"
  16. #('g:h', 'g:h'), # Not supported: scheme with relative path
  17. ('g', 'http://a/b/c/g'),
  18. ('./g', 'http://a/b/c/g'),
  19. ('g/', 'http://a/b/c/g/'),
  20. ('/g', 'http://a/g'),
  21. ('//g', 'http://g'),
  22. ('?y', 'http://a/b/c/d;p?y'),
  23. ('g?y', 'http://a/b/c/g?y'),
  24. ('#s', 'http://a/b/c/d;p?q#s'),
  25. ('g#s', 'http://a/b/c/g#s'),
  26. ('g?y#s', 'http://a/b/c/g?y#s'),
  27. (';x', 'http://a/b/c/;x'),
  28. ('g;x', 'http://a/b/c/g;x'),
  29. ('g;x?y#s', 'http://a/b/c/g;x?y#s'),
  30. ('', 'http://a/b/c/d;p?q'),
  31. ('.', 'http://a/b/c/'),
  32. ('./', 'http://a/b/c/'),
  33. ('..', 'http://a/b/'),
  34. ('../', 'http://a/b/'),
  35. ('../g', 'http://a/b/g'),
  36. ('../..', 'http://a/'),
  37. ('../../', 'http://a/'),
  38. ('../../g', 'http://a/g'),
  39. # Abnormal examples
  40. # ".." cannot be used to change the authority component of a URI.
  41. ('../../../g', 'http://a/g'),
  42. ('../../../../g', 'http://a/g'),
  43. # Only include "." and ".." when they are only part of a larger segment,
  44. # not by themselves.
  45. ('/./g', 'http://a/g'),
  46. ('/../g', 'http://a/g'),
  47. ('g.', 'http://a/b/c/g.'),
  48. ('.g', 'http://a/b/c/.g'),
  49. ('g..', 'http://a/b/c/g..'),
  50. ('..g', 'http://a/b/c/..g'),
  51. # Unnecessary or nonsensical forms of "." and "..".
  52. ('./../g', 'http://a/b/g'),
  53. ('./g/.', 'http://a/b/c/g/'),
  54. ('g/./h', 'http://a/b/c/g/h'),
  55. ('g/../h', 'http://a/b/c/h'),
  56. ('g;x=1/./y', 'http://a/b/c/g;x=1/y'),
  57. ('g;x=1/../y', 'http://a/b/c/y'),
  58. # Separating the reference's query and fragment components from the path.
  59. ('g?y/./x', 'http://a/b/c/g?y/./x'),
  60. ('g?y/../x', 'http://a/b/c/g?y/../x'),
  61. ('g#s/./x', 'http://a/b/c/g#s/./x'),
  62. ('g#s/../x', 'http://a/b/c/g#s/../x'),
  63. # Not supported: scheme with relative path
  64. #("http:g", "http:g"), # strict
  65. #("http:g", "http://a/b/c/g"), # non-strict
  66. ]
  67. _percentenc = lambda s: ''.join('%%%02X' % ord(c) for c in s)
  68. class TestURL(SynchronousTestCase):
  69. """
  70. Tests for L{URL}.
  71. """
  72. def assertUnicoded(self, u):
  73. """
  74. The given L{URL}'s components should be L{unicode}.
  75. @param u: The L{URL} to test.
  76. """
  77. self.assertTrue(isinstance(u.scheme, unicode)
  78. or u.scheme is None, repr(u))
  79. self.assertTrue(isinstance(u.host, unicode)
  80. or u.host is None, repr(u))
  81. for seg in u.path:
  82. self.assertIsInstance(seg, unicode, repr(u))
  83. for (k, v) in u.query:
  84. self.assertIsInstance(k, unicode, repr(u))
  85. self.assertTrue(v is None or isinstance(v, unicode), repr(u))
  86. self.assertIsInstance(u.fragment, unicode, repr(u))
  87. def assertURL(self, u, scheme, host, path, query,
  88. fragment, port, userinfo=u''):
  89. """
  90. The given L{URL} should have the given components.
  91. @param u: The actual L{URL} to examine.
  92. @param scheme: The expected scheme.
  93. @param host: The expected host.
  94. @param path: The expected path.
  95. @param query: The expected query.
  96. @param fragment: The expected fragment.
  97. @param port: The expected port.
  98. @param userinfo: The expected userinfo.
  99. """
  100. actual = (u.scheme, u.host, u.path, u.query,
  101. u.fragment, u.port, u.userinfo)
  102. expected = (scheme, host, tuple(path), tuple(query),
  103. fragment, port, u.userinfo)
  104. self.assertEqual(actual, expected)
  105. def test_initDefaults(self):
  106. """
  107. L{URL} should have appropriate default values.
  108. """
  109. def check(u):
  110. self.assertUnicoded(u)
  111. self.assertURL(u, u'http', u'', [], [], u'', 80, u'')
  112. check(URL(u'http', u''))
  113. check(URL(u'http', u'', [], []))
  114. check(URL(u'http', u'', [], [], u''))
  115. def test_init(self):
  116. """
  117. L{URL} should accept L{unicode} parameters.
  118. """
  119. u = URL(u's', u'h', [u'p'], [(u'k', u'v'), (u'k', None)], u'f')
  120. self.assertUnicoded(u)
  121. self.assertURL(u, u's', u'h', [u'p'], [(u'k', u'v'), (u'k', None)],
  122. u'f', None)
  123. self.assertURL(URL(u'http', u'\xe0', [u'\xe9'],
  124. [(u'\u03bb', u'\u03c0')], u'\u22a5'),
  125. u'http', u'\xe0', [u'\xe9'],
  126. [(u'\u03bb', u'\u03c0')], u'\u22a5', 80)
  127. def test_initPercent(self):
  128. """
  129. L{URL} should accept (and not interpret) percent characters.
  130. """
  131. u = URL(u's', u'%68', [u'%70'], [(u'%6B', u'%76'), (u'%6B', None)],
  132. u'%66')
  133. self.assertUnicoded(u)
  134. self.assertURL(u,
  135. u's', u'%68', [u'%70'],
  136. [(u'%6B', u'%76'), (u'%6B', None)],
  137. u'%66', None)
  138. def test_repr(self):
  139. """
  140. L{URL.__repr__} will display the canonical form of the URL, wrapped in
  141. a L{URL.fromText} invocation, so that it is C{eval}-able but still easy
  142. to read.
  143. """
  144. self.assertEqual(
  145. repr(URL(scheme=u'http', host=u'foo', path=[u'bar'],
  146. query=[(u'baz', None), (u'k', u'v')],
  147. fragment=u'frob')),
  148. "URL.from_text(%s)" % (repr(u"http://foo/bar?baz&k=v#frob"),)
  149. )
  150. def test_fromText(self):
  151. """
  152. Round-tripping L{URL.fromText} with C{str} results in an equivalent
  153. URL.
  154. """
  155. urlpath = URL.fromText(theurl)
  156. self.assertEqual(theurl, urlpath.asText())
  157. def test_roundtrip(self):
  158. """
  159. L{URL.asText} should invert L{URL.fromText}.
  160. """
  161. tests = (
  162. "http://localhost",
  163. "http://localhost/",
  164. "http://localhost/foo",
  165. "http://localhost/foo/",
  166. "http://localhost/foo!!bar/",
  167. "http://localhost/foo%20bar/",
  168. "http://localhost/foo%2Fbar/",
  169. "http://localhost/foo?n",
  170. "http://localhost/foo?n=v",
  171. "http://localhost/foo?n=/a/b",
  172. "http://example.com/foo!@$bar?b!@z=123",
  173. "http://localhost/asd?a=asd%20sdf/345",
  174. "http://(%2525)/(%2525)?(%2525)&(%2525)=(%2525)#(%2525)",
  175. "http://(%C3%A9)/(%C3%A9)?(%C3%A9)&(%C3%A9)=(%C3%A9)#(%C3%A9)",
  176. )
  177. for test in tests:
  178. result = URL.fromText(test).asText()
  179. self.assertEqual(test, result)
  180. def test_equality(self):
  181. """
  182. Two URLs decoded using L{URL.fromText} will be equal (C{==}) if they
  183. decoded same URL string, and unequal (C{!=}) if they decoded different
  184. strings.
  185. """
  186. urlpath = URL.fromText(theurl)
  187. self.assertEqual(urlpath, URL.fromText(theurl))
  188. self.assertNotEqual(
  189. urlpath,
  190. URL.fromText('ftp://www.anotherinvaliddomain.com/'
  191. 'foo/bar/baz/?zot=21&zut')
  192. )
  193. def test_fragmentEquality(self):
  194. """
  195. An URL created with the empty string for a fragment compares equal
  196. to an URL created with an unspecified fragment.
  197. """
  198. self.assertEqual(URL(fragment=u''), URL())
  199. self.assertEqual(URL.fromText(u"http://localhost/#"),
  200. URL.fromText(u"http://localhost/"))
  201. def test_child(self):
  202. """
  203. L{URL.child} appends a new path segment, but does not affect the query
  204. or fragment.
  205. """
  206. urlpath = URL.fromText(theurl)
  207. self.assertEqual("http://www.foo.com/a/nice/path/gong?zot=23&zut",
  208. urlpath.child(u'gong').asText())
  209. self.assertEqual("http://www.foo.com/a/nice/path/gong%2F?zot=23&zut",
  210. urlpath.child(u'gong/').asText())
  211. self.assertEqual(
  212. "http://www.foo.com/a/nice/path/gong%2Fdouble?zot=23&zut",
  213. urlpath.child(u'gong/double').asText()
  214. )
  215. self.assertEqual(
  216. "http://www.foo.com/a/nice/path/gong%2Fdouble%2F?zot=23&zut",
  217. urlpath.child(u'gong/double/').asText()
  218. )
  219. def test_multiChild(self):
  220. """
  221. L{URL.child} receives multiple segments as C{*args} and appends each in
  222. turn.
  223. """
  224. self.assertEqual(URL.fromText('http://example.com/a/b')
  225. .child('c', 'd', 'e').asText(),
  226. 'http://example.com/a/b/c/d/e')
  227. def test_childInitRoot(self):
  228. """
  229. L{URL.child} of a L{URL} without a path produces a L{URL} with a single
  230. path segment.
  231. """
  232. childURL = URL(host=u"www.foo.com").child(u"c")
  233. self.assertTrue(childURL.rooted)
  234. self.assertEqual("http://www.foo.com/c", childURL.asText())
  235. def test_sibling(self):
  236. """
  237. L{URL.sibling} of a L{URL} replaces the last path segment, but does not
  238. affect the query or fragment.
  239. """
  240. urlpath = URL.fromText(theurl)
  241. self.assertEqual(
  242. "http://www.foo.com/a/nice/path/sister?zot=23&zut",
  243. urlpath.sibling(u'sister').asText()
  244. )
  245. # Use an url without trailing '/' to check child removal.
  246. theurl2 = "http://www.foo.com/a/nice/path?zot=23&zut"
  247. urlpath = URL.fromText(theurl2)
  248. self.assertEqual(
  249. "http://www.foo.com/a/nice/sister?zot=23&zut",
  250. urlpath.sibling(u'sister').asText()
  251. )
  252. def test_click(self):
  253. """
  254. L{URL.click} interprets the given string as a relative URI-reference
  255. and returns a new L{URL} interpreting C{self} as the base absolute URI.
  256. """
  257. urlpath = URL.fromText(theurl)
  258. # A null uri should be valid (return here).
  259. self.assertEqual("http://www.foo.com/a/nice/path/?zot=23&zut",
  260. urlpath.click("").asText())
  261. # A simple relative path remove the query.
  262. self.assertEqual("http://www.foo.com/a/nice/path/click",
  263. urlpath.click("click").asText())
  264. # An absolute path replace path and query.
  265. self.assertEqual("http://www.foo.com/click",
  266. urlpath.click("/click").asText())
  267. # Replace just the query.
  268. self.assertEqual("http://www.foo.com/a/nice/path/?burp",
  269. urlpath.click("?burp").asText())
  270. # One full url to another should not generate '//' between authority.
  271. # and path
  272. self.assertNotIn("//foobar",
  273. urlpath.click('http://www.foo.com/foobar').asText())
  274. # From a url with no query clicking a url with a query, the query
  275. # should be handled properly.
  276. u = URL.fromText('http://www.foo.com/me/noquery')
  277. self.assertEqual('http://www.foo.com/me/17?spam=158',
  278. u.click('/me/17?spam=158').asText())
  279. # Check that everything from the path onward is removed when the click
  280. # link has no path.
  281. u = URL.fromText('http://localhost/foo?abc=def')
  282. self.assertEqual(u.click('http://www.python.org').asText(),
  283. 'http://www.python.org')
  284. def test_clickRFC3986(self):
  285. """
  286. L{URL.click} should correctly resolve the examples in RFC 3986.
  287. """
  288. base = URL.fromText(relativeLinkBaseForRFC3986)
  289. for (ref, expected) in relativeLinkTestsForRFC3986:
  290. self.assertEqual(base.click(ref).asText(), expected)
  291. def test_clickSchemeRelPath(self):
  292. """
  293. L{URL.click} should not accept schemes with relative paths.
  294. """
  295. base = URL.fromText(relativeLinkBaseForRFC3986)
  296. self.assertRaises(NotImplementedError, base.click, 'g:h')
  297. self.assertRaises(NotImplementedError, base.click, 'http:h')
  298. def test_cloneUnchanged(self):
  299. """
  300. Verify that L{URL.replace} doesn't change any of the arguments it
  301. is passed.
  302. """
  303. urlpath = URL.fromText('https://x:1/y?z=1#A')
  304. self.assertEqual(
  305. urlpath.replace(urlpath.scheme,
  306. urlpath.host,
  307. urlpath.path,
  308. urlpath.query,
  309. urlpath.fragment,
  310. urlpath.port),
  311. urlpath)
  312. self.assertEqual(
  313. urlpath.replace(),
  314. urlpath)
  315. def test_clickCollapse(self):
  316. """
  317. L{URL.click} collapses C{.} and C{..} according to RFC 3986 section
  318. 5.2.4.
  319. """
  320. tests = [
  321. ['http://localhost/', '.', 'http://localhost/'],
  322. ['http://localhost/', '..', 'http://localhost/'],
  323. ['http://localhost/a/b/c', '.', 'http://localhost/a/b/'],
  324. ['http://localhost/a/b/c', '..', 'http://localhost/a/'],
  325. ['http://localhost/a/b/c', './d/e', 'http://localhost/a/b/d/e'],
  326. ['http://localhost/a/b/c', '../d/e', 'http://localhost/a/d/e'],
  327. ['http://localhost/a/b/c', '/./d/e', 'http://localhost/d/e'],
  328. ['http://localhost/a/b/c', '/../d/e', 'http://localhost/d/e'],
  329. ['http://localhost/a/b/c/', '../../d/e/',
  330. 'http://localhost/a/d/e/'],
  331. ['http://localhost/a/./c', '../d/e', 'http://localhost/d/e'],
  332. ['http://localhost/a/./c/', '../d/e', 'http://localhost/a/d/e'],
  333. ['http://localhost/a/b/c/d', './e/../f/../g',
  334. 'http://localhost/a/b/c/g'],
  335. ['http://localhost/a/b/c', 'd//e', 'http://localhost/a/b/d//e'],
  336. ]
  337. for start, click, expected in tests:
  338. actual = URL.fromText(start).click(click).asText()
  339. self.assertEqual(
  340. actual,
  341. expected,
  342. "{start}.click({click}) => {actual} not {expected}".format(
  343. start=start,
  344. click=repr(click),
  345. actual=actual,
  346. expected=expected,
  347. )
  348. )
  349. def test_queryAdd(self):
  350. """
  351. L{URL.add} adds query parameters.
  352. """
  353. self.assertEqual(
  354. "http://www.foo.com/a/nice/path/?foo=bar",
  355. URL.fromText("http://www.foo.com/a/nice/path/")
  356. .add(u"foo", u"bar").asText())
  357. self.assertEqual(
  358. "http://www.foo.com/?foo=bar",
  359. URL(host=u"www.foo.com").add(u"foo", u"bar")
  360. .asText())
  361. urlpath = URL.fromText(theurl)
  362. self.assertEqual(
  363. "http://www.foo.com/a/nice/path/?zot=23&zut&burp",
  364. urlpath.add(u"burp").asText())
  365. self.assertEqual(
  366. "http://www.foo.com/a/nice/path/?zot=23&zut&burp=xxx",
  367. urlpath.add(u"burp", u"xxx").asText())
  368. self.assertEqual(
  369. "http://www.foo.com/a/nice/path/?zot=23&zut&burp=xxx&zing",
  370. urlpath.add(u"burp", u"xxx").add(u"zing").asText())
  371. # Note the inversion!
  372. self.assertEqual(
  373. "http://www.foo.com/a/nice/path/?zot=23&zut&zing&burp=xxx",
  374. urlpath.add(u"zing").add(u"burp", u"xxx").asText())
  375. # Note the two values for the same name.
  376. self.assertEqual(
  377. "http://www.foo.com/a/nice/path/?zot=23&zut&burp=xxx&zot=32",
  378. urlpath.add(u"burp", u"xxx").add(u"zot", u'32')
  379. .asText())
  380. def test_querySet(self):
  381. """
  382. L{URL.set} replaces query parameters by name.
  383. """
  384. urlpath = URL.fromText(theurl)
  385. self.assertEqual(
  386. "http://www.foo.com/a/nice/path/?zot=32&zut",
  387. urlpath.set(u"zot", u'32').asText())
  388. # Replace name without value with name/value and vice-versa.
  389. self.assertEqual(
  390. "http://www.foo.com/a/nice/path/?zot&zut=itworked",
  391. urlpath.set(u"zot").set(u"zut", u"itworked").asText()
  392. )
  393. # Q: what happens when the query has two values and we replace?
  394. # A: we replace both values with a single one
  395. self.assertEqual(
  396. "http://www.foo.com/a/nice/path/?zot=32&zut",
  397. urlpath.add(u"zot", u"xxx").set(u"zot", u'32').asText()
  398. )
  399. def test_queryRemove(self):
  400. """
  401. L{URL.remove} removes all instances of a query parameter.
  402. """
  403. url = URL.fromText(u"https://example.com/a/b/?foo=1&bar=2&foo=3")
  404. self.assertEqual(
  405. url.remove(u"foo"),
  406. URL.fromText(u"https://example.com/a/b/?bar=2")
  407. )
  408. def test_parseEqualSignInParamValue(self):
  409. """
  410. Every C{=}-sign after the first in a query parameter is simply included
  411. in the value of the parameter.
  412. """
  413. u = URL.fromText('http://localhost/?=x=x=x')
  414. self.assertEqual(u.get(u''), ['x=x=x'])
  415. self.assertEqual(u.asText(), 'http://localhost/?=x%3Dx%3Dx')
  416. u = URL.fromText('http://localhost/?foo=x=x=x&bar=y')
  417. self.assertEqual(u.query, (('foo', 'x=x=x'),
  418. ('bar', 'y')))
  419. self.assertEqual(u.asText(), 'http://localhost/?foo=x%3Dx%3Dx&bar=y')
  420. def test_empty(self):
  421. """
  422. An empty L{URL} should serialize as the empty string.
  423. """
  424. self.assertEqual(URL().asText(), u'')
  425. def test_justQueryText(self):
  426. """
  427. An L{URL} with query text should serialize as just query text.
  428. """
  429. u = URL(query=[(u"hello", u"world")])
  430. self.assertEqual(u.asText(), u'?hello=world')
  431. def test_identicalEqual(self):
  432. """
  433. L{URL} compares equal to itself.
  434. """
  435. u = URL.fromText('http://localhost/')
  436. self.assertEqual(u, u)
  437. def test_similarEqual(self):
  438. """
  439. URLs with equivalent components should compare equal.
  440. """
  441. u1 = URL.fromText('http://localhost/')
  442. u2 = URL.fromText('http://localhost/')
  443. self.assertEqual(u1, u2)
  444. def test_differentNotEqual(self):
  445. """
  446. L{URL}s that refer to different resources are both unequal (C{!=}) and
  447. also not equal (not C{==}).
  448. """
  449. u1 = URL.fromText('http://localhost/a')
  450. u2 = URL.fromText('http://localhost/b')
  451. self.assertFalse(u1 == u2, "%r != %r" % (u1, u2))
  452. self.assertNotEqual(u1, u2)
  453. def test_otherTypesNotEqual(self):
  454. """
  455. L{URL} is not equal (C{==}) to other types.
  456. """
  457. u = URL.fromText('http://localhost/')
  458. self.assertFalse(u == 42, "URL must not equal a number.")
  459. self.assertFalse(u == object(), "URL must not equal an object.")
  460. self.assertNotEqual(u, 42)
  461. self.assertNotEqual(u, object())
  462. def test_identicalNotUnequal(self):
  463. """
  464. Identical L{URL}s are not unequal (C{!=}) to each other.
  465. """
  466. u = URL.fromText('http://localhost/')
  467. self.assertFalse(u != u, "%r == itself" % u)
  468. def test_similarNotUnequal(self):
  469. """
  470. Structurally similar L{URL}s are not unequal (C{!=}) to each other.
  471. """
  472. u1 = URL.fromText('http://localhost/')
  473. u2 = URL.fromText('http://localhost/')
  474. self.assertFalse(u1 != u2, "%r == %r" % (u1, u2))
  475. def test_differentUnequal(self):
  476. """
  477. Structurally different L{URL}s are unequal (C{!=}) to each other.
  478. """
  479. u1 = URL.fromText('http://localhost/a')
  480. u2 = URL.fromText('http://localhost/b')
  481. self.assertTrue(u1 != u2, "%r == %r" % (u1, u2))
  482. def test_otherTypesUnequal(self):
  483. """
  484. L{URL} is unequal (C{!=}) to other types.
  485. """
  486. u = URL.fromText('http://localhost/')
  487. self.assertTrue(u != 42, "URL must differ from a number.")
  488. self.assertTrue(u != object(), "URL must be differ from an object.")
  489. def test_asURI(self):
  490. """
  491. L{URL.asURI} produces an URI which converts any URI unicode encoding
  492. into pure US-ASCII and returns a new L{URL}.
  493. """
  494. unicodey = ('http://\N{LATIN SMALL LETTER E WITH ACUTE}.com/'
  495. '\N{LATIN SMALL LETTER E}\N{COMBINING ACUTE ACCENT}'
  496. '?\N{LATIN SMALL LETTER A}\N{COMBINING ACUTE ACCENT}='
  497. '\N{LATIN SMALL LETTER I}\N{COMBINING ACUTE ACCENT}'
  498. '#\N{LATIN SMALL LETTER U}\N{COMBINING ACUTE ACCENT}')
  499. iri = URL.fromText(unicodey)
  500. uri = iri.asURI()
  501. self.assertEqual(iri.host, '\N{LATIN SMALL LETTER E WITH ACUTE}.com')
  502. self.assertEqual(iri.path[0],
  503. '\N{LATIN SMALL LETTER E}\N{COMBINING ACUTE ACCENT}')
  504. self.assertEqual(iri.asText(), unicodey)
  505. expectedURI = 'http://xn--9ca.com/%C3%A9?%C3%A1=%C3%AD#%C3%BA'
  506. actualURI = uri.asText()
  507. self.assertEqual(actualURI, expectedURI,
  508. '%r != %r' % (actualURI, expectedURI))
  509. def test_asIRI(self):
  510. """
  511. L{URL.asIRI} decodes any percent-encoded text in the URI, making it
  512. more suitable for reading by humans, and returns a new L{URL}.
  513. """
  514. asciiish = 'http://xn--9ca.com/%C3%A9?%C3%A1=%C3%AD#%C3%BA'
  515. uri = URL.fromText(asciiish)
  516. iri = uri.asIRI()
  517. self.assertEqual(uri.host, 'xn--9ca.com')
  518. self.assertEqual(uri.path[0], '%C3%A9')
  519. self.assertEqual(uri.asText(), asciiish)
  520. expectedIRI = ('http://\N{LATIN SMALL LETTER E WITH ACUTE}.com/'
  521. '\N{LATIN SMALL LETTER E WITH ACUTE}'
  522. '?\N{LATIN SMALL LETTER A WITH ACUTE}='
  523. '\N{LATIN SMALL LETTER I WITH ACUTE}'
  524. '#\N{LATIN SMALL LETTER U WITH ACUTE}')
  525. actualIRI = iri.asText()
  526. self.assertEqual(actualIRI, expectedIRI,
  527. '%r != %r' % (actualIRI, expectedIRI))
  528. def test_badUTF8AsIRI(self):
  529. """
  530. Bad UTF-8 in a path segment, query parameter, or fragment results in
  531. that portion of the URI remaining percent-encoded in the IRI.
  532. """
  533. urlWithBinary = 'http://xn--9ca.com/%00%FF/%C3%A9'
  534. uri = URL.fromText(urlWithBinary)
  535. iri = uri.asIRI()
  536. expectedIRI = ('http://\N{LATIN SMALL LETTER E WITH ACUTE}.com/'
  537. '%00%FF/'
  538. '\N{LATIN SMALL LETTER E WITH ACUTE}')
  539. actualIRI = iri.asText()
  540. self.assertEqual(actualIRI, expectedIRI,
  541. '%r != %r' % (actualIRI, expectedIRI))
  542. def test_alreadyIRIAsIRI(self):
  543. """
  544. A L{URL} composed of non-ASCII text will result in non-ASCII text.
  545. """
  546. unicodey = ('http://\N{LATIN SMALL LETTER E WITH ACUTE}.com/'
  547. '\N{LATIN SMALL LETTER E}\N{COMBINING ACUTE ACCENT}'
  548. '?\N{LATIN SMALL LETTER A}\N{COMBINING ACUTE ACCENT}='
  549. '\N{LATIN SMALL LETTER I}\N{COMBINING ACUTE ACCENT}'
  550. '#\N{LATIN SMALL LETTER U}\N{COMBINING ACUTE ACCENT}')
  551. iri = URL.fromText(unicodey)
  552. alsoIRI = iri.asIRI()
  553. self.assertEqual(alsoIRI.asText(), unicodey)
  554. def test_alreadyURIAsURI(self):
  555. """
  556. A L{URL} composed of encoded text will remain encoded.
  557. """
  558. expectedURI = 'http://xn--9ca.com/%C3%A9?%C3%A1=%C3%AD#%C3%BA'
  559. uri = URL.fromText(expectedURI)
  560. actualURI = uri.asURI().asText()
  561. self.assertEqual(actualURI, expectedURI)
  562. def test_userinfo(self):
  563. """
  564. L{URL.fromText} will parse the C{userinfo} portion of the URI
  565. separately from the host and port.
  566. """
  567. url = URL.fromText(
  568. 'http://someuser:somepassword@example.com/some-segment@ignore'
  569. )
  570. self.assertEqual(url.authority(True),
  571. 'someuser:somepassword@example.com')
  572. self.assertEqual(url.authority(False), 'someuser:@example.com')
  573. self.assertEqual(url.userinfo, 'someuser:somepassword')
  574. self.assertEqual(url.user, 'someuser')
  575. self.assertEqual(url.asText(),
  576. 'http://someuser:@example.com/some-segment@ignore')
  577. self.assertEqual(
  578. url.replace(userinfo=u"someuser").asText(),
  579. 'http://someuser@example.com/some-segment@ignore'
  580. )
  581. def test_portText(self):
  582. """
  583. L{URL.fromText} parses custom port numbers as integers.
  584. """
  585. portURL = URL.fromText(u"http://www.example.com:8080/")
  586. self.assertEqual(portURL.port, 8080)
  587. self.assertEqual(portURL.asText(), u"http://www.example.com:8080/")
  588. def test_mailto(self):
  589. """
  590. Although L{URL} instances are mainly for dealing with HTTP, other
  591. schemes (such as C{mailto:}) should work as well. For example,
  592. L{URL.fromText}/L{URL.asText} round-trips cleanly for a C{mailto:} URL
  593. representing an email address.
  594. """
  595. self.assertEqual(URL.fromText(u"mailto:user@example.com").asText(),
  596. u"mailto:user@example.com")
  597. def test_queryIterable(self):
  598. """
  599. When a L{URL} is created with a C{query} argument, the C{query}
  600. argument is converted into an N-tuple of 2-tuples.
  601. """
  602. url = URL(query=[[u'alpha', u'beta']])
  603. self.assertEqual(url.query, ((u'alpha', u'beta'),))
  604. def test_pathIterable(self):
  605. """
  606. When a L{URL} is created with a C{path} argument, the C{path} is
  607. converted into a tuple.
  608. """
  609. url = URL(path=[u'hello', u'world'])
  610. self.assertEqual(url.path, (u'hello', u'world'))
  611. def test_invalidArguments(self):
  612. """
  613. Passing an argument of the wrong type to any of the constructor
  614. arguments of L{URL} will raise a descriptive L{TypeError}.
  615. L{URL} typechecks very aggressively to ensure that its constitutent
  616. parts are all properly immutable and to prevent confusing errors when
  617. bad data crops up in a method call long after the code that called the
  618. constructor is off the stack.
  619. """
  620. class Unexpected(object):
  621. def __str__(self):
  622. return "wrong"
  623. def __repr__(self):
  624. return "<unexpected>"
  625. defaultExpectation = "unicode" if bytes is str else "str"
  626. def assertRaised(raised, expectation, name):
  627. self.assertEqual(str(raised.exception),
  628. "expected {} for {}, got {}".format(
  629. expectation,
  630. name, "<unexpected>"))
  631. def check(param, expectation=defaultExpectation):
  632. with self.assertRaises(TypeError) as raised:
  633. URL(**{param: Unexpected()})
  634. assertRaised(raised, expectation, param)
  635. check("scheme")
  636. check("host")
  637. check("fragment")
  638. check("rooted", "bool")
  639. check("userinfo")
  640. check("port", "int or NoneType")
  641. with self.assertRaises(TypeError) as raised:
  642. URL(path=[Unexpected(),])
  643. assertRaised(raised, defaultExpectation, "path segment")
  644. with self.assertRaises(TypeError) as raised:
  645. URL(query=[(u"name", Unexpected()),])
  646. assertRaised(raised, defaultExpectation + " or NoneType",
  647. "query parameter value")
  648. with self.assertRaises(TypeError) as raised:
  649. URL(query=[(Unexpected(), u"value"),])
  650. assertRaised(raised, defaultExpectation, "query parameter name")
  651. # No custom error message for this one, just want to make sure
  652. # non-2-tuples don't get through.
  653. with self.assertRaises(TypeError):
  654. URL(query=[Unexpected()])
  655. with self.assertRaises(ValueError):
  656. URL(query=[(u'k', u'v', u'vv')])
  657. with self.assertRaises(ValueError):
  658. URL(query=[(u'k',)])
  659. url = URL.fromText("https://valid.example.com/")
  660. with self.assertRaises(TypeError) as raised:
  661. url.child(Unexpected())
  662. assertRaised(raised, defaultExpectation, "path segment")
  663. with self.assertRaises(TypeError) as raised:
  664. url.sibling(Unexpected())
  665. assertRaised(raised, defaultExpectation, "path segment")
  666. with self.assertRaises(TypeError) as raised:
  667. url.click(Unexpected())
  668. assertRaised(raised, defaultExpectation, "relative URL")
  669. def test_technicallyTextIsIterableBut(self):
  670. """
  671. Technically, L{str} (or L{unicode}, as appropriate) is iterable, but
  672. C{URL(path="foo")} resulting in C{URL.fromText("f/o/o")} is never what
  673. you want.
  674. """
  675. with self.assertRaises(TypeError) as raised:
  676. URL(path=u'foo')
  677. self.assertEqual(
  678. str(raised.exception),
  679. "expected iterable of text for path, not: {}"
  680. .format(repr(u'foo'))
  681. )
  682. class URLDeprecationTests(SynchronousTestCase):
  683. """
  684. L{twisted.python.constants} is deprecated.
  685. """
  686. def test_urlDeprecation(self):
  687. """
  688. L{twisted.python.constants} is deprecated since Twisted 17.5.0.
  689. """
  690. from twisted.python import url
  691. url
  692. warningsShown = self.flushWarnings([self.test_urlDeprecation])
  693. self.assertEqual(1, len(warningsShown))
  694. self.assertEqual(
  695. ("twisted.python.url was deprecated in Twisted 17.5.0:"
  696. " Please use hyperlink from PyPI instead."),
  697. warningsShown[0]['message'])