test_url.py 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088
  1. # -*- coding: utf-8 -*-
  2. # Copyright (c) Twisted Matrix Laboratories.
  3. # See LICENSE for details.
  4. from __future__ import unicode_literals
  5. import socket
  6. from unittest import TestCase
  7. from .. import URL, URLParseError
  8. # automatically import the py27 windows implementation when appropriate
  9. from .._url import inet_pton, SCHEME_PORT_MAP
# The text type: the type of a ``u''`` literal on whichever Python is running.
unicode = type(u'')

# An absolute HTTP URL with a multi-segment path, a trailing slash and a
# two-parameter query; used as the starting point by several tests below.
BASIC_URL = "http://www.foo.com/a/nice/path/?zot=23&zut"

# Examples from RFC 3986 section 5.4, Reference Resolution Examples
relativeLinkBaseForRFC3986 = 'http://a/b/c/d;p?q'
# Each entry is a (relative reference, expected resolved URL text) pair; the
# reference is resolved against relativeLinkBaseForRFC3986.
relativeLinkTestsForRFC3986 = [
    # "Normal"
    # ('g:h', 'g:h'), # can't click on a scheme-having url without an abs path
    ('g', 'http://a/b/c/g'),
    ('./g', 'http://a/b/c/g'),
    ('g/', 'http://a/b/c/g/'),
    ('/g', 'http://a/g'),
    ('//g', 'http://g'),
    ('?y', 'http://a/b/c/d;p?y'),
    ('g?y', 'http://a/b/c/g?y'),
    ('#s', 'http://a/b/c/d;p?q#s'),
    ('g#s', 'http://a/b/c/g#s'),
    ('g?y#s', 'http://a/b/c/g?y#s'),
    (';x', 'http://a/b/c/;x'),
    ('g;x', 'http://a/b/c/g;x'),
    ('g;x?y#s', 'http://a/b/c/g;x?y#s'),
    ('', 'http://a/b/c/d;p?q'),
    ('.', 'http://a/b/c/'),
    ('./', 'http://a/b/c/'),
    ('..', 'http://a/b/'),
    ('../', 'http://a/b/'),
    ('../g', 'http://a/b/g'),
    ('../..', 'http://a/'),
    ('../../', 'http://a/'),
    ('../../g', 'http://a/g'),

    # Abnormal examples
    # ".." cannot be used to change the authority component of a URI.
    ('../../../g', 'http://a/g'),
    ('../../../../g', 'http://a/g'),

    # Only include "." and ".." when they are only part of a larger segment,
    # not by themselves.
    ('/./g', 'http://a/g'),
    ('/../g', 'http://a/g'),
    ('g.', 'http://a/b/c/g.'),
    ('.g', 'http://a/b/c/.g'),
    ('g..', 'http://a/b/c/g..'),
    ('..g', 'http://a/b/c/..g'),

    # Unnecessary or nonsensical forms of "." and "..".
    ('./../g', 'http://a/b/g'),
    ('./g/.', 'http://a/b/c/g/'),
    ('g/./h', 'http://a/b/c/g/h'),
    ('g/../h', 'http://a/b/c/h'),
    ('g;x=1/./y', 'http://a/b/c/g;x=1/y'),
    ('g;x=1/../y', 'http://a/b/c/y'),

    # Separating the reference's query and fragment components from the path.
    ('g?y/./x', 'http://a/b/c/g?y/./x'),
    ('g?y/../x', 'http://a/b/c/g?y/../x'),
    ('g#s/./x', 'http://a/b/c/g#s/./x'),
    ('g#s/../x', 'http://a/b/c/g#s/../x')
]
# URL texts that are expected to come back unchanged after being parsed with
# URL.from_text and serialized again with URL.to_text.
ROUNDTRIP_TESTS = (
    "http://localhost",
    "http://localhost/",
    "http://127.0.0.1/",
    "http://[::127.0.0.1]/",
    "http://[::1]/",
    "http://localhost/foo",
    "http://localhost/foo/",
    "http://localhost/foo!!bar/",
    "http://localhost/foo%20bar/",
    "http://localhost/foo%2Fbar/",
    "http://localhost/foo?n",
    "http://localhost/foo?n=v",
    "http://localhost/foo?n=/a/b",
    "http://example.com/foo!@$bar?b!@z=123",
    "http://localhost/asd?a=asd%20sdf/345",
    "http://(%2525)/(%2525)?(%2525)&(%2525)=(%2525)#(%2525)",
    "http://(%C3%A9)/(%C3%A9)?(%C3%A9)&(%C3%A9)=(%C3%A9)#(%C3%A9)",
    "?sslrootcert=/Users/glyph/Downloads/rds-ca-2015-root.pem&sslmode=verify",

    # from boltons.urlutils' tests
    'http://googlewebsite.com/e-shops.aspx',
    'http://example.com:8080/search?q=123&business=Nothing%20Special',
    'http://hatnote.com:9000/?arg=1&arg=2&arg=3',
    'https://xn--bcher-kva.ch',
    'http://xn--ggbla1c4e.xn--ngbc5azd/',
    'http://tools.ietf.org/html/rfc3986#section-3.4',
    # 'http://wiki:pedia@hatnote.com',
    'ftp://ftp.rfc-editor.org/in-notes/tar/RFCs0001-0500.tar.gz',
    'http://[1080:0:0:0:8:800:200C:417A]/index.html',
    'ssh://192.0.2.16:2222/',
    'https://[::101.45.75.219]:80/?hi=bye',
    'ldap://[::192.9.5.5]/dc=example,dc=com??sub?(sn=Jensen)',
    'mailto:me@example.com?to=me@example.com&body=hi%20http://wikipedia.org',
    'news:alt.rec.motorcycle',
    'tel:+1-800-867-5309',
    'urn:oasis:member:A00024:x',
    # A single long magnet link, split across adjacent string literals.
    ('magnet:?xt=urn:btih:1a42b9e04e122b97a5254e3df77ab3c4b7da725f&dn=Puppy%'
     '20Linux%20precise-5.7.1.iso&tr=udp://tracker.openbittorrent.com:80&'
     'tr=udp://tracker.publicbt.com:80&tr=udp://tracker.istole.it:6969&'
     'tr=udp://tracker.ccc.de:80&tr=udp://open.demonii.com:1337'),
)
  105. class TestURL(TestCase):
  106. """
  107. Tests for L{URL}.
  108. """
  109. def assertUnicoded(self, u):
  110. """
  111. The given L{URL}'s components should be L{unicode}.
  112. @param u: The L{URL} to test.
  113. """
  114. self.assertTrue(isinstance(u.scheme, unicode) or u.scheme is None,
  115. repr(u))
  116. self.assertTrue(isinstance(u.host, unicode) or u.host is None,
  117. repr(u))
  118. for seg in u.path:
  119. self.assertEqual(type(seg), unicode, repr(u))
  120. for (k, v) in u.query:
  121. self.assertEqual(type(seg), unicode, repr(u))
  122. self.assertTrue(v is None or isinstance(v, unicode), repr(u))
  123. self.assertEqual(type(u.fragment), unicode, repr(u))
  124. def assertURL(self, u, scheme, host, path, query,
  125. fragment, port, userinfo=''):
  126. """
  127. The given L{URL} should have the given components.
  128. @param u: The actual L{URL} to examine.
  129. @param scheme: The expected scheme.
  130. @param host: The expected host.
  131. @param path: The expected path.
  132. @param query: The expected query.
  133. @param fragment: The expected fragment.
  134. @param port: The expected port.
  135. @param userinfo: The expected userinfo.
  136. """
  137. actual = (u.scheme, u.host, u.path, u.query,
  138. u.fragment, u.port, u.userinfo)
  139. expected = (scheme, host, tuple(path), tuple(query),
  140. fragment, port, u.userinfo)
  141. self.assertEqual(actual, expected)
  142. def test_initDefaults(self):
  143. """
  144. L{URL} should have appropriate default values.
  145. """
  146. def check(u):
  147. self.assertUnicoded(u)
  148. self.assertURL(u, 'http', '', [], [], '', 80, '')
  149. check(URL('http', ''))
  150. check(URL('http', '', [], []))
  151. check(URL('http', '', [], [], ''))
  152. def test_init(self):
  153. """
  154. L{URL} should accept L{unicode} parameters.
  155. """
  156. u = URL('s', 'h', ['p'], [('k', 'v'), ('k', None)], 'f')
  157. self.assertUnicoded(u)
  158. self.assertURL(u, 's', 'h', ['p'], [('k', 'v'), ('k', None)],
  159. 'f', None)
  160. self.assertURL(URL('http', '\xe0', ['\xe9'],
  161. [('\u03bb', '\u03c0')], '\u22a5'),
  162. 'http', '\xe0', ['\xe9'],
  163. [('\u03bb', '\u03c0')], '\u22a5', 80)
  164. def test_initPercent(self):
  165. """
  166. L{URL} should accept (and not interpret) percent characters.
  167. """
  168. u = URL('s', '%68', ['%70'], [('%6B', '%76'), ('%6B', None)],
  169. '%66')
  170. self.assertUnicoded(u)
  171. self.assertURL(u,
  172. 's', '%68', ['%70'],
  173. [('%6B', '%76'), ('%6B', None)],
  174. '%66', None)
  175. def test_repr(self):
  176. """
  177. L{URL.__repr__} will display the canonical form of the URL, wrapped in
  178. a L{URL.from_text} invocation, so that it is C{eval}-able but still easy
  179. to read.
  180. """
  181. self.assertEqual(
  182. repr(URL(scheme='http', host='foo', path=['bar'],
  183. query=[('baz', None), ('k', 'v')],
  184. fragment='frob')),
  185. "URL.from_text(%s)" % (repr(u"http://foo/bar?baz&k=v#frob"),)
  186. )
  187. def test_from_text(self):
  188. """
  189. Round-tripping L{URL.from_text} with C{str} results in an equivalent
  190. URL.
  191. """
  192. urlpath = URL.from_text(BASIC_URL)
  193. self.assertEqual(BASIC_URL, urlpath.to_text())
  194. def test_roundtrip(self):
  195. """
  196. L{URL.to_text} should invert L{URL.from_text}.
  197. """
  198. for test in ROUNDTRIP_TESTS:
  199. result = URL.from_text(test).to_text()
  200. self.assertEqual(test, result)
  201. def test_equality(self):
  202. """
  203. Two URLs decoded using L{URL.from_text} will be equal (C{==}) if they
  204. decoded same URL string, and unequal (C{!=}) if they decoded different
  205. strings.
  206. """
  207. urlpath = URL.from_text(BASIC_URL)
  208. self.assertEqual(urlpath, URL.from_text(BASIC_URL))
  209. self.assertNotEqual(
  210. urlpath,
  211. URL.from_text('ftp://www.anotherinvaliddomain.com/'
  212. 'foo/bar/baz/?zot=21&zut')
  213. )
  214. def test_fragmentEquality(self):
  215. """
  216. An URL created with the empty string for a fragment compares equal
  217. to an URL created with an unspecified fragment.
  218. """
  219. self.assertEqual(URL(fragment=''), URL())
  220. self.assertEqual(URL.from_text(u"http://localhost/#"),
  221. URL.from_text(u"http://localhost/"))
  222. def test_child(self):
  223. """
  224. L{URL.child} appends a new path segment, but does not affect the query
  225. or fragment.
  226. """
  227. urlpath = URL.from_text(BASIC_URL)
  228. self.assertEqual("http://www.foo.com/a/nice/path/gong?zot=23&zut",
  229. urlpath.child('gong').to_text())
  230. self.assertEqual("http://www.foo.com/a/nice/path/gong%2F?zot=23&zut",
  231. urlpath.child('gong/').to_text())
  232. self.assertEqual(
  233. "http://www.foo.com/a/nice/path/gong%2Fdouble?zot=23&zut",
  234. urlpath.child('gong/double').to_text()
  235. )
  236. self.assertEqual(
  237. "http://www.foo.com/a/nice/path/gong%2Fdouble%2F?zot=23&zut",
  238. urlpath.child('gong/double/').to_text()
  239. )
  240. def test_multiChild(self):
  241. """
  242. L{URL.child} receives multiple segments as C{*args} and appends each in
  243. turn.
  244. """
  245. url = URL.from_text('http://example.com/a/b')
  246. self.assertEqual(url.child('c', 'd', 'e').to_text(),
  247. 'http://example.com/a/b/c/d/e')
  248. def test_childInitRoot(self):
  249. """
  250. L{URL.child} of a L{URL} without a path produces a L{URL} with a single
  251. path segment.
  252. """
  253. childURL = URL(host=u"www.foo.com").child(u"c")
  254. self.assertTrue(childURL.rooted)
  255. self.assertEqual("http://www.foo.com/c", childURL.to_text())
  256. def test_sibling(self):
  257. """
  258. L{URL.sibling} of a L{URL} replaces the last path segment, but does not
  259. affect the query or fragment.
  260. """
  261. urlpath = URL.from_text(BASIC_URL)
  262. self.assertEqual(
  263. "http://www.foo.com/a/nice/path/sister?zot=23&zut",
  264. urlpath.sibling('sister').to_text()
  265. )
  266. # Use an url without trailing '/' to check child removal.
  267. url_text = "http://www.foo.com/a/nice/path?zot=23&zut"
  268. urlpath = URL.from_text(url_text)
  269. self.assertEqual(
  270. "http://www.foo.com/a/nice/sister?zot=23&zut",
  271. urlpath.sibling('sister').to_text()
  272. )
  273. def test_click(self):
  274. """
  275. L{URL.click} interprets the given string as a relative URI-reference
  276. and returns a new L{URL} interpreting C{self} as the base absolute URI.
  277. """
  278. urlpath = URL.from_text(BASIC_URL)
  279. # A null uri should be valid (return here).
  280. self.assertEqual("http://www.foo.com/a/nice/path/?zot=23&zut",
  281. urlpath.click("").to_text())
  282. # A simple relative path remove the query.
  283. self.assertEqual("http://www.foo.com/a/nice/path/click",
  284. urlpath.click("click").to_text())
  285. # An absolute path replace path and query.
  286. self.assertEqual("http://www.foo.com/click",
  287. urlpath.click("/click").to_text())
  288. # Replace just the query.
  289. self.assertEqual("http://www.foo.com/a/nice/path/?burp",
  290. urlpath.click("?burp").to_text())
  291. # One full url to another should not generate '//' between authority.
  292. # and path
  293. self.assertTrue("//foobar" not in
  294. urlpath.click('http://www.foo.com/foobar').to_text())
  295. # From a url with no query clicking a url with a query, the query
  296. # should be handled properly.
  297. u = URL.from_text('http://www.foo.com/me/noquery')
  298. self.assertEqual('http://www.foo.com/me/17?spam=158',
  299. u.click('/me/17?spam=158').to_text())
  300. # Check that everything from the path onward is removed when the click
  301. # link has no path.
  302. u = URL.from_text('http://localhost/foo?abc=def')
  303. self.assertEqual(u.click('http://www.python.org').to_text(),
  304. 'http://www.python.org')
  305. # https://twistedmatrix.com/trac/ticket/8184
  306. u = URL.from_text('http://hatnote.com/a/b/../c/./d/e/..')
  307. res = 'http://hatnote.com/a/c/d/'
  308. self.assertEqual(u.click('').to_text(), res)
  309. # test click default arg is same as empty string above
  310. self.assertEqual(u.click().to_text(), res)
  311. def test_clickRFC3986(self):
  312. """
  313. L{URL.click} should correctly resolve the examples in RFC 3986.
  314. """
  315. base = URL.from_text(relativeLinkBaseForRFC3986)
  316. for (ref, expected) in relativeLinkTestsForRFC3986:
  317. self.assertEqual(base.click(ref).to_text(), expected)
  318. def test_clickSchemeRelPath(self):
  319. """
  320. L{URL.click} should not accept schemes with relative paths.
  321. """
  322. base = URL.from_text(relativeLinkBaseForRFC3986)
  323. self.assertRaises(NotImplementedError, base.click, 'g:h')
  324. self.assertRaises(NotImplementedError, base.click, 'http:h')
  325. def test_cloneUnchanged(self):
  326. """
  327. Verify that L{URL.replace} doesn't change any of the arguments it
  328. is passed.
  329. """
  330. urlpath = URL.from_text('https://x:1/y?z=1#A')
  331. self.assertEqual(urlpath.replace(urlpath.scheme,
  332. urlpath.host,
  333. urlpath.path,
  334. urlpath.query,
  335. urlpath.fragment,
  336. urlpath.port),
  337. urlpath)
  338. self.assertEqual(urlpath.replace(), urlpath)
  339. def test_clickCollapse(self):
  340. """
  341. L{URL.click} collapses C{.} and C{..} according to RFC 3986 section
  342. 5.2.4.
  343. """
  344. tests = [
  345. ['http://localhost/', '.', 'http://localhost/'],
  346. ['http://localhost/', '..', 'http://localhost/'],
  347. ['http://localhost/a/b/c', '.', 'http://localhost/a/b/'],
  348. ['http://localhost/a/b/c', '..', 'http://localhost/a/'],
  349. ['http://localhost/a/b/c', './d/e', 'http://localhost/a/b/d/e'],
  350. ['http://localhost/a/b/c', '../d/e', 'http://localhost/a/d/e'],
  351. ['http://localhost/a/b/c', '/./d/e', 'http://localhost/d/e'],
  352. ['http://localhost/a/b/c', '/../d/e', 'http://localhost/d/e'],
  353. ['http://localhost/a/b/c/', '../../d/e/',
  354. 'http://localhost/a/d/e/'],
  355. ['http://localhost/a/./c', '../d/e', 'http://localhost/d/e'],
  356. ['http://localhost/a/./c/', '../d/e', 'http://localhost/a/d/e'],
  357. ['http://localhost/a/b/c/d', './e/../f/../g',
  358. 'http://localhost/a/b/c/g'],
  359. ['http://localhost/a/b/c', 'd//e', 'http://localhost/a/b/d//e'],
  360. ]
  361. for start, click, expected in tests:
  362. actual = URL.from_text(start).click(click).to_text()
  363. self.assertEqual(
  364. actual,
  365. expected,
  366. "{start}.click({click}) => {actual} not {expected}".format(
  367. start=start,
  368. click=repr(click),
  369. actual=actual,
  370. expected=expected,
  371. )
  372. )
  373. def test_queryAdd(self):
  374. """
  375. L{URL.add} adds query parameters.
  376. """
  377. self.assertEqual(
  378. "http://www.foo.com/a/nice/path/?foo=bar",
  379. URL.from_text("http://www.foo.com/a/nice/path/")
  380. .add(u"foo", u"bar").to_text())
  381. self.assertEqual(
  382. "http://www.foo.com/?foo=bar",
  383. URL(host=u"www.foo.com").add(u"foo", u"bar")
  384. .to_text())
  385. urlpath = URL.from_text(BASIC_URL)
  386. self.assertEqual(
  387. "http://www.foo.com/a/nice/path/?zot=23&zut&burp",
  388. urlpath.add(u"burp").to_text())
  389. self.assertEqual(
  390. "http://www.foo.com/a/nice/path/?zot=23&zut&burp=xxx",
  391. urlpath.add(u"burp", u"xxx").to_text())
  392. self.assertEqual(
  393. "http://www.foo.com/a/nice/path/?zot=23&zut&burp=xxx&zing",
  394. urlpath.add(u"burp", u"xxx").add(u"zing").to_text())
  395. # Note the inversion!
  396. self.assertEqual(
  397. "http://www.foo.com/a/nice/path/?zot=23&zut&zing&burp=xxx",
  398. urlpath.add(u"zing").add(u"burp", u"xxx").to_text())
  399. # Note the two values for the same name.
  400. self.assertEqual(
  401. "http://www.foo.com/a/nice/path/?zot=23&zut&burp=xxx&zot=32",
  402. urlpath.add(u"burp", u"xxx").add(u"zot", '32')
  403. .to_text())
  404. def test_querySet(self):
  405. """
  406. L{URL.set} replaces query parameters by name.
  407. """
  408. urlpath = URL.from_text(BASIC_URL)
  409. self.assertEqual(
  410. "http://www.foo.com/a/nice/path/?zot=32&zut",
  411. urlpath.set(u"zot", '32').to_text())
  412. # Replace name without value with name/value and vice-versa.
  413. self.assertEqual(
  414. "http://www.foo.com/a/nice/path/?zot&zut=itworked",
  415. urlpath.set(u"zot").set(u"zut", u"itworked").to_text()
  416. )
  417. # Q: what happens when the query has two values and we replace?
  418. # A: we replace both values with a single one
  419. self.assertEqual(
  420. "http://www.foo.com/a/nice/path/?zot=32&zut",
  421. urlpath.add(u"zot", u"xxx").set(u"zot", '32').to_text()
  422. )
  423. def test_queryRemove(self):
  424. """
  425. L{URL.remove} removes all instances of a query parameter.
  426. """
  427. url = URL.from_text(u"https://example.com/a/b/?foo=1&bar=2&foo=3")
  428. self.assertEqual(
  429. url.remove(u"foo"),
  430. URL.from_text(u"https://example.com/a/b/?bar=2")
  431. )
  432. def test_parseEqualSignInParamValue(self):
  433. """
  434. Every C{=}-sign after the first in a query parameter is simply included
  435. in the value of the parameter.
  436. """
  437. u = URL.from_text('http://localhost/?=x=x=x')
  438. self.assertEqual(u.get(''), ['x=x=x'])
  439. self.assertEqual(u.to_text(), 'http://localhost/?=x%3Dx%3Dx')
  440. u = URL.from_text('http://localhost/?foo=x=x=x&bar=y')
  441. self.assertEqual(u.query, (('foo', 'x=x=x'), ('bar', 'y')))
  442. self.assertEqual(u.to_text(), 'http://localhost/?foo=x%3Dx%3Dx&bar=y')
  443. def test_empty(self):
  444. """
  445. An empty L{URL} should serialize as the empty string.
  446. """
  447. self.assertEqual(URL().to_text(), '')
  448. def test_justQueryText(self):
  449. """
  450. An L{URL} with query text should serialize as just query text.
  451. """
  452. u = URL(query=[(u"hello", u"world")])
  453. self.assertEqual(u.to_text(), '?hello=world')
  454. def test_identicalEqual(self):
  455. """
  456. L{URL} compares equal to itself.
  457. """
  458. u = URL.from_text('http://localhost/')
  459. self.assertEqual(u, u)
  460. def test_similarEqual(self):
  461. """
  462. URLs with equivalent components should compare equal.
  463. """
  464. u1 = URL.from_text('http://u@localhost:8080/p/a/t/h?q=p#f')
  465. u2 = URL.from_text('http://u@localhost:8080/p/a/t/h?q=p#f')
  466. self.assertEqual(u1, u2)
  467. def test_differentNotEqual(self):
  468. """
  469. L{URL}s that refer to different resources are both unequal (C{!=}) and
  470. also not equal (not C{==}).
  471. """
  472. u1 = URL.from_text('http://localhost/a')
  473. u2 = URL.from_text('http://localhost/b')
  474. self.assertFalse(u1 == u2, "%r != %r" % (u1, u2))
  475. self.assertNotEqual(u1, u2)
  476. def test_otherTypesNotEqual(self):
  477. """
  478. L{URL} is not equal (C{==}) to other types.
  479. """
  480. u = URL.from_text('http://localhost/')
  481. self.assertFalse(u == 42, "URL must not equal a number.")
  482. self.assertFalse(u == object(), "URL must not equal an object.")
  483. self.assertNotEqual(u, 42)
  484. self.assertNotEqual(u, object())
  485. def test_identicalNotUnequal(self):
  486. """
  487. Identical L{URL}s are not unequal (C{!=}) to each other.
  488. """
  489. u = URL.from_text('http://u@localhost:8080/p/a/t/h?q=p#f')
  490. self.assertFalse(u != u, "%r == itself" % u)
  491. def test_similarNotUnequal(self):
  492. """
  493. Structurally similar L{URL}s are not unequal (C{!=}) to each other.
  494. """
  495. u1 = URL.from_text('http://u@localhost:8080/p/a/t/h?q=p#f')
  496. u2 = URL.from_text('http://u@localhost:8080/p/a/t/h?q=p#f')
  497. self.assertFalse(u1 != u2, "%r == %r" % (u1, u2))
  498. def test_differentUnequal(self):
  499. """
  500. Structurally different L{URL}s are unequal (C{!=}) to each other.
  501. """
  502. u1 = URL.from_text('http://localhost/a')
  503. u2 = URL.from_text('http://localhost/b')
  504. self.assertTrue(u1 != u2, "%r == %r" % (u1, u2))
  505. def test_otherTypesUnequal(self):
  506. """
  507. L{URL} is unequal (C{!=}) to other types.
  508. """
  509. u = URL.from_text('http://localhost/')
  510. self.assertTrue(u != 42, "URL must differ from a number.")
  511. self.assertTrue(u != object(), "URL must be differ from an object.")
  512. def test_asURI(self):
  513. """
  514. L{URL.asURI} produces an URI which converts any URI unicode encoding
  515. into pure US-ASCII and returns a new L{URL}.
  516. """
  517. unicodey = ('http://\N{LATIN SMALL LETTER E WITH ACUTE}.com/'
  518. '\N{LATIN SMALL LETTER E}\N{COMBINING ACUTE ACCENT}'
  519. '?\N{LATIN SMALL LETTER A}\N{COMBINING ACUTE ACCENT}='
  520. '\N{LATIN SMALL LETTER I}\N{COMBINING ACUTE ACCENT}'
  521. '#\N{LATIN SMALL LETTER U}\N{COMBINING ACUTE ACCENT}')
  522. iri = URL.from_text(unicodey)
  523. uri = iri.asURI()
  524. self.assertEqual(iri.host, '\N{LATIN SMALL LETTER E WITH ACUTE}.com')
  525. self.assertEqual(iri.path[0],
  526. '\N{LATIN SMALL LETTER E}\N{COMBINING ACUTE ACCENT}')
  527. self.assertEqual(iri.to_text(), unicodey)
  528. expectedURI = 'http://xn--9ca.com/%C3%A9?%C3%A1=%C3%AD#%C3%BA'
  529. actualURI = uri.to_text()
  530. self.assertEqual(actualURI, expectedURI,
  531. '%r != %r' % (actualURI, expectedURI))
  532. def test_asIRI(self):
  533. """
  534. L{URL.asIRI} decodes any percent-encoded text in the URI, making it
  535. more suitable for reading by humans, and returns a new L{URL}.
  536. """
  537. asciiish = 'http://xn--9ca.com/%C3%A9?%C3%A1=%C3%AD#%C3%BA'
  538. uri = URL.from_text(asciiish)
  539. iri = uri.asIRI()
  540. self.assertEqual(uri.host, 'xn--9ca.com')
  541. self.assertEqual(uri.path[0], '%C3%A9')
  542. self.assertEqual(uri.to_text(), asciiish)
  543. expectedIRI = ('http://\N{LATIN SMALL LETTER E WITH ACUTE}.com/'
  544. '\N{LATIN SMALL LETTER E WITH ACUTE}'
  545. '?\N{LATIN SMALL LETTER A WITH ACUTE}='
  546. '\N{LATIN SMALL LETTER I WITH ACUTE}'
  547. '#\N{LATIN SMALL LETTER U WITH ACUTE}')
  548. actualIRI = iri.to_text()
  549. self.assertEqual(actualIRI, expectedIRI,
  550. '%r != %r' % (actualIRI, expectedIRI))
  551. def test_badUTF8AsIRI(self):
  552. """
  553. Bad UTF-8 in a path segment, query parameter, or fragment results in
  554. that portion of the URI remaining percent-encoded in the IRI.
  555. """
  556. urlWithBinary = 'http://xn--9ca.com/%00%FF/%C3%A9'
  557. uri = URL.from_text(urlWithBinary)
  558. iri = uri.asIRI()
  559. expectedIRI = ('http://\N{LATIN SMALL LETTER E WITH ACUTE}.com/'
  560. '%00%FF/'
  561. '\N{LATIN SMALL LETTER E WITH ACUTE}')
  562. actualIRI = iri.to_text()
  563. self.assertEqual(actualIRI, expectedIRI,
  564. '%r != %r' % (actualIRI, expectedIRI))
  565. def test_alreadyIRIAsIRI(self):
  566. """
  567. A L{URL} composed of non-ASCII text will result in non-ASCII text.
  568. """
  569. unicodey = ('http://\N{LATIN SMALL LETTER E WITH ACUTE}.com/'
  570. '\N{LATIN SMALL LETTER E}\N{COMBINING ACUTE ACCENT}'
  571. '?\N{LATIN SMALL LETTER A}\N{COMBINING ACUTE ACCENT}='
  572. '\N{LATIN SMALL LETTER I}\N{COMBINING ACUTE ACCENT}'
  573. '#\N{LATIN SMALL LETTER U}\N{COMBINING ACUTE ACCENT}')
  574. iri = URL.from_text(unicodey)
  575. alsoIRI = iri.asIRI()
  576. self.assertEqual(alsoIRI.to_text(), unicodey)
  577. def test_alreadyURIAsURI(self):
  578. """
  579. A L{URL} composed of encoded text will remain encoded.
  580. """
  581. expectedURI = 'http://xn--9ca.com/%C3%A9?%C3%A1=%C3%AD#%C3%BA'
  582. uri = URL.from_text(expectedURI)
  583. actualURI = uri.asURI().to_text()
  584. self.assertEqual(actualURI, expectedURI)
  585. def test_userinfo(self):
  586. """
  587. L{URL.from_text} will parse the C{userinfo} portion of the URI
  588. separately from the host and port.
  589. """
  590. url = URL.from_text(
  591. 'http://someuser:somepassword@example.com/some-segment@ignore'
  592. )
  593. self.assertEqual(url.authority(True),
  594. 'someuser:somepassword@example.com')
  595. self.assertEqual(url.authority(False), 'someuser:@example.com')
  596. self.assertEqual(url.userinfo, 'someuser:somepassword')
  597. self.assertEqual(url.user, 'someuser')
  598. self.assertEqual(url.to_text(),
  599. 'http://someuser:@example.com/some-segment@ignore')
  600. self.assertEqual(
  601. url.replace(userinfo=u"someuser").to_text(),
  602. 'http://someuser@example.com/some-segment@ignore'
  603. )
  604. def test_portText(self):
  605. """
  606. L{URL.from_text} parses custom port numbers as integers.
  607. """
  608. portURL = URL.from_text(u"http://www.example.com:8080/")
  609. self.assertEqual(portURL.port, 8080)
  610. self.assertEqual(portURL.to_text(), u"http://www.example.com:8080/")
  611. def test_mailto(self):
  612. """
  613. Although L{URL} instances are mainly for dealing with HTTP, other
  614. schemes (such as C{mailto:}) should work as well. For example,
  615. L{URL.from_text}/L{URL.to_text} round-trips cleanly for a C{mailto:} URL
  616. representing an email address.
  617. """
  618. self.assertEqual(URL.from_text(u"mailto:user@example.com").to_text(),
  619. u"mailto:user@example.com")
  620. def test_queryIterable(self):
  621. """
  622. When a L{URL} is created with a C{query} argument, the C{query}
  623. argument is converted into an N-tuple of 2-tuples.
  624. """
  625. url = URL(query=[['alpha', 'beta']])
  626. self.assertEqual(url.query, (('alpha', 'beta'),))
  627. def test_pathIterable(self):
  628. """
  629. When a L{URL} is created with a C{path} argument, the C{path} is
  630. converted into a tuple.
  631. """
  632. url = URL(path=['hello', 'world'])
  633. self.assertEqual(url.path, ('hello', 'world'))
  634. def test_invalidArguments(self):
  635. """
  636. Passing an argument of the wrong type to any of the constructor
  637. arguments of L{URL} will raise a descriptive L{TypeError}.
  638. L{URL} typechecks very aggressively to ensure that its constitutent
  639. parts are all properly immutable and to prevent confusing errors when
  640. bad data crops up in a method call long after the code that called the
  641. constructor is off the stack.
  642. """
  643. class Unexpected(object):
  644. def __str__(self):
  645. return "wrong"
  646. def __repr__(self):
  647. return "<unexpected>"
  648. defaultExpectation = "unicode" if bytes is str else "str"
  649. def assertRaised(raised, expectation, name):
  650. self.assertEqual(str(raised.exception),
  651. "expected {0} for {1}, got {2}".format(
  652. expectation,
  653. name, "<unexpected>"))
  654. def check(param, expectation=defaultExpectation):
  655. with self.assertRaises(TypeError) as raised:
  656. URL(**{param: Unexpected()})
  657. assertRaised(raised, expectation, param)
  658. check("scheme")
  659. check("host")
  660. check("fragment")
  661. check("rooted", "bool")
  662. check("userinfo")
  663. check("port", "int or NoneType")
  664. with self.assertRaises(TypeError) as raised:
  665. URL(path=[Unexpected()])
  666. assertRaised(raised, defaultExpectation, "path segment")
  667. with self.assertRaises(TypeError) as raised:
  668. URL(query=[(u"name", Unexpected())])
  669. assertRaised(raised, defaultExpectation + " or NoneType",
  670. "query parameter value")
  671. with self.assertRaises(TypeError) as raised:
  672. URL(query=[(Unexpected(), u"value")])
  673. assertRaised(raised, defaultExpectation, "query parameter name")
  674. # No custom error message for this one, just want to make sure
  675. # non-2-tuples don't get through.
  676. with self.assertRaises(TypeError):
  677. URL(query=[Unexpected()])
  678. with self.assertRaises(ValueError):
  679. URL(query=[('k', 'v', 'vv')])
  680. with self.assertRaises(ValueError):
  681. URL(query=[('k',)])
  682. url = URL.from_text("https://valid.example.com/")
  683. with self.assertRaises(TypeError) as raised:
  684. url.child(Unexpected())
  685. assertRaised(raised, defaultExpectation, "path segment")
  686. with self.assertRaises(TypeError) as raised:
  687. url.sibling(Unexpected())
  688. assertRaised(raised, defaultExpectation, "path segment")
  689. with self.assertRaises(TypeError) as raised:
  690. url.click(Unexpected())
  691. assertRaised(raised, defaultExpectation, "relative URL")
  692. def test_technicallyTextIsIterableBut(self):
  693. """
  694. Technically, L{str} (or L{unicode}, as appropriate) is iterable, but
  695. C{URL(path="foo")} resulting in C{URL.from_text("f/o/o")} is never what
  696. you want.
  697. """
  698. with self.assertRaises(TypeError) as raised:
  699. URL(path='foo')
  700. self.assertEqual(
  701. str(raised.exception),
  702. "expected iterable of text for path, not: {0}"
  703. .format(repr('foo'))
  704. )
  705. def test_netloc(self):
  706. url = URL(scheme='https')
  707. self.assertEqual(url.uses_netloc, True)
  708. url = URL(scheme='git+https')
  709. self.assertEqual(url.uses_netloc, True)
  710. url = URL(scheme='mailto')
  711. self.assertEqual(url.uses_netloc, False)
  712. url = URL(scheme='ztp')
  713. self.assertEqual(url.uses_netloc, None)
  714. url = URL.from_text('ztp://test.com')
  715. self.assertEqual(url.uses_netloc, True)
  716. url = URL.from_text('ztp:test:com')
  717. self.assertEqual(url.uses_netloc, False)
  718. def test_ipv6_with_port(self):
  719. t = 'https://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]:80/'
  720. url = URL.from_text(t)
  721. assert url.host == '2001:0db8:85a3:0000:0000:8a2e:0370:7334'
  722. assert url.port == 80
  723. assert url.family == socket.AF_INET6
  724. assert SCHEME_PORT_MAP[url.scheme] != url.port
  725. def test_invalid_ipv6(self):
  726. invalid_ipv6_ips = ['2001::0234:C1ab::A0:aabc:003F',
  727. '2001::1::3F',
  728. ':',
  729. '::::',
  730. '::256.0.0.1']
  731. for ip in invalid_ipv6_ips:
  732. url_text = 'http://[' + ip + ']'
  733. self.assertRaises(socket.error, inet_pton,
  734. socket.AF_INET6, ip)
  735. self.assertRaises(URLParseError, URL.from_text, url_text)
  736. def test_ip_family_detection(self):
  737. u = URL.from_text('http://giggle.com')
  738. self.assertEqual(u.family, None)
  739. u = URL.from_text('http://127.0.0.1/a/b/?c=d')
  740. self.assertEqual(u.family, socket.AF_INET)
  741. u = URL.from_text('http://[::1]/a/b/?c=d')
  742. self.assertEqual(u.family, socket.AF_INET6)
  743. def test_invalid_port(self):
  744. self.assertRaises(URLParseError, URL.from_text, 'ftp://portmouth:smash')
  745. self.assertRaises(ValueError, URL.from_text,
  746. 'http://reader.googlewebsite.com:neverforget')
  747. def test_idna(self):
  748. u1 = URL.from_text('http://bücher.ch')
  749. self.assertEquals(u1.host, 'bücher.ch')
  750. self.assertEquals(u1.to_text(), 'http://bücher.ch')
  751. self.assertEquals(u1.to_uri().to_text(), 'http://xn--bcher-kva.ch')
  752. u2 = URL.from_text('https://xn--bcher-kva.ch')
  753. self.assertEquals(u2.host, 'xn--bcher-kva.ch')
  754. self.assertEquals(u2.to_text(), 'https://xn--bcher-kva.ch')
  755. self.assertEquals(u2.to_iri().to_text(), u'https://bücher.ch')
  756. def test_netloc_slashes(self):
  757. # basic sanity checks
  758. url = URL.from_text('mailto:mahmoud@hatnote.com')
  759. self.assertEquals(url.scheme, 'mailto')
  760. self.assertEquals(url.to_text(), 'mailto:mahmoud@hatnote.com')
  761. url = URL.from_text('http://hatnote.com')
  762. self.assertEquals(url.scheme, 'http')
  763. self.assertEquals(url.to_text(), 'http://hatnote.com')
  764. # test that unrecognized schemes stay consistent with '//'
  765. url = URL.from_text('newscheme:a:b:c')
  766. self.assertEquals(url.scheme, 'newscheme')
  767. self.assertEquals(url.to_text(), 'newscheme:a:b:c')
  768. url = URL.from_text('newerscheme://a/b/c')
  769. self.assertEquals(url.scheme, 'newerscheme')
  770. self.assertEquals(url.to_text(), 'newerscheme://a/b/c')
  771. # test that reasonable guesses are made
  772. url = URL.from_text('git+ftp://gitstub.biz/glyph/lefkowitz')
  773. self.assertEquals(url.scheme, 'git+ftp')
  774. self.assertEquals(url.to_text(),
  775. 'git+ftp://gitstub.biz/glyph/lefkowitz')
  776. url = URL.from_text('what+mailto:freerealestate@enotuniq.org')
  777. self.assertEquals(url.scheme, 'what+mailto')
  778. self.assertEquals(url.to_text(),
  779. 'what+mailto:freerealestate@enotuniq.org')
  780. url = URL(scheme='ztp', path=('x', 'y', 'z'), rooted=True)
  781. self.assertEquals(url.to_text(), 'ztp:/x/y/z')
  782. # also works when the input doesn't include '//'
  783. url = URL(scheme='git+ftp', path=('x', 'y', 'z' ,''),
  784. rooted=True, uses_netloc=True)
  785. # broken bc urlunsplit
  786. self.assertEquals(url.to_text(), 'git+ftp:///x/y/z/')
  787. # really why would this ever come up but ok
  788. url = URL.from_text('file:///path/to/heck')
  789. url2 = url.replace(scheme='mailto')
  790. self.assertEquals(url2.to_text(), 'mailto:/path/to/heck')
  791. return
  792. def test_wrong_constructor(self):
  793. with self.assertRaises(ValueError):
  794. # whole URL not allowed
  795. URL(BASIC_URL)
  796. with self.assertRaises(ValueError):
  797. # explicitly bad scheme not allowed
  798. URL('HTTP_____more_like_imHoTTeP')
  799. def test_encoded_userinfo(self):
  800. url = URL.from_text('http://user:pass@example.com')
  801. assert url.userinfo == 'user:pass'
  802. url = url.replace(userinfo='us%20her:pass')
  803. iri = url.to_iri()
  804. assert iri.to_text(with_password=True) == 'http://us her:pass@example.com'
  805. assert iri.to_text(with_password=False) == 'http://us her:@example.com'
  806. assert iri.to_uri().to_text(with_password=True) == 'http://us%20her:pass@example.com'
  807. def test_hash(self):
  808. url_map = {}
  809. url1 = URL.from_text('http://blog.hatnote.com/ask?utm_source=geocity')
  810. assert hash(url1) == hash(url1) # sanity
  811. url_map[url1] = 1
  812. url2 = URL.from_text('http://blog.hatnote.com/ask')
  813. url2 = url2.set('utm_source', 'geocity')
  814. url_map[url2] = 2
  815. assert len(url_map) == 1
  816. assert list(url_map.values()) == [2]
  817. assert hash(URL()) == hash(URL()) # slightly more sanity
  818. def test_dir(self):
  819. url = URL()
  820. res = dir(url)
  821. assert len(res) > 15
  822. # twisted compat
  823. assert 'fromText' not in res
  824. assert 'asText' not in res
  825. assert 'asURI' not in res
  826. assert 'asIRI' not in res
  827. def test_twisted_compat(self):
  828. url = URL.fromText(u'http://example.com/a%20té%C3%A9st')
  829. assert url.asText() == 'http://example.com/a%20té%C3%A9st'
  830. assert url.asURI().asText() == 'http://example.com/a%20t%C3%A9%C3%A9st'
  831. # TODO: assert url.asIRI().asText() == u'http://example.com/a%20téést'
  832. def test_set_ordering(self):
  833. # TODO
  834. url = URL.from_text('http://example.com/?a=b&c')
  835. url = url.set(u'x', u'x')
  836. url = url.add(u'x', u'y')
  837. assert url.to_text() == u'http://example.com/?a=b&x=x&c&x=y'
  838. # Would expect:
  839. # assert url.to_text() == u'http://example.com/?a=b&c&x=x&x=y'
  840. def test_schemeless_path(self):
  841. "See issue #4"
  842. u1 = URL.from_text("urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob")
  843. u2 = URL.from_text(u1.to_text())
  844. assert u1 == u2 # sanity testing roundtripping
  845. u3 = URL.from_text(u1.to_iri().to_text())
  846. assert u1 == u3
  847. assert u2 == u3
  848. # test that colons are ok past the first segment
  849. u4 = URL.from_text("first-segment/urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob")
  850. u5 = u4.to_iri()
  851. assert u5.to_text() == u'first-segment/urn:ietf:wg:oauth:2.0:oob'
  852. u6 = URL.from_text(u5.to_text()).to_uri()
  853. assert u5 == u6 # colons stay decoded bc they're not in the first seg
  854. def test_emoji_domain(self):
  855. "See issue #7, affecting only narrow builds (2.6-3.3)"
  856. url = URL.from_text('https://xn--vi8hiv.ws')
  857. iri = url.to_iri()
  858. iri.to_text()
  859. # as long as we don't get ValueErrors, we're good
  860. def test_delim_in_param(self):
  861. "Per issue #6 and #8"
  862. self.assertRaises(ValueError, URL, scheme=u'http', host=u'a/c')
  863. self.assertRaises(ValueError, URL, path=(u"?",))
  864. self.assertRaises(ValueError, URL, path=(u"#",))
  865. self.assertRaises(ValueError, URL, query=((u"&", "test")))
  866. def test_empty_paths_eq(self):
  867. u1 = URL.from_text('http://example.com/')
  868. u2 = URL.from_text('http://example.com')
  869. assert u1 == u2
  870. u1 = URL.from_text('http://example.com')
  871. u2 = URL.from_text('http://example.com')
  872. assert u1 == u2
  873. u1 = URL.from_text('http://example.com')
  874. u2 = URL.from_text('http://example.com/')
  875. assert u1 == u2
  876. u1 = URL.from_text('http://example.com/')
  877. u2 = URL.from_text('http://example.com/')
  878. assert u1 == u2
  879. # python 2.6 compat
  880. def assertRaises(self, excClass, callableObj=None, *args, **kwargs):
  881. """Fail unless an exception of class excClass is raised
  882. by callableObj when invoked with arguments args and keyword
  883. arguments kwargs. If a different type of exception is
  884. raised, it will not be caught, and the test case will be
  885. deemed to have suffered an error, exactly as for an
  886. unexpected exception.
  887. If called with callableObj omitted or None, will return a
  888. context object used like this::
  889. with self.assertRaises(SomeException):
  890. do_something()
  891. The context manager keeps a reference to the exception as
  892. the 'exception' attribute. This allows you to inspect the
  893. exception after the assertion::
  894. with self.assertRaises(SomeException) as cm:
  895. do_something()
  896. the_exception = cm.exception
  897. self.assertEqual(the_exception.error_code, 3)
  898. """
  899. context = _AssertRaisesContext(excClass, self)
  900. if callableObj is None:
  901. return context
  902. with context:
  903. callableObj(*args, **kwargs)
  904. # PYTHON 2.6 compat
  905. class _AssertRaisesContext(object):
  906. """A context manager used to implement TestCase.assertRaises* methods."""
  907. def __init__(self, expected, test_case, expected_regexp=None):
  908. self.expected = expected
  909. self.failureException = test_case.failureException
  910. self.expected_regexp = expected_regexp
  911. def __enter__(self):
  912. return self
  913. def __exit__(self, exc_type, exc_value, tb):
  914. if exc_type is None:
  915. try:
  916. exc_name = self.expected.__name__
  917. except AttributeError:
  918. exc_name = str(self.expected)
  919. raise self.failureException(
  920. "{0} not raised".format(exc_name))
  921. if not issubclass(exc_type, self.expected):
  922. # let unexpected exceptions pass through
  923. return False
  924. self.exception = exc_value # store for later retrieval
  925. if self.expected_regexp is None:
  926. return True
  927. expected_regexp = self.expected_regexp
  928. if not expected_regexp.search(str(exc_value)):
  929. raise self.failureException('"%s" does not match "%s"' %
  930. (expected_regexp.pattern, str(exc_value)))
  931. return True