urlpath.py 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294
  1. # -*- test-case-name: twisted.python.test.test_urlpath -*-
  2. # Copyright (c) Twisted Matrix Laboratories.
  3. # See LICENSE for details.
  4. """
  5. L{URLPath}, a representation of a URL.
  6. """
  7. from __future__ import division, absolute_import
  8. from twisted.python.compat import (
  9. nativeString, unicode, urllib_parse as urlparse, urlunquote, urlquote
  10. )
  11. from hyperlink import URL as _URL
  12. _allascii = b"".join([chr(x).encode('ascii') for x in range(1, 128)])
  13. def _rereconstituter(name):
  14. """
  15. Attriute declaration to preserve mutability on L{URLPath}.
  16. @param name: a public attribute name
  17. @type name: native L{str}
  18. @return: a descriptor which retrieves the private version of the attribute
  19. on get and calls rerealize on set.
  20. """
  21. privateName = nativeString("_") + name
  22. return property(
  23. lambda self: getattr(self, privateName),
  24. lambda self, value: (setattr(self, privateName,
  25. value if isinstance(value, bytes)
  26. else value.encode("charmap")) or
  27. self._reconstitute())
  28. )
  29. class URLPath(object):
  30. """
  31. A representation of a URL.
  32. @ivar scheme: The scheme of the URL (e.g. 'http').
  33. @type scheme: L{bytes}
  34. @ivar netloc: The network location ("host").
  35. @type netloc: L{bytes}
  36. @ivar path: The path on the network location.
  37. @type path: L{bytes}
  38. @ivar query: The query argument (the portion after ? in the URL).
  39. @type query: L{bytes}
  40. @ivar fragment: The page fragment (the portion after # in the URL).
  41. @type fragment: L{bytes}
  42. """
  43. def __init__(self, scheme=b'', netloc=b'localhost', path=b'',
  44. query=b'', fragment=b''):
  45. self._scheme = scheme or b'http'
  46. self._netloc = netloc
  47. self._path = path or b'/'
  48. self._query = query
  49. self._fragment = fragment
  50. self._reconstitute()
  51. def _reconstitute(self):
  52. """
  53. Reconstitute this L{URLPath} from all its given attributes.
  54. """
  55. urltext = urlquote(
  56. urlparse.urlunsplit((self._scheme, self._netloc,
  57. self._path, self._query, self._fragment)),
  58. safe=_allascii
  59. )
  60. self._url = _URL.fromText(urltext.encode("ascii").decode("ascii"))
  61. scheme = _rereconstituter("scheme")
  62. netloc = _rereconstituter("netloc")
  63. path = _rereconstituter("path")
  64. query = _rereconstituter("query")
  65. fragment = _rereconstituter("fragment")
  66. @classmethod
  67. def _fromURL(cls, urlInstance):
  68. """
  69. Reconstruct all the public instance variables of this L{URLPath} from
  70. its underlying L{_URL}.
  71. @param urlInstance: the object to base this L{URLPath} on.
  72. @type urlInstance: L{_URL}
  73. @return: a new L{URLPath}
  74. """
  75. self = cls.__new__(cls)
  76. self._url = urlInstance.replace(path=urlInstance.path or [u""])
  77. self._scheme = self._url.scheme.encode("ascii")
  78. self._netloc = self._url.authority().encode("ascii")
  79. self._path = (_URL(path=self._url.path,
  80. rooted=True).asURI().asText()
  81. .encode("ascii"))
  82. self._query = (_URL(query=self._url.query).asURI().asText()
  83. .encode("ascii"))[1:]
  84. self._fragment = self._url.fragment.encode("ascii")
  85. return self
  86. def pathList(self, unquote=False, copy=True):
  87. """
  88. Split this URL's path into its components.
  89. @param unquote: whether to remove %-encoding from the returned strings.
  90. @param copy: (ignored, do not use)
  91. @return: The components of C{self.path}
  92. @rtype: L{list} of L{bytes}
  93. """
  94. segments = self._url.path
  95. mapper = lambda x: x.encode("ascii")
  96. if unquote:
  97. mapper = (lambda x, m=mapper: m(urlunquote(x)))
  98. return [b''] + [mapper(segment) for segment in segments]
  99. @classmethod
  100. def fromString(klass, url):
  101. """
  102. Make a L{URLPath} from a L{str} or L{unicode}.
  103. @param url: A L{str} representation of a URL.
  104. @type url: L{str} or L{unicode}.
  105. @return: a new L{URLPath} derived from the given string.
  106. @rtype: L{URLPath}
  107. """
  108. if not isinstance(url, (str, unicode)):
  109. raise ValueError("'url' must be a str or unicode")
  110. if isinstance(url, bytes):
  111. # On Python 2, accepting 'str' (for compatibility) means we might
  112. # get 'bytes'. On py3, this will not work with bytes due to the
  113. # check above.
  114. return klass.fromBytes(url)
  115. return klass._fromURL(_URL.fromText(url))
  116. @classmethod
  117. def fromBytes(klass, url):
  118. """
  119. Make a L{URLPath} from a L{bytes}.
  120. @param url: A L{bytes} representation of a URL.
  121. @type url: L{bytes}
  122. @return: a new L{URLPath} derived from the given L{bytes}.
  123. @rtype: L{URLPath}
  124. @since: 15.4
  125. """
  126. if not isinstance(url, bytes):
  127. raise ValueError("'url' must be bytes")
  128. quoted = urlquote(url, safe=_allascii)
  129. if isinstance(quoted, bytes):
  130. # This will only be bytes on python 2, where we can transform it
  131. # into unicode. On python 3, urlquote always returns str.
  132. quoted = quoted.decode("ascii")
  133. return klass.fromString(quoted)
  134. @classmethod
  135. def fromRequest(klass, request):
  136. """
  137. Make a L{URLPath} from a L{twisted.web.http.Request}.
  138. @param request: A L{twisted.web.http.Request} to make the L{URLPath}
  139. from.
  140. @return: a new L{URLPath} derived from the given request.
  141. @rtype: L{URLPath}
  142. """
  143. return klass.fromBytes(request.prePathURL())
  144. def _mod(self, newURL, keepQuery):
  145. """
  146. Return a modified copy of C{self} using C{newURL}, keeping the query
  147. string if C{keepQuery} is C{True}.
  148. @param newURL: a L{URL} to derive a new L{URLPath} from
  149. @type newURL: L{URL}
  150. @param keepQuery: if C{True}, preserve the query parameters from
  151. C{self} on the new L{URLPath}; if C{False}, give the new L{URLPath}
  152. no query parameters.
  153. @type keepQuery: L{bool}
  154. @return: a new L{URLPath}
  155. """
  156. return self._fromURL(newURL.replace(
  157. fragment=u'', query=self._url.query if keepQuery else ()
  158. ))
  159. def sibling(self, path, keepQuery=False):
  160. """
  161. Get the sibling of the current L{URLPath}. A sibling is a file which
  162. is in the same directory as the current file.
  163. @param path: The path of the sibling.
  164. @type path: L{bytes}
  165. @param keepQuery: Whether to keep the query parameters on the returned
  166. L{URLPath}.
  167. @type: keepQuery: L{bool}
  168. @return: a new L{URLPath}
  169. """
  170. return self._mod(self._url.sibling(path.decode("ascii")), keepQuery)
  171. def child(self, path, keepQuery=False):
  172. """
  173. Get the child of this L{URLPath}.
  174. @param path: The path of the child.
  175. @type path: L{bytes}
  176. @param keepQuery: Whether to keep the query parameters on the returned
  177. L{URLPath}.
  178. @type: keepQuery: L{bool}
  179. @return: a new L{URLPath}
  180. """
  181. return self._mod(self._url.child(path.decode("ascii")), keepQuery)
  182. def parent(self, keepQuery=False):
  183. """
  184. Get the parent directory of this L{URLPath}.
  185. @param keepQuery: Whether to keep the query parameters on the returned
  186. L{URLPath}.
  187. @type: keepQuery: L{bool}
  188. @return: a new L{URLPath}
  189. """
  190. return self._mod(self._url.click(u".."), keepQuery)
  191. def here(self, keepQuery=False):
  192. """
  193. Get the current directory of this L{URLPath}.
  194. @param keepQuery: Whether to keep the query parameters on the returned
  195. L{URLPath}.
  196. @type: keepQuery: L{bool}
  197. @return: a new L{URLPath}
  198. """
  199. return self._mod(self._url.click(u"."), keepQuery)
  200. def click(self, st):
  201. """
  202. Return a path which is the URL where a browser would presumably take
  203. you if you clicked on a link with an HREF as given.
  204. @param st: A relative URL, to be interpreted relative to C{self} as the
  205. base URL.
  206. @type st: L{bytes}
  207. @return: a new L{URLPath}
  208. """
  209. return self._fromURL(self._url.click(st.decode("ascii")))
  210. def __str__(self):
  211. """
  212. The L{str} of a L{URLPath} is its URL text.
  213. """
  214. return nativeString(self._url.asURI().asText())
  215. def __repr__(self):
  216. """
  217. The L{repr} of a L{URLPath} is an eval-able expression which will
  218. construct a similar L{URLPath}.
  219. """
  220. return ('URLPath(scheme=%r, netloc=%r, path=%r, query=%r, fragment=%r)'
  221. % (self.scheme, self.netloc, self.path, self.query,
  222. self.fragment))