utils_url.py 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206
  1. # -*- coding: utf-8 -*-
  2. # !/usr/bin/env python
  3. from six.moves.urllib import parse
  4. import simplejson as json
  5. class UrlHandler(object):
  6. DEFAULT_NEXT_HANDLER = None
  7. def __init__(self, client):
  8. self._client = client
  9. self._next = UrlHandler.DEFAULT_NEXT_HANDLER
  10. @property
  11. def endHandler(self):
  12. if not self.nextHandler:
  13. return self
  14. return self.nextHandler.endHandler
  15. @property
  16. def nextHandler(self):
  17. return self._next
  18. @nextHandler.setter
  19. def nextHandler(self, handler):
  20. if not isinstance(handler, UrlHandler):
  21. raise ValueError("invalid handler for nextHandler(): {}".format(handler))
  22. self._next = handler
  23. def parse(self):
  24. """
  25. 对 url 的每一部分都做解析 将各部分的零件都放到该放的位置上去
  26. """
  27. if self.nextHandler:
  28. self.nextHandler.parse()
  29. def add_query(self, added_query):
  30. if self.nextHandler:
  31. self.nextHandler.add_query(added_query)
  32. class SchemeHandler(UrlHandler):
  33. pass
  34. class NetLocHandler(UrlHandler):
  35. pass
  36. class PathHandler(UrlHandler):
  37. pass
  38. class ParamsHandler(UrlHandler):
  39. pass
  40. class QueryHandler(UrlHandler):
  41. def parse(self):
  42. pass
  43. def add_query(self, added_query):
  44. if not self._client.has_fragment:
  45. query = parse.parse_qs(qs = self._client.parseResult.query, keep_blank_values = True)
  46. new_query = {k: json.dumps(v) if isinstance(v, bool) else v for k, v in added_query.items()}
  47. query.update(new_query)
  48. self._client.parseResult = self._client.parseResult._replace(query = parse.urlencode(query, True))
  49. else:
  50. pass
  51. super(QueryHandler, self).add_query(added_query)
  52. class FragmentHandler(UrlHandler):
  53. def parse(self):
  54. pass
  55. def add_query(self, added_query):
  56. if self._client.has_fragment:
  57. old_framgent = self._client.parseResult.fragment
  58. new_query = {k: json.dumps(v) if isinstance(v, bool) else v for k, v in added_query.items()}
  59. if not old_framgent:
  60. fragment_path = ''
  61. else:
  62. tokens = self._client.parseResult.fragment.split('?')
  63. fragment_path = tokens[0]
  64. new_query.update(parse.parse_qs(qs = '&'.join(tokens[1:]), keep_blank_values = True))
  65. self._client.parseResult = self._client.parseResult._replace(
  66. fragment = '{}?{}'.format(fragment_path, parse.urlencode(new_query, True)))
  67. else:
  68. pass
  69. super(FragmentHandler, self).add_query(added_query)
  70. class UrlClient(object):
  71. """
  72. 处理url 每一部分单独处理自己职责内的问题 调用顺序依次是
  73. scheme netloc path params query fragment
  74. """
  75. def __init__(self, url):
  76. self._url = url
  77. self._parseResult = parse.urlparse(self._url)
  78. # 添加节点
  79. self.urlHeadHandler = SchemeHandler(self)
  80. self.urlHeadHandler.endHandler.nextHandler = NetLocHandler(self)
  81. self.urlHeadHandler.endHandler.nextHandler = PathHandler(self)
  82. self.urlHeadHandler.endHandler.nextHandler = ParamsHandler(self)
  83. self.urlHeadHandler.endHandler.nextHandler = QueryHandler(self)
  84. self.urlHeadHandler.endHandler.nextHandler = FragmentHandler(self)
  85. self.urlHeadHandler.parse()
  86. @property
  87. def has_fragment(self):
  88. if '#' in self._url:
  89. return True
  90. else:
  91. return False
  92. @property
  93. def parseResult(self):
  94. return self._parseResult
  95. @parseResult.setter
  96. def parseResult(self, value):
  97. """
  98. TODO value 类型的检查
  99. """
  100. self._parseResult = value
  101. def add_query(self, added_query):
  102. """
  103. 对 url 中的query 部分进行更新
  104. """
  105. self.urlHeadHandler.add_query(added_query)
  106. return self
  107. def getUrl(self):
  108. return parse.urlunparse(self.parseResult)
  109. def before_frag_add_query(uri, added_query):
  110. urlClient = UrlClient(uri)
  111. urlClient.add_query(added_query)
  112. return urlClient.getUrl()
  113. def add_query(uri, added_query):
  114. bits = list(parse.urlparse(uri))
  115. qs = parse.parse_qs(qs = bits[4], keep_blank_values = True)
  116. new_query = {k: json.dumps(v) if isinstance(v, bool) else v for k, v in added_query.items()}
  117. qs.update(new_query)
  118. bits[4] = parse.urlencode(qs, True)
  119. return parse.urlunparse(bits)
  120. if __name__ == '__main__':
  121. a = "https://www.washpayer.com/user/index.html?l=123456"
  122. b = "https://www.washpayer.com/user/index.html?l=123456&chargeIndex=1"
  123. c = "https://www.washpayer.com/user/index.html?l=123456#/pay"
  124. d = "https://www.washpayer.com/user/index.html?l=123456&chargeIndex=1#/pay"
  125. e = "https://www.washpayer.com/user/index.html#/pay?l=123456"
  126. f = "https://www.washpayer.com/user/index.html#/pay?l=123456&chargeIndex=1"
  127. g = "https://www.washpayer.com/user/index.html?isTest=1#/pay?l=123456"
  128. h = "https://www.washpayer.com/user/index.html?isTest=1#/pay?l=123456&chargeIndex=1"
  129. redirectQuery = {"redirect": "/user/index.html#/pay", "v": "1.0.02"}
  130. print before_frag_add_query(a, redirectQuery)
  131. print before_frag_add_query(b, redirectQuery)
  132. print before_frag_add_query(c, redirectQuery)
  133. print before_frag_add_query(d, redirectQuery)
  134. print before_frag_add_query(e, redirectQuery)
  135. print before_frag_add_query(f, redirectQuery)
  136. print before_frag_add_query(g, redirectQuery)
  137. print before_frag_add_query(h, redirectQuery)
  138. t1 = 'https://www.washpayer.com/user/index.html#/pay?l=123456'
  139. _add_query = {'chargeIndex': 1}
  140. print add_query(before_frag_add_query(t1, added_query = _add_query), added_query = {'v': '1.0.0'})
  141. t2 = 'https://www.washpayer.com/user/index.html#/pay'
  142. _add_query = {'l': '123456', 'chargeIndex': 1}
  143. print add_query(before_frag_add_query(t2, added_query = _add_query), added_query = {'v': '1.0.0'})
  144. t3 = 'https://www.washpayer.com/user/index.html#/pay'
  145. _add_query = {
  146. 'l': '123456',
  147. 'chargeIndex': 1,
  148. 'redirect': "/user/index.html#/pay"
  149. }
  150. print add_query(before_frag_add_query(t3, added_query = _add_query), added_query = {'v': '1.0.0'})