#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Merge extra parameters into the query string of a URL.

Two entry points are provided:

* ``add_query`` always edits the regular query (the part before ``#``).
* ``before_frag_add_query`` edits the query embedded in the fragment
  (``#/path?k=v``) when the URL contains ``#``, and the regular query
  otherwise.
"""

try:
    from six.moves.urllib import parse
except ImportError:  # six is not installed; Python 3 ships the same API
    from urllib import parse

try:
    import simplejson as json
except ImportError:  # simplejson is optional; the stdlib encoder is equivalent here
    import json


def _stringify_bools(added_query):
    """Return a copy of ``added_query`` with bool values JSON-encoded.

    ``True``/``False`` become ``'true'``/``'false'``; every other value is
    passed through untouched.
    """
    return {
        key: json.dumps(value) if isinstance(value, bool) else value
        for key, value in added_query.items()
    }


class UrlHandler(object):
    """One link in a chain of handlers, each bound to the same client.

    The base implementation of every operation simply forwards the call to
    the next handler, if there is one.
    """

    DEFAULT_NEXT_HANDLER = None

    def __init__(self, client):
        self._client = client
        self._next = UrlHandler.DEFAULT_NEXT_HANDLER

    @property
    def endHandler(self):
        """The last handler of the chain that starts at this handler."""
        if not self.nextHandler:
            return self
        return self.nextHandler.endHandler

    @property
    def nextHandler(self):
        """The handler that follows this one, or ``None`` at the end."""
        return self._next

    @nextHandler.setter
    def nextHandler(self, handler):
        """Link ``handler`` after this one.

        Raises:
            ValueError: if ``handler`` is not a ``UrlHandler`` instance.
        """
        if not isinstance(handler, UrlHandler):
            raise ValueError("invalid handler for nextHandler(): {}".format(handler))
        self._next = handler

    def parse(self):
        """Parse this handler's part of the URL, then let the next handler run.

        The base class has nothing to parse and only forwards the call.
        """
        if self.nextHandler:
            self.nextHandler.parse()

    def add_query(self, added_query):
        """Forward ``added_query`` to the next handler in the chain."""
        if self.nextHandler:
            self.nextHandler.add_query(added_query)


class SchemeHandler(UrlHandler):
    """Chain link for the scheme part; inherits the pass-through behaviour."""


class NetLocHandler(UrlHandler):
    """Chain link for the netloc part; inherits the pass-through behaviour."""


class PathHandler(UrlHandler):
    """Chain link for the path part; inherits the pass-through behaviour."""


class ParamsHandler(UrlHandler):
    """Chain link for the params part; inherits the pass-through behaviour."""


class QueryHandler(UrlHandler):
    """Merges added parameters into the regular query when there is no fragment."""

    def parse(self):
        # Nothing to parse here; keep the chain going so later handlers run.
        super(QueryHandler, self).parse()

    def add_query(self, added_query):
        """Merge ``added_query`` into the client's query string.

        Only acts when the client URL has no ``#``; otherwise the fragment
        handler is responsible. Added values replace existing keys.
        """
        if not self._client.has_fragment:
            result = self._client.parseResult
            query = parse.parse_qs(result.query, keep_blank_values=True)
            query.update(_stringify_bools(added_query))
            self._client.parseResult = result._replace(
                query=parse.urlencode(query, True))
        super(QueryHandler, self).add_query(added_query)


class FragmentHandler(UrlHandler):
    """Merges added parameters into the query embedded in the fragment."""

    def parse(self):
        # Nothing to parse here; keep the chain going so later handlers run.
        super(FragmentHandler, self).parse()

    def add_query(self, added_query):
        """Merge ``added_query`` into the ``?``-suffixed part of the fragment.

        Only acts when the client URL contains ``#``. The text before the
        first ``?`` of the fragment is kept as the fragment path; everything
        after it is treated as a query string. Added values replace existing
        keys, matching ``QueryHandler.add_query`` and the module-level
        ``add_query``.
        """
        if self._client.has_fragment:
            result = self._client.parseResult
            # An empty fragment splits to [''], i.e. empty path and no query.
            tokens = result.fragment.split('?')
            fragment_path = tokens[0]
            # Any further '?' separators are treated like '&'.
            merged = parse.parse_qs('&'.join(tokens[1:]), keep_blank_values=True)
            merged.update(_stringify_bools(added_query))
            self._client.parseResult = result._replace(
                fragment='{}?{}'.format(fragment_path, parse.urlencode(merged, True)))
        super(FragmentHandler, self).add_query(added_query)


class UrlClient(object):
    """Process a URL by delegating to a chain of per-part handlers.

    Each handler deals only with its own responsibility. The call order is
    scheme, netloc, path, params, query, fragment.
    """

    def __init__(self, url):
        self._url = url
        self._parseResult = parse.urlparse(self._url)

        # Build the chain by appending each handler after the current tail.
        self.urlHeadHandler = SchemeHandler(self)
        for handler_cls in (NetLocHandler, PathHandler, ParamsHandler,
                            QueryHandler, FragmentHandler):
            self.urlHeadHandler.endHandler.nextHandler = handler_cls(self)

        self.urlHeadHandler.parse()

    @property
    def has_fragment(self):
        """True when the original URL string contains ``#``."""
        return '#' in self._url

    @property
    def parseResult(self):
        """The current parse result, as updated by the handlers."""
        return self._parseResult

    @parseResult.setter
    def parseResult(self, value):
        # TODO: validate the type of value
        self._parseResult = value

    def add_query(self, added_query):
        """Update the query part of the URL; returns ``self`` for chaining."""
        self.urlHeadHandler.add_query(added_query)
        return self

    def getUrl(self):
        """Reassemble the URL from the current parse result."""
        return parse.urlunparse(self.parseResult)


def before_frag_add_query(uri, added_query):
    """Return ``uri`` with ``added_query`` merged in via ``UrlClient``.

    The parameters go into the fragment's query when ``uri`` contains ``#``
    and into the regular query otherwise.
    """
    urlClient = UrlClient(uri)
    urlClient.add_query(added_query)
    return urlClient.getUrl()


def add_query(uri, added_query):
    """Return ``uri`` with ``added_query`` merged into its regular query.

    Bool values are JSON-encoded; added values replace existing keys.
    """
    bits = list(parse.urlparse(uri))
    qs = parse.parse_qs(bits[4], keep_blank_values=True)  # index 4 is the query
    qs.update(_stringify_bools(added_query))
    bits[4] = parse.urlencode(qs, True)
    return parse.urlunparse(bits)


def _demo():
    """Print the results for a set of sample URLs."""
    samples = (
        "https://www.washpayer.com/user/index.html?l=123456",
        "https://www.washpayer.com/user/index.html?l=123456&chargeIndex=1",
        "https://www.washpayer.com/user/index.html?l=123456#/pay",
        "https://www.washpayer.com/user/index.html?l=123456&chargeIndex=1#/pay",
        "https://www.washpayer.com/user/index.html#/pay?l=123456",
        "https://www.washpayer.com/user/index.html#/pay?l=123456&chargeIndex=1",
        "https://www.washpayer.com/user/index.html?isTest=1#/pay?l=123456",
        "https://www.washpayer.com/user/index.html?isTest=1#/pay?l=123456&chargeIndex=1",
    )
    redirectQuery = {"redirect": "/user/index.html#/pay", "v": "1.0.02"}
    for sample in samples:
        # Single-argument print(...) is valid on both Python 2 and 3.
        print(before_frag_add_query(sample, redirectQuery))

    t1 = 'https://www.washpayer.com/user/index.html#/pay?l=123456'
    _add_query = {'chargeIndex': 1}
    print(add_query(before_frag_add_query(t1, added_query=_add_query),
                    added_query={'v': '1.0.0'}))

    t2 = 'https://www.washpayer.com/user/index.html#/pay'
    _add_query = {'l': '123456', 'chargeIndex': 1}
    print(add_query(before_frag_add_query(t2, added_query=_add_query),
                    added_query={'v': '1.0.0'}))

    t3 = 'https://www.washpayer.com/user/index.html#/pay'
    _add_query = {
        'l': '123456',
        'chargeIndex': 1,
        'redirect': "/user/index.html#/pay"
    }
    print(add_query(before_frag_add_query(t3, added_query=_add_query),
                    added_query={'v': '1.0.0'}))


if __name__ == '__main__':
    _demo()