1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768 |
- # -*- coding: utf-8 -*-
- """
- @author:XuMing(xuming624@qq.com)
- @description:
- """
- import argparse
- import sys
- sys.path.append('..')
- import addressparser
def parse(addresses, cut=False):
    """
    Turn a list of addresses into tab-separated province/city/district/place rows.

    :param addresses: list of address strings
    :param cut: bool, forwarded unchanged as the ``cut`` argument of
        ``addressparser.transform``
    :return: list of str, one entry per row of the transformed frame, each made
        of the province, city, district and place-name fields joined by tabs
    """
    df = addressparser.transform(addresses, open_warning=False, cut=cut)
    columns = (df["省"], df["市"], df["区"], df["地名"])

    def _as_text(value):
        # str.join raises TypeError on non-string items (e.g. NaN/None used as
        # a "missing" marker), so every field falls back to an empty string,
        # not only the place name.
        return value if isinstance(value, str) else ''

    return ['\t'.join(_as_text(field) for field in row) for row in zip(*columns)]
def main(**kwargs):
    """
    Command-line worker of addressparser: read addresses, parse them, write the result.

    The file named by ``input`` is read as UTF-8, one address per line (each
    line is stripped of surrounding whitespace). Every address is written to
    the file named by ``output`` followed by a tab and its parsed fields.
    The number of lines read and written is printed to stdout.

    :param kwargs: keyword options:
        ``input``  - path of the text file to read,
        ``output`` - path of the text file to write,
        ``cut``    - optional flag passed on to ``parse`` (False when absent)
    :return: None
    """
    with open(kwargs['input'], 'r', encoding='utf-8') as src:
        addresses = [raw.strip() for raw in src]
    print('{} lines in input'.format(len(addresses)))

    use_cut = kwargs.get('cut', False)
    rows = parse(addresses, cut=use_cut)

    written = 0  # stays 0 when there is nothing to write
    with open(kwargs['output'], 'w', encoding='utf-8') as dst:
        for written, (address, fields) in enumerate(zip(addresses, rows), start=1):
            dst.write(address + '\t' + fields + '\n')
    print('{} lines in output'.format(written))
def run():
    """Build the command-line interface, parse the arguments and call ``main``."""
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument(
        'input',
        type=str,
        help='the input file path, file encode need utf-8.',
    )
    cli.add_argument(
        '-o', '--output',
        type=str,
        required=True,
        help='the output file path.',
    )
    cli.add_argument('-c', '--cut', action="store_true", help='use cut mode.')

    # The parsed namespace is expanded into the keyword options main() expects.
    options = vars(cli.parse_args())
    main(**options)


# Only start the CLI when the file is executed as a script.
if __name__ == '__main__':
    run()
|