__main__.py 2.2 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768
  1. # -*- coding: utf-8 -*-
  2. """
  3. @author:XuMing(xuming624@qq.com)
  4. @description:
  5. """
  6. import argparse
  7. import sys
  8. sys.path.append('..')
  9. import addressparser
  10. def parse(addresses, cut=False):
  11. """
  12. Turns address list into province, city, country and street.
  13. :param addresses: list of address
  14. :param cut: bool
  15. :return: list of province, city, country and street
  16. """
  17. result = []
  18. df = addressparser.transform(addresses, open_warning=False, cut=cut)
  19. for map_key in zip(df["省"], df["市"], df["区"], df["地名"]):
  20. place = map_key[3]
  21. if not isinstance(place, str):
  22. place = ''
  23. result.append('\t'.join([map_key[0], map_key[1], map_key[2], place]))
  24. return result
  25. def main(**kwargs):
  26. """
  27. Cmd script of addressparser. Input address file, output extracted province, city country and street.
  28. :param kwargs: input, a text file object that will be read from. Should contain address data, one address per line
  29. :param output: a text file object where parsed output will be written. Parsed output will be similar to CSV data
  30. :type input: text file object in read mode
  31. :type output: text file object in write mode
  32. :return:
  33. """
  34. lines = []
  35. with open(kwargs['input'], 'r', encoding='utf-8') as f:
  36. for line in f:
  37. lines.append(line.strip())
  38. print('{} lines in input'.format(len(lines)))
  39. cut = kwargs['cut'] if 'cut' in kwargs else False
  40. parsed = parse(lines, cut=cut)
  41. count = 0
  42. with open(kwargs['output'], 'w', encoding='utf-8') as f:
  43. for i, o in zip(lines, parsed):
  44. count += 1
  45. f.write(i + '\t' + o + '\n')
  46. print('{} lines in output'.format(count))
  47. def run():
  48. parser = argparse.ArgumentParser(description=__doc__)
  49. parser.add_argument('input', type=str,
  50. help='the input file path, file encode need utf-8.')
  51. parser.add_argument('-o', '--output', type=str, required=True,
  52. help='the output file path.')
  53. parser.add_argument('-c', '--cut', action="store_true", help='use cut mode.')
  54. args = parser.parse_args()
  55. main(**vars(args))
  56. if __name__ == '__main__':
  57. run()