__main__.py 2.3 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061
  1. """Jieba command line interface."""
  2. import sys
  3. import jieba
  4. from argparse import ArgumentParser
  5. from ._compat import *
  6. parser = ArgumentParser(usage="%s -m jieba [options] filename" % sys.executable, description="Jieba command line interface.", epilog="If no filename specified, use STDIN instead.")
  7. parser.add_argument("-d", "--delimiter", metavar="DELIM", default=' / ',
  8. nargs='?', const=' ',
  9. help="use DELIM instead of ' / ' for word delimiter; or a space if it is used without DELIM")
  10. parser.add_argument("-p", "--pos", metavar="DELIM", nargs='?', const='_',
  11. help="enable POS tagging; if DELIM is specified, use DELIM instead of '_' for POS delimiter")
  12. parser.add_argument("-D", "--dict", help="use DICT as dictionary")
  13. parser.add_argument("-u", "--user-dict",
  14. help="use USER_DICT together with the default dictionary or DICT (if specified)")
  15. parser.add_argument("-a", "--cut-all",
  16. action="store_true", dest="cutall", default=False,
  17. help="full pattern cutting (ignored with POS tagging)")
  18. parser.add_argument("-n", "--no-hmm", dest="hmm", action="store_false",
  19. default=True, help="don't use the Hidden Markov Model")
  20. parser.add_argument("-q", "--quiet", action="store_true", default=False,
  21. help="don't print loading messages to stderr")
  22. parser.add_argument("-V", '--version', action='version',
  23. version="Jieba " + jieba.__version__)
  24. parser.add_argument("filename", nargs='?', help="input file")
  25. args = parser.parse_args()
  26. if args.quiet:
  27. jieba.setLogLevel(60)
  28. if args.pos:
  29. import jieba.posseg
  30. posdelim = args.pos
  31. def cutfunc(sentence, _, HMM=True):
  32. for w, f in jieba.posseg.cut(sentence, HMM):
  33. yield w + posdelim + f
  34. else:
  35. cutfunc = jieba.cut
  36. delim = text_type(args.delimiter)
  37. cutall = args.cutall
  38. hmm = args.hmm
  39. fp = open(args.filename, 'r') if args.filename else sys.stdin
  40. if args.dict:
  41. jieba.initialize(args.dict)
  42. else:
  43. jieba.initialize()
  44. if args.user_dict:
  45. jieba.load_userdict(args.user_dict)
  46. ln = fp.readline()
  47. while ln:
  48. l = ln.rstrip('\r\n')
  49. result = delim.join(cutfunc(ln.rstrip('\r\n'), cutall, hmm))
  50. if PY2:
  51. result = result.encode(default_encoding)
  52. print(result)
  53. ln = fp.readline()
  54. fp.close()