nlp.py 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258
  1. # -*- coding: utf-8 -*-
  2. """
  3. 自然语言处理
  4. """
  5. import re
  6. import sys
  7. import math
  8. import time
  9. from .base import AipBase
  10. from .base import base64
  11. from .base import json
  12. from .base import urlencode
  13. from .base import quote
  14. class AipNlp(AipBase):
  15. """
  16. 自然语言处理
  17. """
  18. __lexerUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v1/lexer'
  19. __lexerCustomUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v1/lexer_custom'
  20. __depParserUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v1/depparser'
  21. __wordEmbeddingUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v2/word_emb_vec'
  22. __dnnlmCnUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v2/dnnlm_cn'
  23. __wordSimEmbeddingUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v2/word_emb_sim'
  24. __simnetUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v2/simnet'
  25. __commentTagUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v2/comment_tag'
  26. __sentimentClassifyUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v1/sentiment_classify'
  27. __keywordUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v1/keyword'
  28. __topicUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v1/topic'
  29. __ecnetUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v1/ecnet'
  30. __emotionUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v1/emotion'
  31. __newsSummaryUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v1/news_summary'
  32. def _proccessResult(self, content):
  33. """
  34. formate result
  35. """
  36. if sys.version_info.major == 2:
  37. return json.loads(content.decode('gbk', 'ignore').encode('utf8')) or {}
  38. else:
  39. return json.loads(str(content, 'gbk')) or {}
  40. def _proccessRequest(self, url, params, data, headers):
  41. """
  42. _proccessRequest
  43. """
  44. if sys.version_info.major == 2:
  45. return json.dumps(data, ensure_ascii=False).decode('utf8').encode('gbk')
  46. else:
  47. return json.dumps(data, ensure_ascii=False).encode('gbk')
  48. def lexer(self, text, options=None):
  49. """
  50. 词法分析
  51. """
  52. options = options or {}
  53. data = {}
  54. data['text'] = text
  55. data.update(options)
  56. return self._request(self.__lexerUrl, data)
  57. def lexerCustom(self, text, options=None):
  58. """
  59. 词法分析(定制版)
  60. """
  61. options = options or {}
  62. data = {}
  63. data['text'] = text
  64. data.update(options)
  65. return self._request(self.__lexerCustomUrl, data)
  66. def depParser(self, text, options=None):
  67. """
  68. 依存句法分析
  69. """
  70. options = options or {}
  71. data = {}
  72. data['text'] = text
  73. data.update(options)
  74. return self._request(self.__depParserUrl, data)
  75. def wordEmbedding(self, word, options=None):
  76. """
  77. 词向量表示
  78. """
  79. options = options or {}
  80. data = {}
  81. data['word'] = word
  82. data.update(options)
  83. return self._request(self.__wordEmbeddingUrl, data)
  84. def dnnlm(self, text, options=None):
  85. """
  86. DNN语言模型
  87. """
  88. options = options or {}
  89. data = {}
  90. data['text'] = text
  91. data.update(options)
  92. return self._request(self.__dnnlmCnUrl, data)
  93. def wordSimEmbedding(self, word_1, word_2, options=None):
  94. """
  95. 词义相似度
  96. """
  97. options = options or {}
  98. data = {}
  99. data['word_1'] = word_1
  100. data['word_2'] = word_2
  101. data.update(options)
  102. return self._request(self.__wordSimEmbeddingUrl, data)
  103. def simnet(self, text_1, text_2, options=None):
  104. """
  105. 短文本相似度
  106. """
  107. options = options or {}
  108. data = {}
  109. data['text_1'] = text_1
  110. data['text_2'] = text_2
  111. data.update(options)
  112. return self._request(self.__simnetUrl, data)
  113. def commentTag(self, text, options=None):
  114. """
  115. 评论观点抽取
  116. """
  117. options = options or {}
  118. data = {}
  119. data['text'] = text
  120. data.update(options)
  121. return self._request(self.__commentTagUrl, data)
  122. def sentimentClassify(self, text, options=None):
  123. """
  124. 情感倾向分析
  125. """
  126. options = options or {}
  127. data = {}
  128. data['text'] = text
  129. data.update(options)
  130. return self._request(self.__sentimentClassifyUrl, data)
  131. def keyword(self, title, content, options=None):
  132. """
  133. 文章标签
  134. """
  135. options = options or {}
  136. data = {}
  137. data['title'] = title
  138. data['content'] = content
  139. data.update(options)
  140. return self._request(self.__keywordUrl, data)
  141. def topic(self, title, content, options=None):
  142. """
  143. 文章分类
  144. """
  145. options = options or {}
  146. data = {}
  147. data['title'] = title
  148. data['content'] = content
  149. data.update(options)
  150. return self._request(self.__topicUrl, data)
  151. def ecnet(self, text, options=None):
  152. """
  153. 文本纠错
  154. """
  155. options = options or {}
  156. data = {}
  157. data['text'] = text
  158. data.update(options)
  159. return self._request(self.__ecnetUrl, data)
  160. def emotion(self, text, options=None):
  161. """
  162. 对话情绪识别接口
  163. """
  164. options = options or {}
  165. data = {}
  166. data['text'] = text
  167. data.update(options)
  168. return self._request(self.__emotionUrl, data)
  169. def newsSummary(self, content, max_summary_len, options=None):
  170. """
  171. 新闻摘要接口
  172. """
  173. options = options or {}
  174. data = {}
  175. data['content'] = content
  176. data['max_summary_len'] = max_summary_len
  177. data.update(options)
  178. return self._request(self.__newsSummaryUrl, data)