dayuan
/
manyi


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258
							
# -*- coding: utf-8 -*-

"""
自然语言处理
"""

import re
import sys
import math
import time
from .base import AipBase
from .base import base64
from .base import json
from .base import urlencode
from .base import quote

class AipNlp(AipBase):

    """
    Client for the natural language processing endpoints under
    https://aip.baidubce.com/rpc/2.0/nlp/.

    Every public method builds a request dict from its required arguments,
    merges the caller's ``options`` over it, and hands it to
    ``self._request``. ``_request`` is not defined in this class; it is
    presumably inherited from AipBase, and each method returns whatever it
    returns. Request and response bodies are JSON encoded as GBK.
    """

    __lexerUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v1/lexer'

    __lexerCustomUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v1/lexer_custom'

    __depParserUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v1/depparser'

    __wordEmbeddingUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v2/word_emb_vec'

    __dnnlmCnUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v2/dnnlm_cn'

    __wordSimEmbeddingUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v2/word_emb_sim'

    __simnetUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v2/simnet'

    __commentTagUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v2/comment_tag'

    __sentimentClassifyUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v1/sentiment_classify'

    __keywordUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v1/keyword'

    __topicUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v1/topic'

    __ecnetUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v1/ecnet'

    __emotionUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v1/emotion'

    __newsSummaryUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v1/news_summary'

    def _proccessResult(self, content):
        """
        Parse a GBK-encoded JSON response body.

        Presumably invoked by AipBase on the raw response (the caller is not
        visible in this file).

        content: raw response bytes.
        Returns the parsed JSON value, or an empty dict when that value is
        falsy (for example ``null`` or ``{}``).
        Raises ValueError (json.JSONDecodeError on Python 3) when the decoded
        text is not valid JSON.
        """
        if sys.version_info.major == 2:
            return json.loads(content.decode('gbk', 'ignore').encode('utf8')) or {}

        # Drop undecodable bytes on Python 3 as well, matching the 'ignore'
        # policy of the Python 2 branch, so a single stray byte in the
        # response does not raise UnicodeDecodeError.
        return json.loads(str(content, 'gbk', 'ignore')) or {}

    def _proccessRequest(self, url, params, data, headers):
        """
        Serialize the request dict as GBK-encoded JSON.

        Presumably invoked by AipBase before sending (the caller is not
        visible in this file). ``url``, ``params`` and ``headers`` are
        accepted for signature compatibility and are not used here.

        data: the request dict to serialize.
        Returns the JSON document as GBK-encoded bytes.
        Raises UnicodeEncodeError when the text contains characters that
        cannot be represented in GBK.
        """
        payload = json.dumps(data, ensure_ascii=False)

        # On Python 2, json.dumps(ensure_ascii=False) returns a UTF-8 byte
        # string only when every string in data is a byte string; as soon as
        # a unicode string is involved it returns unicode. Decoding must be
        # limited to the byte-string case: calling .decode('utf8') on unicode
        # implicitly encodes it as ASCII first and fails on non-ASCII text.
        # On Python 3 the result is always str, so this check never fires.
        if isinstance(payload, bytes):
            payload = payload.decode('utf8')

        return payload.encode('gbk')

    def __post(self, url, fields, options):
        """
        Merge ``options`` over ``fields`` and send the result to ``url``.

        url: endpoint URL.
        fields: dict of the endpoint's required request fields.
        options: optional dict of extra request fields, or None. A key that
            also appears in ``fields`` replaces the required value.
        Returns whatever ``self._request`` returns.
        """
        data = dict(fields)
        data.update(options or {})

        return self._request(url, data)

    def lexer(self, text, options=None):
        """
        Lexical analysis.

        text: value sent as the ``text`` request field.
        options: optional dict of extra request fields merged over it.
        Returns the result of ``self._request`` for the lexer endpoint.
        """
        return self.__post(self.__lexerUrl, {'text': text}, options)

    def lexerCustom(self, text, options=None):
        """
        Lexical analysis (custom version).

        text: value sent as the ``text`` request field.
        options: optional dict of extra request fields merged over it.
        Returns the result of ``self._request`` for the lexer_custom endpoint.
        """
        return self.__post(self.__lexerCustomUrl, {'text': text}, options)

    def depParser(self, text, options=None):
        """
        Dependency parsing.

        text: value sent as the ``text`` request field.
        options: optional dict of extra request fields merged over it.
        Returns the result of ``self._request`` for the depparser endpoint.
        """
        return self.__post(self.__depParserUrl, {'text': text}, options)

    def wordEmbedding(self, word, options=None):
        """
        Word embedding vector.

        word: value sent as the ``word`` request field.
        options: optional dict of extra request fields merged over it.
        Returns the result of ``self._request`` for the word_emb_vec endpoint.
        """
        return self.__post(self.__wordEmbeddingUrl, {'word': word}, options)

    def dnnlm(self, text, options=None):
        """
        DNN language model.

        text: value sent as the ``text`` request field.
        options: optional dict of extra request fields merged over it.
        Returns the result of ``self._request`` for the dnnlm_cn endpoint.
        """
        return self.__post(self.__dnnlmCnUrl, {'text': text}, options)

    def wordSimEmbedding(self, word_1, word_2, options=None):
        """
        Word meaning similarity.

        word_1: value sent as the ``word_1`` request field.
        word_2: value sent as the ``word_2`` request field.
        options: optional dict of extra request fields merged over them.
        Returns the result of ``self._request`` for the word_emb_sim endpoint.
        """
        fields = {'word_1': word_1, 'word_2': word_2}
        return self.__post(self.__wordSimEmbeddingUrl, fields, options)

    def simnet(self, text_1, text_2, options=None):
        """
        Short-text similarity.

        text_1: value sent as the ``text_1`` request field.
        text_2: value sent as the ``text_2`` request field.
        options: optional dict of extra request fields merged over them.
        Returns the result of ``self._request`` for the simnet endpoint.
        """
        fields = {'text_1': text_1, 'text_2': text_2}
        return self.__post(self.__simnetUrl, fields, options)

    def commentTag(self, text, options=None):
        """
        Comment opinion extraction.

        text: value sent as the ``text`` request field.
        options: optional dict of extra request fields merged over it.
        Returns the result of ``self._request`` for the comment_tag endpoint.
        """
        return self.__post(self.__commentTagUrl, {'text': text}, options)

    def sentimentClassify(self, text, options=None):
        """
        Sentiment polarity analysis.

        text: value sent as the ``text`` request field.
        options: optional dict of extra request fields merged over it.
        Returns the result of ``self._request`` for the sentiment_classify
        endpoint.
        """
        return self.__post(self.__sentimentClassifyUrl, {'text': text}, options)

    def keyword(self, title, content, options=None):
        """
        Article tags.

        title: value sent as the ``title`` request field.
        content: value sent as the ``content`` request field.
        options: optional dict of extra request fields merged over them.
        Returns the result of ``self._request`` for the keyword endpoint.
        """
        fields = {'title': title, 'content': content}
        return self.__post(self.__keywordUrl, fields, options)

    def topic(self, title, content, options=None):
        """
        Article classification.

        title: value sent as the ``title`` request field.
        content: value sent as the ``content`` request field.
        options: optional dict of extra request fields merged over them.
        Returns the result of ``self._request`` for the topic endpoint.
        """
        fields = {'title': title, 'content': content}
        return self.__post(self.__topicUrl, fields, options)

    def ecnet(self, text, options=None):
        """
        Text error correction.

        text: value sent as the ``text`` request field.
        options: optional dict of extra request fields merged over it.
        Returns the result of ``self._request`` for the ecnet endpoint.
        """
        return self.__post(self.__ecnetUrl, {'text': text}, options)

    def emotion(self, text, options=None):
        """
        Dialogue emotion recognition.

        text: value sent as the ``text`` request field.
        options: optional dict of extra request fields merged over it.
        Returns the result of ``self._request`` for the emotion endpoint.
        """
        return self.__post(self.__emotionUrl, {'text': text}, options)

    def newsSummary(self, content, max_summary_len, options=None):
        """
        News summarization.

        content: value sent as the ``content`` request field.
        max_summary_len: value sent as the ``max_summary_len`` request field.
        options: optional dict of extra request fields merged over them.
        Returns the result of ``self._request`` for the news_summary endpoint.
        """
        fields = {'content': content, 'max_summary_len': max_summary_len}
        return self.__post(self.__newsSummaryUrl, fields, options)