nets.py 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122
  1. # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. """
  15. The function lex_net(args) defines the lexical analysis network structure
  16. """
  17. import sys
  18. import os
  19. import math
  20. import paddle.fluid as fluid
  21. from paddle.fluid.initializer import NormalInitializer
  22. def lex_net(word, vocab_size, num_labels, for_infer=True, target=None):
  23. """
  24. define the lexical analysis network structure
  25. word: stores the input of the model
  26. for_infer: a boolean value, indicating if the model to be created is for training or predicting.
  27. return:
  28. for infer: return the prediction
  29. otherwise: return the prediction
  30. """
  31. word_emb_dim=128
  32. grnn_hidden_dim=128
  33. bigru_num=2
  34. emb_lr = 1.0
  35. crf_lr = 1.0
  36. init_bound = 0.1
  37. IS_SPARSE = True
  38. def _bigru_layer(input_feature):
  39. """
  40. define the bidirectional gru layer
  41. """
  42. pre_gru = fluid.layers.fc(
  43. input=input_feature,
  44. size=grnn_hidden_dim * 3,
  45. param_attr=fluid.ParamAttr(
  46. initializer=fluid.initializer.Uniform(
  47. low=-init_bound, high=init_bound),
  48. regularizer=fluid.regularizer.L2DecayRegularizer(
  49. regularization_coeff=1e-4)))
  50. gru = fluid.layers.dynamic_gru(
  51. input=pre_gru,
  52. size=grnn_hidden_dim,
  53. param_attr=fluid.ParamAttr(
  54. initializer=fluid.initializer.Uniform(
  55. low=-init_bound, high=init_bound),
  56. regularizer=fluid.regularizer.L2DecayRegularizer(
  57. regularization_coeff=1e-4)))
  58. pre_gru_r = fluid.layers.fc(
  59. input=input_feature,
  60. size=grnn_hidden_dim * 3,
  61. param_attr=fluid.ParamAttr(
  62. initializer=fluid.initializer.Uniform(
  63. low=-init_bound, high=init_bound),
  64. regularizer=fluid.regularizer.L2DecayRegularizer(
  65. regularization_coeff=1e-4)))
  66. gru_r = fluid.layers.dynamic_gru(
  67. input=pre_gru_r,
  68. size=grnn_hidden_dim,
  69. is_reverse=True,
  70. param_attr=fluid.ParamAttr(
  71. initializer=fluid.initializer.Uniform(
  72. low=-init_bound, high=init_bound),
  73. regularizer=fluid.regularizer.L2DecayRegularizer(
  74. regularization_coeff=1e-4)))
  75. bi_merge = fluid.layers.concat(input=[gru, gru_r], axis=1)
  76. return bi_merge
  77. def _net_conf(word, target=None):
  78. """
  79. Configure the network
  80. """
  81. word_embedding = fluid.embedding(
  82. input=word,
  83. size=[vocab_size, word_emb_dim],
  84. dtype='float32',
  85. is_sparse=IS_SPARSE,
  86. param_attr=fluid.ParamAttr(
  87. learning_rate=emb_lr,
  88. name="word_emb",
  89. initializer=fluid.initializer.Uniform(
  90. low=-init_bound, high=init_bound)))
  91. input_feature = word_embedding
  92. for i in range(bigru_num):
  93. bigru_output = _bigru_layer(input_feature)
  94. input_feature = bigru_output
  95. emission = fluid.layers.fc(
  96. size=num_labels,
  97. input=bigru_output,
  98. param_attr=fluid.ParamAttr(
  99. initializer=fluid.initializer.Uniform(
  100. low=-init_bound, high=init_bound),
  101. regularizer=fluid.regularizer.L2DecayRegularizer(
  102. regularization_coeff=1e-4)))
  103. size = emission.shape[1]
  104. fluid.layers.create_parameter(
  105. shape=[size + 2, size], dtype=emission.dtype, name='crfw')
  106. crf_decode = fluid.layers.crf_decoding(
  107. input=emission, param_attr=fluid.ParamAttr(name='crfw'))
  108. return crf_decode
  109. return _net_conf(word)