# ******************************************************************************
# Copyright 2017-2018 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ******************************************************************************
from __future__ import absolute_import, division, print_function, unicode_literals
import tensorflow as tf
from nlp_architect.nn.tensorflow.python.keras.layers.crf import CRF
from nlp_architect.nn.tensorflow.python.keras.utils import load_model, save_model
class NERCRF(object):
    """
    Bi-LSTM NER model with CRF classification layer (tf.keras model).

    Architecture: word embeddings concatenated with CNN-derived character
    features, fed through a 2-layer bidirectional LSTM encoder, projected to
    label space and decoded with a linear-chain CRF.

    Args:
        use_cudnn (bool, optional): use cudnn LSTM cells
    """

    def __init__(self, use_cudnn=False):
        # All topology hyper-parameters stay None until build() is called;
        # save() relies on these attributes to serialize the topology.
        self.model = None
        self.word_length = None
        self.target_label_dims = None
        self.word_vocab_size = None
        self.char_vocab_size = None
        self.word_embedding_dims = None
        self.char_embedding_dims = None
        self.tagger_lstm_dims = None
        self.dropout = None
        self.use_cudnn = use_cudnn

    def build(
        self,
        word_length,
        target_label_dims,
        word_vocab_size,
        char_vocab_size,
        word_embedding_dims=100,
        char_embedding_dims=16,
        tagger_lstm_dims=200,
        dropout=0.5,
    ):
        """
        Build a NERCRF model

        Args:
            word_length (int): max word length in characters
            target_label_dims (int): number of entity labels (for classification)
            word_vocab_size (int): word vocabulary size
            char_vocab_size (int): character vocabulary size
            word_embedding_dims (int): word embedding dimensions
            char_embedding_dims (int): character embedding dimensions
            tagger_lstm_dims (int): word tagger LSTM output dimensions
            dropout (float): dropout rate
        """
        self.word_length = word_length
        self.target_label_dims = target_label_dims
        self.word_vocab_size = word_vocab_size
        self.char_vocab_size = char_vocab_size
        self.word_embedding_dims = word_embedding_dims
        self.char_embedding_dims = char_embedding_dims
        self.tagger_lstm_dims = tagger_lstm_dims
        self.dropout = dropout

        # build word input
        words_input = tf.keras.layers.Input(shape=(None,), name="words_input")
        embedding_layer = tf.keras.layers.Embedding(
            self.word_vocab_size, self.word_embedding_dims, name="word_embedding"
        )
        word_embeddings = embedding_layer(words_input)

        # create word character embeddings: per-word char CNN + max-pool
        word_chars_input = tf.keras.layers.Input(
            shape=(None, self.word_length), name="word_chars_input"
        )
        char_embedding_layer = tf.keras.layers.Embedding(
            self.char_vocab_size, self.char_embedding_dims, name="char_embedding"
        )(word_chars_input)
        char_embeddings = tf.keras.layers.TimeDistributed(
            tf.keras.layers.Conv1D(128, 3, padding="same", activation="relu")
        )(char_embedding_layer)
        char_embeddings = tf.keras.layers.TimeDistributed(tf.keras.layers.GlobalMaxPooling1D())(
            char_embeddings
        )

        # create the final feature vectors
        features = tf.keras.layers.concatenate([word_embeddings, char_embeddings], axis=-1)

        # encode using a 2-layer bi-LSTM with dropout before and after
        features = tf.keras.layers.Dropout(self.dropout)(features)
        bilstm = tf.keras.layers.Bidirectional(
            self._rnn_cell(self.tagger_lstm_dims, return_sequences=True)
        )(features)
        bilstm = tf.keras.layers.Bidirectional(
            self._rnn_cell(self.tagger_lstm_dims, return_sequences=True)
        )(bilstm)
        bilstm = tf.keras.layers.Dropout(self.dropout)(bilstm)
        bilstm = tf.keras.layers.Dense(self.target_label_dims)(bilstm)

        # the CRF layer needs the true sequence lengths as an extra input
        inputs = [words_input, word_chars_input]
        sequence_lengths = tf.keras.layers.Input(shape=(1,), dtype="int32", name="seq_lens")
        inputs.append(sequence_lengths)
        crf = CRF(self.target_label_dims, name="ner_crf")
        predictions = crf(inputs=bilstm, sequence_lengths=sequence_lengths)

        # compile the model with the CRF negative log-likelihood loss
        model = tf.keras.Model(inputs=inputs, outputs=predictions)
        model.compile(
            loss={"ner_crf": crf.loss}, optimizer=tf.keras.optimizers.Adam(0.001, clipnorm=5.0)
        )
        self.model = model

    def _rnn_cell(self, units, **kwargs):
        # Select the cuDNN-accelerated LSTM when requested (GPU only),
        # otherwise the portable CPU/GPU implementation.
        if self.use_cudnn:
            rnn_cell = tf.keras.layers.CuDNNLSTM(units, **kwargs)
        else:
            rnn_cell = tf.keras.layers.LSTM(units, **kwargs)
        return rnn_cell

    def load_embedding_weights(self, weights):
        """
        Load word embedding weights into the model embedding layer

        Args:
            weights (numpy.ndarray): 2D matrix of word weights
        """
        assert self.model is not None, (
            "Cannot assign weights, apply build() before trying to "
            "load embedding weights"
        )
        emb_layer = self.model.get_layer(name="word_embedding")
        assert emb_layer.output_dim == weights.shape[1], "embedding vectors shape mismatch"
        emb_layer.set_weights([weights])

    def fit(self, x, y, epochs=1, batch_size=1, callbacks=None, validation=None):
        """
        Train a model given input samples and target labels.

        Args:
            x (numpy.ndarray or :obj:`numpy.ndarray`): input samples
            y (numpy.ndarray): input sample labels
            epochs (:obj:`int`, optional): number of epochs to train
            batch_size (:obj:`int`, optional): batch size
            callbacks(:obj:`Callback`, optional): Keras compatible callbacks
            validation(:obj:`list` of :obj:`numpy.ndarray`, optional): optional validation data
                to be evaluated when training
        """
        assert self.model, "Model was not initialized"
        self.model.fit(
            x,
            y,
            epochs=epochs,
            batch_size=batch_size,
            shuffle=True,
            validation_data=validation,
            callbacks=callbacks,
        )

    def predict(self, x, batch_size=1):
        """
        Get the prediction of the model on given input

        Args:
            x (numpy.ndarray or :obj:`numpy.ndarray`): input samples
            batch_size (:obj:`int`, optional): batch size

        Returns:
            numpy.ndarray: predicted values by the model
        """
        assert self.model, "Model was not initialized"
        return self.model.predict(x, batch_size=batch_size)

    def save(self, path):
        """
        Save model to path

        Args:
            path (str): path to save model weights
        """
        # Serialize only the topology hyper-parameters; the compiled model
        # object and the hardware-specific cudnn flag are excluded.
        topology = dict(self.__dict__)
        topology.pop("model")
        topology.pop("use_cudnn")
        save_model(self.model, topology, path)

    def load(self, path):
        """
        Load model weights

        Args:
            path (str): path to load model from
        """
        load_model(path, self)