# Source code for nlp_architect.models.ner_crf

# ******************************************************************************
# Copyright 2017-2018 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ******************************************************************************

from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow as tf

from nlp_architect.nn.tensorflow.python.keras.layers.crf import CRF
from nlp_architect.nn.tensorflow.python.keras.utils import load_model, save_model


class NERCRF(object):
    """
    Bi-LSTM NER model with CRF classification layer (tf.keras model)

    The network is only created by :meth:`build`; until then ``model`` and every
    topology attribute are ``None``.

    Args:
        use_cudnn (bool, optional): use cudnn LSTM cells
    """

    def __init__(self, use_cudnn=False):
        # The tf.keras model; stays None until build() creates it.
        self.model = None
        # Topology hyper-parameters, filled in by build(). save() forwards every
        # instance attribute except `model` and `use_cudnn` to save_model(), so the
        # set of attributes defined here is part of the saved topology.
        self.word_length = None
        self.target_label_dims = None
        self.word_vocab_size = None
        self.char_vocab_size = None
        self.word_embedding_dims = None
        self.char_embedding_dims = None
        self.tagger_lstm_dims = None
        self.dropout = None
        self.use_cudnn = use_cudnn

    def build(
        self,
        word_length,
        target_label_dims,
        word_vocab_size,
        char_vocab_size,
        word_embedding_dims=100,
        char_embedding_dims=16,
        tagger_lstm_dims=200,
        dropout=0.5,
    ):
        """
        Build a NERCRF model

        The arguments are stored on the instance and the compiled tf.keras model is
        stored in ``self.model``. The model declares three inputs, in this order:
        ``words_input``, ``word_chars_input`` and ``seq_lens``.

        Args:
            word_length (int): max word length in characters
            target_label_dims (int): number of entity labels (for classification)
            word_vocab_size (int): word vocabulary size
            char_vocab_size (int): character vocabulary size
            word_embedding_dims (int): word embedding dimensions
            char_embedding_dims (int): character embedding dimensions
            tagger_lstm_dims (int): word tagger LSTM output dimensions
            dropout (float): dropout rate
        """
        self.word_length = word_length
        self.target_label_dims = target_label_dims
        self.word_vocab_size = word_vocab_size
        self.char_vocab_size = char_vocab_size
        self.word_embedding_dims = word_embedding_dims
        self.char_embedding_dims = char_embedding_dims
        self.tagger_lstm_dims = tagger_lstm_dims
        self.dropout = dropout

        # build word input (variable-length sequence of word indices)
        words_input = tf.keras.layers.Input(shape=(None,), name="words_input")
        # The layer name is relied upon by load_embedding_weights().
        embedding_layer = tf.keras.layers.Embedding(
            self.word_vocab_size, self.word_embedding_dims, name="word_embedding"
        )
        word_embeddings = embedding_layer(words_input)

        # create word character embeddings (word_length character indices per word)
        word_chars_input = tf.keras.layers.Input(
            shape=(None, self.word_length), name="word_chars_input"
        )
        char_embedded = tf.keras.layers.Embedding(
            self.char_vocab_size, self.char_embedding_dims, name="char_embedding"
        )(word_chars_input)
        # Per word: convolve over its characters, then max-pool into one vector.
        char_embeddings = tf.keras.layers.TimeDistributed(
            tf.keras.layers.Conv1D(128, 3, padding="same", activation="relu")
        )(char_embedded)
        char_embeddings = tf.keras.layers.TimeDistributed(tf.keras.layers.GlobalMaxPooling1D())(
            char_embeddings
        )

        # create the final feature vectors
        features = tf.keras.layers.concatenate([word_embeddings, char_embeddings], axis=-1)

        # encode using two stacked bi-LSTM layers, with dropout before and after
        features = tf.keras.layers.Dropout(self.dropout)(features)
        bilstm = tf.keras.layers.Bidirectional(
            self._rnn_cell(self.tagger_lstm_dims, return_sequences=True)
        )(features)
        bilstm = tf.keras.layers.Bidirectional(
            self._rnn_cell(self.tagger_lstm_dims, return_sequences=True)
        )(bilstm)
        bilstm = tf.keras.layers.Dropout(self.dropout)(bilstm)
        # project every time step onto the label space consumed by the CRF layer
        bilstm = tf.keras.layers.Dense(self.target_label_dims)(bilstm)

        inputs = [words_input, word_chars_input]

        # the CRF layer is additionally fed one int32 sequence length per sample
        sequence_lengths = tf.keras.layers.Input(shape=(1,), dtype="int32", name="seq_lens")
        inputs.append(sequence_lengths)
        crf = CRF(self.target_label_dims, name="ner_crf")
        predictions = crf(inputs=bilstm, sequence_lengths=sequence_lengths)

        # compile the model; the loss dict is keyed by the CRF layer name
        model = tf.keras.Model(inputs=inputs, outputs=predictions)
        model.compile(
            loss={"ner_crf": crf.loss}, optimizer=tf.keras.optimizers.Adam(0.001, clipnorm=5.0)
        )

        self.model = model

    def _rnn_cell(self, units, **kwargs):
        """
        Create the recurrent layer used by the tagger

        Args:
            units (int): number of LSTM units
            **kwargs: forwarded unchanged to the layer constructor

        Returns:
            a ``CuDNNLSTM`` layer when ``use_cudnn`` is set, otherwise an ``LSTM`` layer
        """
        if self.use_cudnn:
            rnn_cell = tf.keras.layers.CuDNNLSTM(units, **kwargs)
        else:
            rnn_cell = tf.keras.layers.LSTM(units, **kwargs)
        return rnn_cell

    def load_embedding_weights(self, weights):
        """
        Load word embedding weights into the model embedding layer

        Args:
            weights (numpy.ndarray): 2D matrix of word weights; its second dimension
                must equal the output dimension of the ``word_embedding`` layer

        Raises:
            AssertionError: if the model was not built yet or the embedding
                dimensions do not match
        """
        assert self.model is not None, (
            "Cannot assign weights, apply build() before trying to loading embedding weights "
        )
        emb_layer = self.model.get_layer(name="word_embedding")
        assert emb_layer.output_dim == weights.shape[1], "embedding vectors shape mismatch"
        emb_layer.set_weights([weights])

    def fit(self, x, y, epochs=1, batch_size=1, callbacks=None, validation=None):
        """
        Train a model given input samples and target labels.

        Args:
            x (numpy.ndarray or :obj:`list` of numpy.ndarray): input samples; the model
                created by build() declares three inputs (``words_input``,
                ``word_chars_input``, ``seq_lens``)
            y (numpy.ndarray): input sample labels
            epochs (:obj:`int`, optional): number of epochs to train
            batch_size (:obj:`int`, optional): batch size
            callbacks(:obj:`Callback`, optional): Keras compatible callbacks
            validation(:obj:`list` of :obj:`numpy.ndarray`, optional): optional validation data
                to be evaluated when training (passed to Keras as ``validation_data``)

        Raises:
            AssertionError: if the model was not built yet
        """
        assert self.model, "Model was not initialized"
        self.model.fit(
            x,
            y,
            epochs=epochs,
            batch_size=batch_size,
            shuffle=True,
            validation_data=validation,
            callbacks=callbacks,
        )

    def predict(self, x, batch_size=1):
        """
        Get the prediction of the model on given input

        Args:
            x (numpy.ndarray or :obj:`list` of numpy.ndarray): input samples
            batch_size (:obj:`int`, optional): batch size

        Returns:
            numpy.ndarray: predicted values by the model

        Raises:
            AssertionError: if the model was not built yet
        """
        assert self.model, "Model was not initialized"
        return self.model.predict(x, batch_size=batch_size)

    def save(self, path):
        """
        Save model to path

        Every instance attribute except ``model`` and ``use_cudnn`` is collected into
        a topology dict and handed, together with the model, to ``save_model``.

        Args:
            path (str): path to save model weights

        Raises:
            AssertionError: if the model was not built yet
        """
        # Same guard as fit()/predict(): there is nothing to save before build().
        assert self.model, "Model was not initialized"
        topology = dict(self.__dict__)
        topology.pop("model")
        topology.pop("use_cudnn")
        save_model(self.model, topology, path)

    def load(self, path):
        """
        Load model weights

        Delegates to ``load_model``, passing this instance as the object to restore
        into.

        Args:
            path (str): path to load model from
        """
        load_model(path, self)