# Source code for nlp_architect.models.intent_extraction

# ******************************************************************************
# Copyright 2017-2018 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ******************************************************************************
from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow as tf

from nlp_architect.nn.tensorflow.python.keras.layers.crf import CRF
from nlp_architect.nn.tensorflow.python.keras.utils import load_model, save_model


class IntentExtractionModel(object):
    """
    Intent Extraction model base class (using tf.keras)

    Holds a ``tf.keras`` model in ``self.model`` and provides the training,
    inference and persistence helpers shared by the concrete models.
    ``self.model`` is ``None`` until a model is assigned to it (the subclasses
    in this module do so in their ``build()`` method).
    """

    def __init__(self):
        # No network exists yet; fit/predict/save assert on this attribute.
        self.model = None

    def fit(self, x, y, epochs=1, batch_size=1, callbacks=None, validation=None):
        """
        Train a model given input samples and target labels.

        Samples are shuffled on every epoch (``shuffle=True``). The value
        returned by the underlying ``model.fit`` call is not propagated.

        Args:
            x: input samples
            y: input sample labels
            epochs (:obj:`int`, optional): number of epochs to train
            batch_size (:obj:`int`, optional): batch size
            callbacks(:obj:`Callback`, optional): Keras compatible callbacks
            validation(:obj:`list` of :obj:`numpy.ndarray`, optional): optional validation data
                to be evaluated when training

        Raises:
            AssertionError: if no model has been built or loaded yet
        """
        assert self.model is not None, "Model was not initialized"
        self.model.fit(
            x,
            y,
            epochs=epochs,
            batch_size=batch_size,
            shuffle=True,
            validation_data=validation,
            callbacks=callbacks,
        )

    def predict(self, x, batch_size=1):
        """
        Get the prediction of the model on given input

        Args:
            x: samples to run through the model
            batch_size (:obj:`int`, optional): batch size

        Returns:
            numpy.ndarray: predicted values by the model

        Raises:
            AssertionError: if no model has been built or loaded yet
        """
        assert self.model is not None, "Model was not initialized"
        return self.model.predict(x, batch_size=batch_size)

    def save(self, path, exclude=None):
        """
        Save model to path

        Every instance attribute except ``model`` is stored as the model
        "topology" next to the network itself, via ``save_model``.

        Args:
            path (str): path to save model
            exclude (list, optional): a list of object fields to exclude when saving.
                Values that are not a ``list`` are ignored.

        Raises:
            AssertionError: if no model has been built or loaded yet
            KeyError: if ``exclude`` names a field the instance does not have
        """
        assert self.model is not None, "Model was not initialized"
        # Shallow copy so that dropping entries never touches the live instance.
        topology = dict(self.__dict__)
        topology.pop("model")
        if exclude and isinstance(exclude, list):
            for field in exclude:
                topology.pop(field)
        save_model(self.model, topology=topology, filepath=path)

    def load(self, path):
        """
        Load a trained model

        Delegates to ``load_model``, handing it this instance
        (presumably so the stored topology and network are restored onto it).

        Args:
            path (str): path to model file
        """
        load_model(path, self)

    @property
    def input_shape(self):
        """:obj:`tuple`:Get input shape"""
        return self.model.layers[0].input_shape

    @staticmethod
    def _create_input_embed(sentence_len, is_extern_emb, token_emb_size, vocab_size):
        """
        Create the token input layer and its embedded representation.

        Args:
            sentence_len (int): number of tokens per input sentence
            is_extern_emb (bool): if True the model is fed pre-computed
                embedding vectors and no embedding layer is created
            token_emb_size (int): embedding vector size
            vocab_size (int): vocabulary size (used only when an embedding
                layer is created)

        Returns:
            tuple: ``(input_layer, embedded_tokens)``; both are the same
            tensor when external embeddings are used
        """
        if is_extern_emb:
            # Input already holds dense vectors, so it doubles as the embedding.
            in_layer = e_layer = tf.keras.layers.Input(
                shape=(
                    sentence_len,
                    token_emb_size,
                ),
                dtype="float32",
                name="tokens_input",
            )
        else:
            in_layer = tf.keras.layers.Input(
                shape=(sentence_len,), dtype="int32", name="tokens_input"
            )
            e_layer = tf.keras.layers.Embedding(
                vocab_size, token_emb_size, input_length=sentence_len, name="embedding_layer"
            )(in_layer)
        return in_layer, e_layer

    def load_embedding_weights(self, weights):
        """
        Load word embedding weights into the model embedding layer

        The target is the layer named ``"word_embedding"``.

        Args:
            weights (numpy.ndarray): 2D matrix of word weights

        Raises:
            AssertionError: if the model was not built yet, or if the second
                dimension of ``weights`` differs from the layer output size
        """
        assert self.model is not None, (
            "Cannot assign weights, apply build() before trying to " "loading embedding weights "
        )
        emb_layer = self.model.get_layer(name="word_embedding")
        assert emb_layer.output_dim == weights.shape[1], "embedding vectors shape mismatch"
        emb_layer.set_weights([weights])


class MultiTaskIntentModel(IntentExtractionModel):
    """
    Multi-Task Intent and Slot tagging model (using tf.keras)

    A first BiLSTM over word embeddings feeds the intent classifier; its
    output sequence, concatenated with character-level BiLSTM features, feeds
    a second BiLSTM whose projections are decoded by a CRF into slot labels.

    Args:
        use_cudnn (bool, optional): use GPU based model (CuDNN LSTM cells)
    """

    def __init__(self, use_cudnn=False):
        super().__init__()
        # Hyper-parameters are filled in by build() (they are also part of
        # the topology written by save()).
        self.word_length = None
        self.num_labels = None
        self.num_intent_labels = None
        self.word_vocab_size = None
        self.char_vocab_size = None
        self.word_emb_dims = None
        self.char_emb_dims = None
        self.char_lstm_dims = None
        self.tagger_lstm_dims = None
        self.dropout = None
        self.use_cudnn = use_cudnn

    def build(
        self,
        word_length,
        num_labels,
        num_intent_labels,
        word_vocab_size,
        char_vocab_size,
        word_emb_dims=100,
        char_emb_dims=30,
        char_lstm_dims=30,
        tagger_lstm_dims=100,
        dropout=0.2,
    ):
        """
        Build a model

        Stores the hyper-parameters on the instance, creates the network and
        assigns the compiled model to ``self.model``.

        Args:
            word_length (int): max word length (in characters)
            num_labels (int): number of slot labels
            num_intent_labels (int): number of intent classes
            word_vocab_size (int): word vocabulary size
            char_vocab_size (int): character vocabulary size
            word_emb_dims (int, optional): word embedding dimensions
            char_emb_dims (int, optional): character embedding dimensions
            char_lstm_dims (int, optional): character feature LSTM hidden size
            tagger_lstm_dims (int, optional): tagger LSTM hidden size
            dropout (float, optional): dropout rate
        """
        self.word_length = word_length
        self.num_labels = num_labels
        self.num_intent_labels = num_intent_labels
        self.word_vocab_size = word_vocab_size
        self.char_vocab_size = char_vocab_size
        self.word_emb_dims = word_emb_dims
        self.char_emb_dims = char_emb_dims
        self.char_lstm_dims = char_lstm_dims
        self.tagger_lstm_dims = tagger_lstm_dims
        self.dropout = dropout

        # NOTE: layers are created in a fixed order; unnamed layers receive
        # auto-generated names, so keep this order stable.

        # word ids input and word embeddings
        words_input = tf.keras.layers.Input(shape=(None,), name="words_input")
        embedding_layer = tf.keras.layers.Embedding(
            self.word_vocab_size, self.word_emb_dims, name="word_embedding"
        )
        word_embeddings = embedding_layer(words_input)
        word_embeddings = tf.keras.layers.Dropout(self.dropout)(word_embeddings)

        # create word character input and embeddings layer
        word_chars_input = tf.keras.layers.Input(
            shape=(None, self.word_length), name="word_chars_input"
        )
        char_embedding_layer = tf.keras.layers.Embedding(
            self.char_vocab_size,
            self.char_emb_dims,
            input_length=self.word_length,
            name="char_embedding",
        )
        # apply embedding to each word
        char_embeddings = char_embedding_layer(word_chars_input)
        # feed dense char vectors into BiLSTM (one feature vector per word)
        char_embeddings = tf.keras.layers.TimeDistributed(
            tf.keras.layers.Bidirectional(self._rnn_cell(self.char_lstm_dims))
        )(char_embeddings)
        char_embeddings = tf.keras.layers.Dropout(self.dropout)(char_embeddings)

        # first BiLSTM layer (used for intent classification)
        first_bilstm_layer = tf.keras.layers.Bidirectional(
            self._rnn_cell(self.tagger_lstm_dims, return_sequences=True, return_state=True)
        )
        first_lstm_out = first_bilstm_layer(word_embeddings)

        # element 0 is the per-token output sequence, the rest are the states
        lstm_y_sequence = first_lstm_out[0]
        states = first_lstm_out[1:]
        hf, _, hb, _ = states  # extract last hidden states
        h_state = tf.keras.layers.concatenate([hf, hb], axis=-1)
        intents = tf.keras.layers.Dense(
            self.num_intent_labels, activation="softmax", name="intent_classifier_output"
        )(h_state)

        # create the 2nd feature vectors
        combined_features = tf.keras.layers.concatenate([lstm_y_sequence, char_embeddings], axis=-1)

        # 2nd BiLSTM layer for label classification
        second_bilstm_layer = tf.keras.layers.Bidirectional(
            self._rnn_cell(self.tagger_lstm_dims, return_sequences=True)
        )(combined_features)
        second_bilstm_layer = tf.keras.layers.Dropout(self.dropout)(second_bilstm_layer)
        bilstm_out = tf.keras.layers.Dense(self.num_labels)(second_bilstm_layer)

        # feed BiLSTM vectors into CRF (the CRF layer is pinned to the CPU)
        with tf.device("/cpu:0"):
            crf = CRF(self.num_labels, name="intent_slot_crf")
            labels = crf(bilstm_out)

        # compile the model
        model = tf.keras.Model(inputs=[words_input, word_chars_input], outputs=[intents, labels])

        # define losses and metrics (keyed by output layer name)
        loss_f = {
            "intent_classifier_output": "categorical_crossentropy",
            "intent_slot_crf": crf.loss,
        }
        metrics = {
            "intent_classifier_output": "categorical_accuracy",
            "intent_slot_crf": crf.viterbi_accuracy,
        }

        model.compile(loss=loss_f, optimizer=tf.train.AdamOptimizer(), metrics=metrics)
        self.model = model

    def _rnn_cell(self, units, **kwargs):
        """
        Create an LSTM layer, CuDNN backed when ``use_cudnn`` is set.

        Args:
            units (int): LSTM hidden size
            **kwargs: forwarded unchanged to the layer constructor

        Returns:
            a ``CuDNNLSTM`` or ``LSTM`` keras layer
        """
        if self.use_cudnn:
            rnn_cell = tf.keras.layers.CuDNNLSTM(units, **kwargs)
        else:
            rnn_cell = tf.keras.layers.LSTM(units, **kwargs)
        return rnn_cell

    def save(self, path, exclude=None):
        """
        Save model to path

        ``use_cudnn`` is always left out of the saved topology; any fields
        given in ``exclude`` are left out in addition.

        Args:
            path (str): path to save model
            exclude (list, optional): a list of additional object fields to
                exclude when saving. Values that are not a ``list`` are ignored.
        """
        fields = ["use_cudnn"]
        if exclude and isinstance(exclude, list):
            # skip duplicates: the base class pops every name exactly once
            for name in exclude:
                if name not in fields:
                    fields.append(name)
        super().save(path, fields)


class Seq2SeqIntentModel(IntentExtractionModel):
    """
    Encoder Decoder Deep LSTM Tagger Model (using tf.keras)

    A stack of encoder LSTMs reads the embedded tokens; the states of the
    last encoder layer initialise every decoder LSTM, and a per-token softmax
    classifier on top of the decoder predicts the tag labels.
    """

    def __init__(self):
        super().__init__()
        # Hyper-parameters are filled in by build() (they are also part of
        # the topology written by save()).
        self.vocab_size = None
        self.tag_labels = None
        self.token_emb_size = None
        self.encoder_depth = None
        self.decoder_depth = None
        self.lstm_hidden_size = None
        self.encoder_dropout = None
        self.decoder_dropout = None

    def build(
        self,
        vocab_size,
        tag_labels,
        token_emb_size=100,
        encoder_depth=1,
        decoder_depth=1,
        lstm_hidden_size=100,
        encoder_dropout=0.5,
        decoder_dropout=0.5,
    ):
        """
        Build the model

        Stores the hyper-parameters on the instance, creates the network and
        assigns the compiled model to ``self.model``.

        Args:
            vocab_size (int): vocabulary size
            tag_labels (int): number of tag labels
            token_emb_size (int, optional): token embedding vector size
            encoder_depth (int, optional): number of encoder LSTM layers
            decoder_depth (int, optional): number of decoder LSTM layers
            lstm_hidden_size (int, optional): LSTM layers hidden size
            encoder_dropout (float, optional): encoder dropout
            decoder_dropout (float, optional): decoder dropout

        Raises:
            AssertionError: if ``encoder_depth`` or ``decoder_depth`` is not
                greater than 0
        """
        # Validate first so a bad depth neither mutates the instance nor
        # leaves half of the layers created.
        assert encoder_depth > 0, "Encoder depth must be > 0"
        assert decoder_depth > 0, "Decoder depth must be > 0"

        self.vocab_size = vocab_size
        self.tag_labels = tag_labels
        self.token_emb_size = token_emb_size
        self.encoder_depth = encoder_depth
        self.decoder_depth = decoder_depth
        self.lstm_hidden_size = lstm_hidden_size
        self.encoder_dropout = encoder_dropout
        self.decoder_dropout = decoder_dropout

        words_input = tf.keras.layers.Input(shape=(None,), name="words_input")
        emb_layer = tf.keras.layers.Embedding(
            self.vocab_size, self.token_emb_size, name="word_embedding"
        )
        benc_in = emb_layer(words_input)

        # Encoder stack: every layer consumes the output sequence of the
        # previous one.
        # NOTE(review): with go_backwards=True Keras documents that the layer
        # returns the reversed sequence - confirm that the decoder input and
        # the training labels expect that ordering.
        for i in range(self.encoder_depth):
            bencoder = tf.keras.layers.LSTM(
                self.lstm_hidden_size,
                return_sequences=True,
                return_state=True,
                go_backwards=True,
                dropout=self.encoder_dropout,
                name="encoder_blstm_{}".format(i),
            )(benc_in)
            benc_in = bencoder[0]
        # hidden and cell state of the last encoder layer
        benc_h, benc_c = bencoder[1:]

        # Decoder stack: each layer starts from the final encoder states.
        decoder_inputs = benc_in
        for i in range(self.decoder_depth):
            decoder = tf.keras.layers.LSTM(
                self.lstm_hidden_size, return_sequences=True, name="decoder_lstm_{}".format(i)
            )(decoder_inputs, initial_state=[benc_h, benc_c])
            decoder_inputs = decoder
        decoder_outputs = tf.keras.layers.Dropout(self.decoder_dropout)(decoder)
        decoder_predictions = tf.keras.layers.TimeDistributed(
            tf.keras.layers.Dense(self.tag_labels, activation="softmax"), name="decoder_classifier"
        )(decoder_outputs)

        self.model = tf.keras.Model(words_input, decoder_predictions)
        self.model.compile(
            optimizer=tf.train.AdamOptimizer(),
            loss="categorical_crossentropy",
            metrics=["categorical_accuracy"],
        )