# ******************************************************************************
# Copyright 2017-2018 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ******************************************************************************
from __future__ import absolute_import, division, print_function, unicode_literals
import tensorflow as tf
from nlp_architect.nn.tensorflow.python.keras.layers.crf import CRF
from nlp_architect.nn.tensorflow.python.keras.utils import load_model, save_model
class MultiTaskIntentModel(IntentExtractionModel):
"""
Multi-Task Intent and Slot tagging model (using tf.keras)
Args:
        use_cudnn (bool, optional): use GPU-based model (CuDNNLSTM cells)
"""
def __init__(self, use_cudnn=False):
super().__init__()
self.model = None
self.word_length = None
self.num_labels = None
self.num_intent_labels = None
self.word_vocab_size = None
self.char_vocab_size = None
self.word_emb_dims = None
self.char_emb_dims = None
self.char_lstm_dims = None
self.tagger_lstm_dims = None
self.dropout = None
self.use_cudnn = use_cudnn
    def build(
self,
word_length,
num_labels,
num_intent_labels,
word_vocab_size,
char_vocab_size,
word_emb_dims=100,
char_emb_dims=30,
char_lstm_dims=30,
tagger_lstm_dims=100,
dropout=0.2,
):
"""
Build a model
Args:
word_length (int): max word length (in characters)
num_labels (int): number of slot labels
num_intent_labels (int): number of intent classes
word_vocab_size (int): word vocabulary size
char_vocab_size (int): character vocabulary size
word_emb_dims (int, optional): word embedding dimensions
char_emb_dims (int, optional): character embedding dimensions
char_lstm_dims (int, optional): character feature LSTM hidden size
tagger_lstm_dims (int, optional): tagger LSTM hidden size
dropout (float, optional): dropout rate
"""
self.word_length = word_length
self.num_labels = num_labels
self.num_intent_labels = num_intent_labels
self.word_vocab_size = word_vocab_size
self.char_vocab_size = char_vocab_size
self.word_emb_dims = word_emb_dims
self.char_emb_dims = char_emb_dims
self.char_lstm_dims = char_lstm_dims
self.tagger_lstm_dims = tagger_lstm_dims
self.dropout = dropout
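        # create word input and word embedding layers (with dropout)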
words_input = tf.keras.layers.Input(shape=(None,), name="words_input")
embedding_layer = tf.keras.layers.Embedding(
self.word_vocab_size, self.word_emb_dims, name="word_embedding"
)
word_embeddings = embedding_layer(words_input)
word_embeddings = tf.keras.layers.Dropout(self.dropout)(word_embeddings)
# create word character input and embeddings layer
word_chars_input = tf.keras.layers.Input(
shape=(None, self.word_length), name="word_chars_input"
)
char_embedding_layer = tf.keras.layers.Embedding(
self.char_vocab_size,
self.char_emb_dims,
input_length=self.word_length,
name="char_embedding",
)
# apply embedding to each word
char_embeddings = char_embedding_layer(word_chars_input)
# feed dense char vectors into BiLSTM
char_embeddings = tf.keras.layers.TimeDistributed(
tf.keras.layers.Bidirectional(self._rnn_cell(self.char_lstm_dims))
)(char_embeddings)
char_embeddings = tf.keras.layers.Dropout(self.dropout)(char_embeddings)
# first BiLSTM layer (used for intent classification)
first_bilstm_layer = tf.keras.layers.Bidirectional(
self._rnn_cell(self.tagger_lstm_dims, return_sequences=True, return_state=True)
)
first_lstm_out = first_bilstm_layer(word_embeddings)
        # first_lstm_out = [output_sequence, fwd_h, fwd_c, bwd_h, bwd_c]
        lstm_y_sequence = first_lstm_out[0]  # full output sequence of the LSTM layer
        states = first_lstm_out[1:]
        hf, _, hb, _ = states  # keep only the final forward/backward hidden states
h_state = tf.keras.layers.concatenate([hf, hb], axis=-1)
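        # classify the intent from the concatenated forward/backward final hidden states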
intents = tf.keras.layers.Dense(
self.num_intent_labels, activation="softmax", name="intent_classifier_output"
)(h_state)
        # concatenate the word-context sequence with the character features for slot tagging
combined_features = tf.keras.layers.concatenate([lstm_y_sequence, char_embeddings], axis=-1)
# 2nd BiLSTM layer for label classification
second_bilstm_layer = tf.keras.layers.Bidirectional(
self._rnn_cell(self.tagger_lstm_dims, return_sequences=True)
)(combined_features)
second_bilstm_layer = tf.keras.layers.Dropout(self.dropout)(second_bilstm_layer)
bilstm_out = tf.keras.layers.Dense(self.num_labels)(second_bilstm_layer)
# feed BiLSTM vectors into CRF
with tf.device("/cpu:0"):
crf = CRF(self.num_labels, name="intent_slot_crf")
labels = crf(bilstm_out)
# compile the model
model = tf.keras.Model(inputs=[words_input, word_chars_input], outputs=[intents, labels])
# define losses and metrics
loss_f = {
"intent_classifier_output": "categorical_crossentropy",
"intent_slot_crf": crf.loss,
}
metrics = {
"intent_classifier_output": "categorical_accuracy",
"intent_slot_crf": crf.viterbi_accuracy,
}
model.compile(loss=loss_f, optimizer=tf.train.AdamOptimizer(), metrics=metrics)
self.model = model
def _rnn_cell(self, units, **kwargs):
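        # use the GPU-accelerated CuDNNLSTM when requested, a plain LSTM otherwise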
if self.use_cudnn:
rnn_cell = tf.keras.layers.CuDNNLSTM(units, **kwargs)
else:
rnn_cell = tf.keras.layers.LSTM(units, **kwargs)
return rnn_cell
# pylint: disable=arguments-differ
    def save(self, path):
"""
Save model to path
Args:
path (str): path to save model
"""
super().save(path, ["use_cudnn"])
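# Illustrative usage sketch for MultiTaskIntentModel (the hyperparameter values
# below are arbitrary placeholders, not library defaults):
#     m = MultiTaskIntentModel(use_cudnn=False)
#     m.build(word_length=12, num_labels=10, num_intent_labels=7,
#             word_vocab_size=10000, char_vocab_size=100)
#     m.model.summary()  # the compiled tf.keras.Model is stored on the `model` attribute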
class Seq2SeqIntentModel(IntentExtractionModel):
"""
Encoder Decoder Deep LSTM Tagger Model (using tf.keras)
"""
def __init__(self):
super().__init__()
self.model = None
self.vocab_size = None
self.tag_labels = None
self.token_emb_size = None
self.encoder_depth = None
self.decoder_depth = None
self.lstm_hidden_size = None
self.encoder_dropout = None
self.decoder_dropout = None
    def build(
self,
vocab_size,
tag_labels,
token_emb_size=100,
encoder_depth=1,
decoder_depth=1,
lstm_hidden_size=100,
encoder_dropout=0.5,
decoder_dropout=0.5,
):
"""
Build the model
Args:
vocab_size (int): vocabulary size
tag_labels (int): number of tag labels
token_emb_size (int, optional): token embedding vector size
encoder_depth (int, optional): number of encoder LSTM layers
decoder_depth (int, optional): number of decoder LSTM layers
lstm_hidden_size (int, optional): LSTM layers hidden size
encoder_dropout (float, optional): encoder dropout
decoder_dropout (float, optional): decoder dropout
"""
self.vocab_size = vocab_size
self.tag_labels = tag_labels
self.token_emb_size = token_emb_size
self.encoder_depth = encoder_depth
self.decoder_depth = decoder_depth
self.lstm_hidden_size = lstm_hidden_size
self.encoder_dropout = encoder_dropout
self.decoder_dropout = decoder_dropout
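        # create word input and token embedding layer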
words_input = tf.keras.layers.Input(shape=(None,), name="words_input")
emb_layer = tf.keras.layers.Embedding(
self.vocab_size, self.token_emb_size, name="word_embedding"
)
benc_in = emb_layer(words_input)
assert self.encoder_depth > 0, "Encoder depth must be > 0"
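        # stack encoder LSTM layers; go_backwards=True reads the sequence in reverse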
for i in range(self.encoder_depth):
bencoder = tf.keras.layers.LSTM(
self.lstm_hidden_size,
return_sequences=True,
return_state=True,
go_backwards=True,
dropout=self.encoder_dropout,
name="encoder_blstm_{}".format(i),
)(benc_in)
benc_in = bencoder[0]
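        # keep the final encoder states (hidden, cell) to initialize the decoder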
b_states = bencoder[1:]
benc_h, bene_c = b_states
decoder_inputs = benc_in
assert self.decoder_depth > 0, "Decoder depth must be > 0"
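        # stack decoder LSTM layers, seeding each with the encoder's final states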
for i in range(self.decoder_depth):
decoder = tf.keras.layers.LSTM(
self.lstm_hidden_size, return_sequences=True, name="decoder_lstm_{}".format(i)
)(decoder_inputs, initial_state=[benc_h, bene_c])
decoder_inputs = decoder
decoder_outputs = tf.keras.layers.Dropout(self.decoder_dropout)(decoder)
decoder_predictions = tf.keras.layers.TimeDistributed(
tf.keras.layers.Dense(self.tag_labels, activation="softmax"), name="decoder_classifier"
)(decoder_outputs)
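        # assemble the sequence-to-sequence tagging model and compile it with Adam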
self.model = tf.keras.Model(words_input, decoder_predictions)
self.model.compile(
optimizer=tf.train.AdamOptimizer(),
loss="categorical_crossentropy",
metrics=["categorical_accuracy"],
)