Source code for nlp_architect.models.chunker

# ******************************************************************************
# Copyright 2017-2018 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ******************************************************************************

from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow as tf

from nlp_architect.nn.tensorflow.python.keras.layers.crf import CRF
from nlp_architect.nn.tensorflow.python.keras.utils import load_model, save_model


class SequenceTagger(object):
    """
    A sequence tagging model for POS and chunk tags, written in TensorFlow (Keras),
    based on the paper 'Deep multi-task learning with low level tasks supervised
    at lower layers'. The model has 3 Bi-LSTM layers and outputs POS and chunk tags.

    Args:
        use_cudnn (bool, optional): use GPU-based model (CuDNNLSTM cells)
    """

    def __init__(self, use_cudnn=False):
        self.vocabulary_size = None
        self.num_pos_labels = None
        self.num_chunk_labels = None
        self.char_vocab_size = None
        self.feature_size = None
        self.dropout = None
        self.max_word_len = None
        self.classifier = None
        self.optimizer = None
        self.model = None
        self.use_cudnn = use_cudnn

    def build(
        self,
        vocabulary_size,
        num_pos_labels,
        num_chunk_labels,
        char_vocab_size=None,
        max_word_len=25,
        feature_size=100,
        dropout=0.5,
        classifier="softmax",
        optimizer=None,
    ):
        """
        Build a chunker/POS model

        Args:
            vocabulary_size (int): the size of the input vocabulary
            num_pos_labels (int): the number of POS labels
            num_chunk_labels (int): the number of chunk labels
            char_vocab_size (int, optional): character vocabulary size
            max_word_len (int, optional): max characters in a word
            feature_size (int, optional): feature size - determines the embedding/LSTM
                layer hidden state size
            dropout (float, optional): dropout rate
            classifier (str, optional): classifier layer, 'softmax' for softmax or 'crf'
                for a conditional random fields classifier. default is 'softmax'.
            optimizer (tensorflow.python.training.optimizer.Optimizer, optional):
                optimizer, if None a default Adam optimizer is used
        """
        self.vocabulary_size = vocabulary_size
        self.char_vocab_size = char_vocab_size
        self.num_pos_labels = num_pos_labels
        self.num_chunk_labels = num_chunk_labels
        self.max_word_len = max_word_len
        self.feature_size = feature_size
        self.dropout = dropout
        self.classifier = classifier

        word_emb_layer = tf.keras.layers.Embedding(
            self.vocabulary_size, self.feature_size, name="embedding", mask_zero=False
        )
        word_input = tf.keras.layers.Input(shape=(None,))
        word_embedding = word_emb_layer(word_input)
        input_src = word_input
        features = word_embedding

        # add char input if present
        if self.char_vocab_size is not None:
            char_input = tf.keras.layers.Input(shape=(None, self.max_word_len))
            char_emb_layer = tf.keras.layers.Embedding(
                self.char_vocab_size, 30, name="char_embedding", mask_zero=False
            )
            char_embedding = char_emb_layer(char_input)
            char_embedding = tf.keras.layers.TimeDistributed(
                tf.keras.layers.Conv1D(30, 3, padding="same")
            )(char_embedding)
            char_embedding = tf.keras.layers.TimeDistributed(
                tf.keras.layers.GlobalMaxPooling1D()
            )(char_embedding)
            input_src = [input_src, char_input]
            features = tf.keras.layers.concatenate([word_embedding, char_embedding])

        rnn_layer_1 = tf.keras.layers.Bidirectional(self._rnn_cell(return_sequences=True))(
            features
        )
        rnn_layer_2 = tf.keras.layers.Bidirectional(self._rnn_cell(return_sequences=True))(
            rnn_layer_1
        )
        rnn_layer_3 = tf.keras.layers.Bidirectional(self._rnn_cell(return_sequences=True))(
            rnn_layer_2
        )

        # outputs: POS is supervised at the first (lower) layer, chunks at the third
        pos_out = tf.keras.layers.Dense(
            self.num_pos_labels, activation="softmax", name="pos_output"
        )(rnn_layer_1)
        losses = {"pos_output": "categorical_crossentropy"}
        metrics = {"pos_output": "categorical_accuracy"}

        if "crf" in self.classifier:
            with tf.device("/cpu:0"):
                chunk_crf = CRF(self.num_chunk_labels, name="chunk_crf")
                rnn_layer_3_dense = tf.keras.layers.Dense(self.num_chunk_labels)(
                    tf.keras.layers.Dropout(self.dropout)(rnn_layer_3)
                )
                chunks_out = chunk_crf(rnn_layer_3_dense)
                losses["chunk_crf"] = chunk_crf.loss
                metrics["chunk_crf"] = chunk_crf.viterbi_accuracy
        else:
            chunks_out = tf.keras.layers.TimeDistributed(
                tf.keras.layers.Dense(self.num_chunk_labels, activation="softmax"),
                name="chunk_out",
            )(rnn_layer_3)
            losses["chunk_out"] = "categorical_crossentropy"
            metrics["chunk_out"] = "categorical_accuracy"

        model = tf.keras.Model(input_src, [pos_out, chunks_out])
        if optimizer is None:
            self.optimizer = tf.keras.optimizers.Adam(0.001, clipnorm=5.0)
        else:
            self.optimizer = optimizer
        model.compile(optimizer=self.optimizer, loss=losses, metrics=metrics)
        self.model = model
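
    # Usage sketch (illustrative, not part of the module): building a model with
    # word and character inputs. All sizes below are assumed example values, not
    # defaults from any particular dataset.
    #
    #     tagger = SequenceTagger(use_cudnn=False)
    #     tagger.build(
    #         vocabulary_size=10000,
    #         num_pos_labels=45,
    #         num_chunk_labels=23,
    #         char_vocab_size=80,
    #         classifier="crf",
    #     )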

    def load_embedding_weights(self, weights):
        """
        Load word embedding weights into the model's embedding layer

        Args:
            weights (numpy.ndarray): 2D matrix of word weights
        """
        assert self.model is not None, (
            "Cannot assign weights, apply build() before trying to load embedding weights"
        )
        emb_layer = self.model.get_layer(name="embedding")
        assert emb_layer.output_dim == weights.shape[1], "embedding vectors shape mismatch"
        emb_layer.set_weights([weights])
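
    # Sketch of loading pretrained vectors (the random matrix below is only a
    # stand-in for real embeddings): the matrix must be shaped
    # (vocabulary_size, feature_size) to match the layer built in build().
    #
    #     import numpy as np
    #     weights = np.random.rand(10000, 100).astype(np.float32)  # stand-in matrix
    #     tagger.load_embedding_weights(weights)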

    def _rnn_cell(self, **kwargs):
        if self.use_cudnn:
            rnn_cell = tf.keras.layers.CuDNNLSTM(self.feature_size, **kwargs)
        else:
            rnn_cell = tf.keras.layers.LSTM(self.feature_size, **kwargs)
        return rnn_cell

    def fit(self, x, y, batch_size=1, epochs=1, validation_data=None, callbacks=None):
        """
        Fit provided x and y on the built model

        Args:
            x: x samples
            y: y samples
            batch_size (int, optional): number of samples per batch
            epochs (int, optional): number of epochs to run before ending training process
            validation_data (optional): x and y samples to validate at the end of the epoch
            callbacks (optional): additional callbacks to run with fitting
        """
        self.model.fit(
            x=x,
            y=y,
            batch_size=batch_size,
            epochs=epochs,
            validation_data=validation_data,
            callbacks=callbacks,
        )
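
    # Sketch of a training call for a word-only model (no char input) with the
    # default softmax classifier; assumes `x_train` holds padded word-id sequences
    # and the labels are one-hot encoded per token, one array per output head:
    #
    #     tagger.fit(
    #         x_train,
    #         [y_pos_onehot, y_chunk_onehot],
    #         batch_size=32,
    #         epochs=5,
    #         validation_data=(x_dev, [y_pos_dev, y_chunk_dev]),
    #     )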

    def predict(self, x, batch_size=1):
        """
        Predict labels given x.

        Args:
            x: samples for inference
            batch_size (int, optional): forward pass batch size

        Returns:
            tuple of numpy arrays of POS and chunk labels
        """
        return self.model.predict(x=x, batch_size=batch_size)

    def save(self, filepath):
        """
        Save the model to disk

        Args:
            filepath (str): file name to save model
        """
        topology = {k: v for k, v in self.__dict__.items()}
        topology.pop("model")
        topology.pop("optimizer")
        topology.pop("use_cudnn")
        save_model(self.model, topology, filepath)
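
    # Sketch of a save/load round trip; "chunker.model" is an assumed file name.
    # save() stores the Keras model plus the topology fields set in build(),
    # minus the non-serializable entries popped above.
    #
    #     tagger.save("chunker.model")
    #     restored = SequenceTagger()
    #     restored.load("chunker.model")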

    def load(self, filepath):
        """
        Load model from disk

        Args:
            filepath (str): file name of model
        """
        load_model(filepath, self)


class SequenceChunker(SequenceTagger):
    """
    A sequence chunker model written in TensorFlow (Keras), based on the
    SequenceTagger model. This model uses only the chunk output of the base model.
    """

    def predict(self, x, batch_size=1):
        """
        Predict chunk labels given x.

        Args:
            x: samples for inference
            batch_size (int, optional): forward pass batch size

        Returns:
            numpy array of chunk labels
        """
        model = tf.keras.Model(self.model.input, self.model.output[-1])
        return model.predict(x=x, batch_size=batch_size)


class SequencePOSTagger(SequenceTagger):
    """
    A sequence POS tagger model written in TensorFlow (Keras), based on the
    SequenceTagger model. This model uses only the POS output of the base model.
    """

    def predict(self, x, batch_size=1):
        """
        Predict POS labels given x.

        Args:
            x: samples for inference
            batch_size (int, optional): forward pass batch size

        Returns:
            numpy array of POS labels
        """
        model = tf.keras.Model(self.model.input, self.model.output[0])
        return model.predict(x=x, batch_size=batch_size)
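

# Sketch (illustrative): the subclasses share SequenceTagger's build/fit/save API
# and differ only in predict(), which selects a single output head. Assumes the
# "chunker.model" file saved in the example above and `x_test` holding padded
# word-id sequences:
#
#     chunker = SequenceChunker()
#     chunker.load("chunker.model")
#     chunk_probs = chunker.predict(x_test, batch_size=32)  # chunk head only
#     chunk_ids = chunk_probs.argmax(-1)  # per-token label ids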