Source code for nlp_architect.procedures.transformers.base

# ******************************************************************************
# Copyright 2017-2019 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ******************************************************************************
import argparse

from nlp_architect.models.transformers.base_model import get_models


def create_base_args(parser: argparse.ArgumentParser, model_types=None):
    """Add base arguments for Transformer-based models"""
    # Required parameters
    if model_types is not None and len(model_types) > 1:
        parser.add_argument(
            "--model_type",
            default=None,
            type=str,
            choices=model_types,
            required=True,
            help="Model type selected in the list: " + ", ".join(model_types),
        )
    parser.add_argument(
        "--output_dir",
        default=None,
        type=str,
        required=True,
        help="The output directory where the model predictions and checkpoints "
        "will be written.",
    )
    # Other parameters
    parser.add_argument(
        "--tokenizer_name",
        default="",
        type=str,
        help="Pretrained tokenizer name or path, if not the same as model_name",
    )
    parser.add_argument(
        "--max_seq_length",
        default=128,
        type=int,
        help="The maximum total input sequence length after tokenization. "
        "Sequences longer than this will be truncated; sequences shorter "
        "will be padded.",
    )
    parser.add_argument(
        "--cache_dir",
        default="",
        type=str,
        help="Directory in which to store the pre-trained models downloaded "
        "from S3",
    )
    parser.add_argument(
        "--do_lower_case",
        action="store_true",
        help="Set this flag if you are using an uncased model.",
    )
    parser.add_argument(
        "--per_gpu_eval_batch_size",
        default=8,
        type=int,
        help="Batch size per GPU/CPU for evaluation.",
    )
    parser.add_argument("--no_cuda", action="store_true", help="Avoid using CUDA when available")
    parser.add_argument(
        "--overwrite_output_dir",
        action="store_true",
        help="Overwrite the content of the output directory",
    )
    parser.add_argument(
        "--overwrite_cache",
        action="store_true",
        help="Overwrite the cached training and evaluation sets",
    )
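

# Usage sketch: create_base_args composes onto a plain argparse parser. The
# function name "_example_base_cli", the model_types list, and the paths below
# are illustrative placeholders, not part of nlp_architect's API.
def _example_base_cli():
    parser = argparse.ArgumentParser(description="Transformer base-args demo")
    # With more than one entry in model_types, a required --model_type
    # argument restricted to those choices is added as well.
    create_base_args(parser, model_types=["bert", "xlnet"])
    # Parse an explicit argv list so the sketch is self-contained:
    return parser.parse_args(["--model_type", "bert", "--output_dir", "/tmp/out"])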


def inference_args(parser: argparse.ArgumentParser):
    """Add inference-specific arguments for Transformer-based models"""
    parser.add_argument(
        "--model_path", default=None, type=str, required=True, help="Path to pre-trained model"
    )
    parser.add_argument(
        "--load_quantized_model",
        action="store_true",
        help="Load and run inference from a saved quantized model; a "
        "'quant_pytorch_model.bin' file must exist in the directory and the "
        "model type must be 'quant_<model>'",
    )
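

# Usage sketch for an inference CLI: the base arguments plus the
# inference-only flags added above. "_example_inference_cli" and the paths are
# illustrative placeholders; with model_types left as None, no --model_type
# flag is added.
def _example_inference_cli():
    parser = argparse.ArgumentParser(description="Transformer inference demo")
    create_base_args(parser)
    inference_args(parser)
    return parser.parse_args(
        ["--output_dir", "/tmp/out", "--model_path", "/tmp/model", "--load_quantized_model"]
    )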


def train_args(parser: argparse.ArgumentParser, models_family=None):
    """Add training-specific arguments for Transformer-based models"""
    parser.add_argument(
        "--model_name_or_path",
        default=None,
        type=str,
        required=True,
        help="Path to pre-trained model or shortcut name selected in the list: "
        + ", ".join(get_models(models_family)),
    )
    parser.add_argument(
        "--config_name",
        default="",
        type=str,
        help="Pretrained config name or path, if not the same as model_name",
    )
    parser.add_argument(
        "--evaluate_during_training",
        action="store_true",
        help="Run evaluation during training at each logging step.",
    )
    parser.add_argument(
        "--per_gpu_train_batch_size",
        default=8,
        type=int,
        help="Batch size per GPU/CPU for training.",
    )
    parser.add_argument(
        "--gradient_accumulation_steps",
        type=int,
        default=1,
        help="Number of update steps to accumulate before performing a "
        "backward/update pass.",
    )
    parser.add_argument(
        "--learning_rate", default=5e-5, type=float, help="The initial learning rate for Adam."
    )
    parser.add_argument(
        "--weight_decay", default=0.0, type=float, help="Weight decay to apply, if any."
    )
    parser.add_argument(
        "--adam_epsilon", default=1e-8, type=float, help="Epsilon for the Adam optimizer."
    )
    parser.add_argument("--max_grad_norm", default=1.0, type=float, help="Max gradient norm.")
    parser.add_argument(
        "--num_train_epochs",
        default=3,
        type=int,
        help="Total number of training epochs to perform.",
    )
    parser.add_argument(
        "--max_steps",
        default=-1,
        type=int,
        help="If > 0: set the total number of training steps to perform. "
        "Overrides num_train_epochs.",
    )
    parser.add_argument(
        "--warmup_steps", default=0, type=int, help="Linear warmup over warmup_steps."
    )
    parser.add_argument("--logging_steps", type=int, default=50, help="Log every X update steps.")
    parser.add_argument(
        "--save_steps", type=int, default=500, help="Save checkpoint every X update steps."
    )
    parser.add_argument(
        "--eval_all_checkpoints",
        action="store_true",
        help="Evaluate all checkpoints starting with the same prefix as "
        "model_name and ending with a step number",
    )
    parser.add_argument("--seed", type=int, default=42, help="Random seed for initialization")
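

# Usage sketch for a training CLI: the base arguments plus the training
# hyper-parameters added above. "_example_train_cli" and the argument values
# are illustrative placeholders; note that train_args calls
# get_models(models_family) internally, which is assumed here to return the
# full shortcut-name list when models_family is None.
def _example_train_cli():
    parser = argparse.ArgumentParser(description="Transformer training demo")
    create_base_args(parser)
    train_args(parser)
    return parser.parse_args(
        [
            "--output_dir", "/tmp/out",
            "--model_name_or_path", "bert-base-uncased",
            "--per_gpu_train_batch_size", "16",
            "--num_train_epochs", "2",
        ]
    )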