# ******************************************************************************
# Copyright 2017-2019 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ******************************************************************************
import argparse
from nlp_architect.models.transformers.base_model import get_models


def create_base_args(parser: argparse.ArgumentParser, model_types=None):
"""Add base arguments for Transformers based models"""
    # Required parameters
    if model_types is not None and len(model_types) > 1:
        parser.add_argument(
            "--model_type",
            default=None,
            type=str,
            choices=model_types,
            required=True,
            help="Model type selected in the list: " + ", ".join(model_types),
        )
    parser.add_argument(
        "--output_dir",
        default=None,
        type=str,
        required=True,
        help="The output directory where the model predictions and checkpoints "
        "will be written.",
    )
    # Other parameters
    parser.add_argument(
        "--tokenizer_name",
        default="",
        type=str,
        help="Pretrained tokenizer name or path if not the same as model_name",
    )
    parser.add_argument(
        "--max_seq_length",
        default=128,
        type=int,
        help="The maximum total input sequence length after tokenization. "
        "Sequences longer than this will be truncated, sequences shorter "
        "will be padded.",
    )
    parser.add_argument(
        "--cache_dir",
        default="",
        type=str,
        help="Directory in which to store the pre-trained models downloaded from S3",
    )
    parser.add_argument(
        "--do_lower_case",
        action="store_true",
        help="Set this flag if you are using an uncased model.",
    )
    parser.add_argument(
        "--per_gpu_eval_batch_size",
        default=8,
        type=int,
        help="Batch size per GPU/CPU for evaluation.",
    )
    parser.add_argument("--no_cuda", action="store_true", help="Avoid using CUDA when available")
    parser.add_argument(
        "--overwrite_output_dir",
        action="store_true",
        help="Overwrite the content of the output directory",
    )
    parser.add_argument(
        "--overwrite_cache",
        action="store_true",
        help="Overwrite the cached training and evaluation sets",
    )


def inference_args(parser: argparse.ArgumentParser):
"""Add inference specific arguments for Transoformer based models"""
    parser.add_argument(
        "--model_path", default=None, type=str, required=True, help="Path to pre-trained model"
    )
    parser.add_argument(
        "--load_quantized_model",
        action="store_true",
        help="Load a saved quantized model and run inference with it; a "
        "'quant_pytorch_model.bin' file must exist in the model directory and "
        "the model type must be 'quant_<model>'",
    )


def train_args(parser: argparse.ArgumentParser, models_family=None):
"""Add training specific arguments for Transformer based models"""
    parser.add_argument(
        "--model_name_or_path",
        default=None,
        type=str,
        required=True,
        help="Path to pre-trained model or shortcut name selected in the list: "
        + ", ".join(get_models(models_family)),
    )
    parser.add_argument(
        "--config_name",
        default="",
        type=str,
        help="Pretrained config name or path if not the same as model_name",
    )
    parser.add_argument(
        "--evaluate_during_training",
        action="store_true",
        help="Run evaluation during training at each logging step.",
    )
    parser.add_argument(
        "--per_gpu_train_batch_size",
        default=8,
        type=int,
        help="Batch size per GPU/CPU for training.",
    )
    parser.add_argument(
        "--gradient_accumulation_steps",
        type=int,
        default=1,
        help="Number of update steps to accumulate before performing a backward/update pass.",
    )
    parser.add_argument(
        "--learning_rate", default=5e-5, type=float, help="The initial learning rate for Adam."
    )
    parser.add_argument(
        "--weight_decay", default=0.0, type=float, help="Weight decay to apply, if any."
    )
    parser.add_argument(
        "--adam_epsilon", default=1e-8, type=float, help="Epsilon for Adam optimizer."
    )
    parser.add_argument("--max_grad_norm", default=1.0, type=float, help="Max gradient norm.")
    parser.add_argument(
        "--num_train_epochs",
        default=3,
        type=int,
        help="Total number of training epochs to perform.",
    )
    parser.add_argument(
        "--max_steps",
        default=-1,
        type=int,
        help="If > 0: set total number of training steps to perform. "
        "Overrides num_train_epochs.",
    )
    parser.add_argument(
        "--warmup_steps", default=0, type=int, help="Linear warmup over warmup_steps."
    )
parser.add_argument("--logging_steps", type=int, default=50, help="Log every X updates steps.")
parser.add_argument(
"--save_steps", type=int, default=500, help="Save checkpoint every X updates steps."
)
    parser.add_argument(
        "--eval_all_checkpoints",
        action="store_true",
        help="Evaluate all checkpoints starting with the same prefix as "
        "model_name and ending with a step number",
    )
parser.add_argument("--seed", type=int, default=42, help="random seed for initialization")