Skip to content
Snippets Groups Projects
Commit 388a02cf authored by Damien Hansen's avatar Damien Hansen
Browse files

Upload New File

parent acabc4c5
No related branches found
No related tags found
No related merge requests found
---
# OpenNMT-py training configuration: EN->FR Transformer (base) for the
# Fallout video-game domain, trained on weighted out-of-domain corpora
# and validated on in-domain data.

# Data output:
overwrite: false
save_data: ./data/fallout/vocab/fallout_multi
src_vocab: ./data/fallout/vocab/fallout_multi.vocab.src
tgt_vocab: ./data/fallout/vocab/fallout_multi.vocab.tgt

# Training corpora:
# `weight` sets the relative sampling ratio between corpora
# (europarl is over-sampled 5x vs. the weight-2 corpora).
data:
  globalvoices:
    path_src: ./data/globalvoices/tok/trn.en
    path_tgt: ./data/globalvoices/tok/trn.fr
    transforms: [filtertoolong, sentencepiece]
    weight: 2
  europarl:
    path_src: ./data/europarl/tok/trn.en
    path_tgt: ./data/europarl/tok/trn.fr
    transforms: [filtertoolong, sentencepiece]
    weight: 10
  books:
    path_src: ./data/books/tok/trn.en
    path_tgt: ./data/books/tok/trn.fr
    transforms: [filtertoolong, sentencepiece]
    weight: 2
  news:
    path_src: ./data/news/tok/trn.en
    path_tgt: ./data/news/tok/trn.fr
    transforms: [filtertoolong, sentencepiece]
    weight: 2
  ted:
    path_src: ./data/ted/tok/trn.en
    path_tgt: ./data/ted/tok/trn.fr
    transforms: [filtertoolong, sentencepiece]
    weight: 4
  # In-domain validation set (the `valid` corpus must live under `data:`):
  valid:
    path_src: ./data/fallout/tok/val.en
    path_tgt: ./data/fallout/tok/val.fr
    transforms: [filtertoolong, sentencepiece]

# Transform options (apply to the corpora above):
src_seq_length: 200
tgt_seq_length: 200
skip_empty_level: silent
# SentencePiece unigram models with subword regularization
# (nbest/alpha sampling at training time):
src_subword_model: ./data/fallout/subword/unigram_multi_en.model
tgt_subword_model: ./data/fallout/subword/unigram_multi_fr.model
src_subword_vocab: ./data/fallout/subword/unigram_multi_en.vocab
tgt_subword_vocab: ./data/fallout/subword/unigram_multi_fr.vocab
src_subword_nbest: 20
tgt_subword_nbest: 20
src_subword_alpha: 0.1
tgt_subword_alpha: 0.1

# Training parameters:
batch_type: "tokens"
batch_size: 4096
valid_batch_size: 16
batch_size_multiple: 1
max_generator_batches: 0
# Gradient accumulation: 3 batches per update from step 0
# (effective batch ~12k tokens).
accum_count: [3]
accum_steps: [0]
train_steps: 100000
valid_steps: 5000
report_every: 100
save_checkpoint_steps: 10000
queue_size: 10000
bucket_size: 32768

# Optimization (standard Transformer recipe: Adam + noam decay;
# learning_rate is the noam scale factor, not a literal LR):
model_dtype: "fp32"
optim: "adam"
learning_rate: 2
warmup_steps: 8000
decay_method: "noam"
average_decay: 0.0005
adam_beta2: 0.998
max_grad_norm: 0
label_smoothing: 0.1
param_init: 0
param_init_glorot: true
normalization: "tokens"

# Model (Transformer-base: 6+6 layers, 8 heads, d_model 512, FF 2048):
encoder_type: transformer
decoder_type: transformer
enc_layers: 6
dec_layers: 6
heads: 8
rnn_size: 512
word_vec_size: 512
transformer_ff: 2048
dropout_steps: [0]
dropout: [0.1]
attention_dropout: [0.1]
position_encoding: true

# Model output:
save_model: ./out/fallout/models/fallout_train

# Logs:
log_file: ./out/fallout/logs/fallout_train
tensorboard: true
tensorboard_log_dir: ./out/fallout/tensor/fallout_train

# GPU settings:
world_size: 1
gpu_ranks: [0]

# Reproducibility:
seed: 5
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment