Skip to content
Snippets Groups Projects
Commit 34368ef8 authored by Damien Hansen's avatar Damien Hansen
Browse files

Adding training steps

parent 2b06876c
No related branches found
No related tags found
No related merge requests found
train.sh 0 → 100644
#!/bin/sh
# Directories
#############
# Ask for the two names the rest of the script builds its paths from:
#   DIR - sub-directory of ./data, also the prefix of ./configs/${DIR}_*.yaml
#   OUT - run directory to create under ./out
# Both are exported so that child processes can see them.
echo "Choose directory in the data folder"
# -r keeps backslashes in the typed name literal.
read -r DIR
if [ -z "${DIR}" ]; then
  # An empty name would silently resolve to ./configs/_only.yaml and ./data//...
  echo "train.sh: no data directory name given" >&2
  exit 1
fi
export DIR
echo "Create directory in the output folder"
read -r OUT
if [ -z "${OUT}" ]; then
  # An empty name would scatter models/logs directly under ./out.
  echo "train.sh: no output directory name given" >&2
  exit 1
fi
export OUT
# -p also creates ./out/${OUT} itself; quoting keeps names with spaces intact.
mkdir -p \
  "./out/${OUT}/models" \
  "./out/${OUT}/translations" \
  "./out/${OUT}/logs" \
  "./out/${OUT}/tensor" || exit 1
# Training
##########
# run_training
#   Runs onmt_train once per configuration file, in this order:
#     ./configs/${DIR}_only.yaml
#     ./configs/${DIR}_train.yaml
#     ./configs/${DIR}_tuned.yaml
#   Globals: DIR (read)
#   Returns: 0 when every run succeeded, 1 when at least one failed.
#   A failed run is reported on stderr; the remaining runs are still attempted.
run_training() {
  rt_failed=0
  for rt_stage in only train tuned; do
    rt_config="./configs/${DIR}_${rt_stage}.yaml"
    onmt_train --config "${rt_config}" || {
      rt_rc=$?
      printf 'train.sh: onmt_train failed (exit %s) for %s\n' \
        "${rt_rc}" "${rt_config}" >&2
      rt_failed=1
    }
  done
  return "${rt_failed}"
}
# The status is deliberately not acted on here: the script carries on with
# whatever checkpoints were produced.
run_training
# Translation
#############
# translate_checkpoints PREFIX SRC SPM_MODEL
#   Translates SRC with every checkpoint matching
#   ./out/${OUT}/models/${DIR}_${PREFIX}*.pt, then post-processes the output
#   in ./out/${OUT}/translations:
#     <checkpoint>_uni.txt  raw onmt_translate output
#     <checkpoint>_tok.txt  after spm_decode with SPM_MODEL
#     <checkpoint>.txt      after sacremoses French detokenization
#   The _uni.txt and _tok.txt intermediates are deleted at the end.
#
#   PREFIX    - leading characters of the checkpoint name after "${DIR}_"
#   SRC       - file given to onmt_translate --src
#   SPM_MODEL - model file given to spm_decode --model
#   Globals: DIR, OUT (read)
translate_checkpoints() {
  tc_prefix=$1
  tc_src=$2
  tc_spm=$3
  tc_dir="./out/${OUT}/translations"

  for tc_checkpoint in "./out/${OUT}/models/${DIR}_${tc_prefix}"*.pt; do
    # An unmatched glob stays literal; skip it instead of loading a bogus model.
    [ -e "${tc_checkpoint}" ] || continue
    tc_name=$(basename "${tc_checkpoint}" .pt)
    printf '%s\n' "# Translating checkpoint ${tc_name}"
    onmt_translate \
      --verbose \
      --replace_unk \
      --model "${tc_checkpoint}" \
      --src "${tc_src}" \
      --output "${tc_dir}/${tc_name}_uni.txt"
  done

  # Every *_uni.txt present in the directory is decoded, not only the files
  # written by the loop above.
  for tc_file in "${tc_dir}/"*_uni.txt; do
    [ -e "${tc_file}" ] || continue
    # Strip only the known suffix so dots inside the name are preserved.
    tc_stem=${tc_file%_uni.txt}
    spm_decode \
      --model="${tc_spm}" \
      --input_format=piece \
      < "${tc_file}" \
      > "${tc_stem}_tok.txt"
  done

  for tc_file in "${tc_dir}/"*_tok.txt; do
    [ -e "${tc_file}" ] || continue
    tc_stem=${tc_file%_tok.txt}
    sacremoses \
      -l fr \
      detokenize \
      < "${tc_file}" \
      > "${tc_stem}.txt"
  done

  # -f: having nothing to clean up is not an error.
  rm -f -- "${tc_dir}/"*_uni.txt "${tc_dir}/"*_tok.txt
}

# For fine-tuned models
# NOTE: the "t" prefix selects every checkpoint named ${DIR}_t*.pt.
translate_checkpoints t \
  "./data/${DIR}/tok/tra_unigram_multi.en" \
  "./data/${DIR}/subword/unigram_multi_fr.model"

# For video game only models
translate_checkpoints o \
  "./data/${DIR}/tok/tra_unigram_only.en" \
  "./data/${DIR}/subword/unigram_only_fr.model"
# Evaluation
############
# evaluate_translations
#   Scores every ./out/${OUT}/translations/*.txt file against the reference
#   ./data/${DIR}/tra.fr with sacrebleu (BLEU, chrF, TER) and appends the
#   report to ./out/${OUT}/BLEU.txt. All matching files are handed to one
#   --input option in a single sacrebleu call.
#   Globals: DIR, OUT (read)
#   Returns: sacrebleu's exit status, or 1 when there is nothing to score.
evaluate_translations() {
  # Use the function's own positional parameters as the list of hypotheses.
  set -- "./out/${OUT}/translations/"*.txt
  if [ ! -e "$1" ]; then
    # The glob matched nothing and was left as a literal pattern.
    printf 'train.sh: no translations found in %s\n' \
      "./out/${OUT}/translations" >&2
    return 1
  fi
  sacrebleu "./data/${DIR}/tra.fr" \
    --input "$@" \
    --language-pair en-fr \
    --metrics bleu chrf ter \
    --chrf-word-order 2 \
    --tokenize 13a \
    --width 2 \
    --format text \
    >> "./out/${OUT}/BLEU.txt"
}
evaluate_translations
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment