-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
95 lines (74 loc) · 3.94 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#main.py
import os
import shutil
from tqdm import tqdm
from pydub import AudioSegment
from pydub.utils import mediainfo
import subprocess
from process_audio import sample_change,SplitWavAudio
from model_manager import Model
def check_sample_rate(audio_path,sampled_path):
    """Place a 16 kHz version of every file in *audio_path* into *sampled_path*.

    Files that are not already at 16000 Hz are handed to ``sample_change``;
    files that already are get copied unchanged. Sub-directories of
    *audio_path* are ignored.

    Args:
        audio_path: Directory containing the source audio files.
        sampled_path: Existing directory that receives the results.
    """
    target_rate = 16000
    print(">> Checking if Audio Files are in Recommended Sample Rate (and Converting if not) : ")
    for name in tqdm(os.listdir(audio_path)):
        file = os.path.join(audio_path, name)
        if not os.path.isfile(file):
            continue
        info = mediainfo(file)
        # mediainfo values come from ffprobe's text output, so the rate has to
        # be cast before comparing; comparing the raw value to an int would
        # never be equal and every file would be converted.
        if int(info['sample_rate']) != target_rate:
            sample_change(file, target_rate, sampled_path)
        else:
            # copyfile needs a full destination file name, not a directory.
            shutil.copyfile(file, os.path.join(sampled_path, name))
def check_rec_length(sampled_path,split_path):
    """Place clips of at most 30 seconds from *sampled_path* into *split_path*.

    Files lasting 30 seconds or more are passed to ``SplitWavAudio`` and split
    with ``sec_per_split=30`` (presumably writing the pieces into
    *split_path* -- verify against ``process_audio``); shorter files are
    copied unchanged. Sub-directories of *sampled_path* are ignored.

    Args:
        sampled_path: Directory containing the resampled audio files.
        split_path: Existing directory that receives the results.
    """
    max_seconds = 30
    print("\n>> Checking if Audio Files are in Recommended Length[10-30 Sec] (and Splitting them if not) : ")
    for name in tqdm(os.listdir(sampled_path)):
        file = os.path.join(sampled_path, name)
        if not os.path.isfile(file):
            continue
        audio = AudioSegment.from_file(file)
        if audio.duration_seconds >= max_seconds:
            split_wav = SplitWavAudio(file, split_path)
            split_wav.multiple_split(sec_per_split=max_seconds)
        else:
            # copyfile needs a full destination file name, not a directory.
            shutil.copyfile(file, os.path.join(split_path, name))
def create_manifest(generator,audio_path,manifest_path):
    """Run ``wav2vec_manifest.py`` to build the training manifest files.

    The script is looked up inside *generator* and invoked with *audio_path*
    as its positional argument, ``--dest manifest_path``, ``--ext wav`` and
    ``--valid-percent 0.01``.

    Args:
        generator: Directory that contains ``wav2vec_manifest.py``.
        audio_path: Directory with the audio files to index.
        manifest_path: Directory passed to the script as ``--dest``.
    """
    import sys  # local import: the file's import block does not provide sys

    print("\n>> Creating Manifest Files for Training ... ")
    script = os.path.join(generator, 'wav2vec_manifest.py')
    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters intact; sys.executable reuses the running interpreter.
    cmd = [
        sys.executable, script, audio_path,
        '--dest', manifest_path,
        '--ext', 'wav',
        '--valid-percent', '0.01',
    ]
    result = subprocess.run(cmd)
    if result.returncode != 0:
        # Do not claim success when the manifest script failed.
        print("\t Manifest Creation Failed with exit code : ", result.returncode)
        return
    print("\t Manifest Created in : ",manifest_path)
def train_model(trainer,manifest_path,model_path):
    """Launch fairseq's ``train.py`` to pre-train a wav2vec model.

    Args:
        trainer: Directory that contains ``train.py``.
        manifest_path: Directory with the manifest files used for training;
            passed as the positional data argument.
        model_path: Directory passed as ``--save-dir`` for the output model.
    """
    import sys  # local import: the file's import block does not provide sys

    print("Starting the Training of the wav2vec Model ... ")
    script = os.path.join(trainer, 'train.py')
    conv_feature_layers = "[(512, 10, 5), (512, 8, 4), (512, 4, 2), (512, 4, 2), (512, 4, 2), (512, 1, 1), (512, 1, 1)]"
    conv_aggregator_layers = "[(512, 2, 1), (512, 3, 1), (512, 4, 1), (512, 5, 1), (512, 6, 1), (512, 7, 1), (512, 8, 1), (512, 9, 1), (512, 10, 1), (512, 11, 1), (512, 12, 1), (512, 13, 1)]"
    lr = 1e-06       # Learning Rate
    min_lr = 1e-09   # Min Learning Rate
    max_lr = 0.005   # Max Learning Rate
    # The layer specs contain spaces and parentheses, so the command must be
    # an argument list: in a shell string they would be split into many
    # arguments (and rejected outright by POSIX shells).
    cmd = [
        sys.executable, script, manifest_path,
        '--save-dir', model_path,
        '--num-workers', '6',
        '--fp16',
        '--max-update', '400000',
        '--save-interval', '1',
        '--no-epoch-checkpoints',
        '--arch', 'wav2vec',
        '--task', 'audio_pretraining',
        '--lr', str(lr),
        '--min-lr', str(min_lr),
        '--optimizer', 'adam',
        '--max-lr', str(max_lr),
        '--lr-scheduler', 'cosine',
        '--conv-feature-layers', conv_feature_layers,
        '--conv-aggregator-layers', conv_aggregator_layers,
        '--skip-connections-agg',
        '--residual-scale', '0.5',
        '--log-compression',
        '--warmup-updates', '500',
        '--warmup-init-lr', '1e-07',
        '--criterion', 'wav2vec',
        '--num-negatives', '10',
        '--max-sample-size', '150000',
        '--max-tokens', '1500000',
        '--skip-invalid-size-inputs-valid-test',
    ]
    subprocess.run(cmd)
def main():
    """Run the whole pipeline from the current working directory.

    Creates the working directories, normalises the sample rate of the files
    in ``audio``, splits long recordings, builds the manifest files and then
    starts wav2vec training using the ``fairseq`` checkout found under the
    current working directory.
    """
    base = os.getcwd()
    audio_path = os.path.join(base,'audio')
    sampled_path = os.path.join(audio_path,'SampledAudio')
    split_path = os.path.join(audio_path,'SplitAudio')
    manifest_path = os.path.join(base,'manifest')
    model_path = os.path.join(base,'model')
    for path in (sampled_path, split_path, manifest_path, model_path):
        # makedirs also creates the parent 'audio' directory when it is
        # missing, and exist_ok avoids the check-then-create race.
        os.makedirs(path, exist_ok=True)

    check_sample_rate(audio_path,sampled_path)
    check_rec_length(sampled_path,split_path)

    wav2vec_path = os.path.join(base,'fairseq','examples','wav2vec')
    create_manifest(wav2vec_path,split_path,manifest_path)
    train_model(os.path.join(base,'fairseq'),manifest_path,model_path)
# Script entry point: show the banner, then run the full pipeline.
if __name__ == "__main__":
    banner = "\n\t\t ## Wav2Vec v 0.1 by NabinAdhikari674\n"
    print(banner)
    main()