@@ -49,7 +49,13 @@ def preprocess(model, in_dir, out_dir, text, audio_filename, mel_filename,
4949 model .make_generation_fast_ ()
5050
5151 mel_org = np .load (join (in_dir , mel_filename ))
52- mel = Variable (torch .from_numpy (mel_org )).unsqueeze (0 ).contiguous ()
52+ # zero-pad: r leading frames, plus trailing frames up to a multiple of r
53+ b_pad = r # imitates initial state
54+ e_pad = r - len (mel_org ) % r if len (mel_org ) % r > 0 else 0
55+ mel = np .pad (mel_org , [(b_pad , e_pad ), (0 , 0 )],
56+ mode = "constant" , constant_values = 0 )
57+
58+ mel = Variable (torch .from_numpy (mel )).unsqueeze (0 ).contiguous ()
5359
5460 # Downsample mel spectrogram
5561 if downsample_step > 1 :
@@ -78,10 +84,10 @@ def preprocess(model, in_dir, out_dir, text, audio_filename, mel_filename,
7884 frame_positions = frame_positions , speaker_ids = speaker_ids )
7985
8086 mel_output = mel_outputs [0 ].data .cpu ().numpy ()
81-
8287 # **Time resolution adjustment**
83- # remove begenning audio used for first mel prediction
84- wav = np .load (join (in_dir , audio_filename ))[hparams .hop_size * downsample_step :]
88+ mel_output = mel_output [:- (b_pad + e_pad )]
89+
90+ wav = np .load (join (in_dir , audio_filename ))
8591 assert len (wav ) % hparams .hop_size == 0
8692
8793 # Coarse upsample just for convenience
@@ -92,18 +98,13 @@ def preprocess(model, in_dir, out_dir, text, audio_filename, mel_filename,
9298 # the original mel length
9399 assert mel_output .shape [0 ] >= mel_org .shape [0 ]
94100
95- # Trim mel output
96- expected_frames = len (wav ) // hparams .hop_size
97- mel_output = mel_output [:expected_frames ]
98-
99101 # Make sure we have correct lengths
100102 assert mel_output .shape [0 ] * hparams .hop_size == len (wav )
101103
102104 timesteps = len (wav )
103105
104106 # save
105- np .save (join (out_dir , audio_filename ), wav .astype (np .int16 ),
106- allow_pickle = False )
107+ np .save (join (out_dir , audio_filename ), wav , allow_pickle = False )
107108 np .save (join (out_dir , mel_filename ), mel_output .astype (np .float32 ),
108109 allow_pickle = False )
109110
0 commit comments