I'm using Postman to test my REST API service. My function processes two WAV files: it extracts the voice channel and trims the audio to 10 seconds, reading and writing temporary files in between.
Does Postman work with this kind of processing? Can it keep the temp files somewhere?
This is the error I get from Postman:
OSError: [Errno 22] Invalid argument: FileStorage: 'full_convo1.wav'
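For reference, I think what Postman sends is just a multipart form-data POST with two file fields, so the equivalent call in Python would be roughly this (the second file name and the local URL are just my test setup):

import requests

# 'file1' / 'file2' have to match the keys read from request.files in the route
files = {
    'file1': open('full_convo1.wav', 'rb'),
    'file2': open('full_convo2.wav', 'rb'),  # second file name is just an example
}
resp = requests.post('http://localhost:9090/compare_voices', files=files)
print(resp.json())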
The Flask app takes the two WAV files and calls the processing function on them:
from flask import Flask, request, jsonify
import torch

app = Flask(__name__)

@app.route('/compare_voices', methods=['POST'])
def compare_voices():
    # the two uploaded WAV files arrive as werkzeug FileStorage objects
    file1 = request.files['file1']
    file2 = request.files['file2']
    embedding1 = get_customer_voice_and_cutting_10_seconds_embedding(file1)
    embedding2 = get_customer_voice_and_cutting_10_seconds_embedding(file2)
    # L2-normalise both embeddings, then use their dot product as a cosine similarity score
    embedding1 = embedding1 / torch.norm(embedding1, dim=1).unsqueeze(1)
    embedding2 = embedding2 / torch.norm(embedding2, dim=1).unsqueeze(1)
    score = torch.dot(embedding1.squeeze(0), embedding2.squeeze(0)).item()
    print(score)
    answer = 'pass ' if score > 0.9 else 'no pass '
    answer += '%.2f' % (score * 100)
    return jsonify({'response': answer})

if __name__ == '__main__':
    app.run(debug=True, port=9090, use_reloader=False)
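Part of what I'm asking is whether I should save the uploads to temp files myself before processing them. This is roughly what I have in mind, using FileStorage.save() and the tempfile module (my guess at the right approach, not something I have working):

import os
import tempfile

def save_upload_to_temp(file_storage):
    # write the uploaded FileStorage to a real file on disk and return its path
    fd, path = tempfile.mkstemp(suffix='.wav')
    os.close(fd)
    file_storage.save(path)
    return path

# then in the route, something like:
#   path1 = save_upload_to_temp(request.files['file1'])
#   embedding1 = get_customer_voice_and_cutting_10_seconds_embedding(path1)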
The actual function that does the work, saving and reloading temp files along the way, is below; it returns a tensor (see also the sketch at the end of the post about where those temp files could live):
import os
from pydub import AudioSegment
from scipy.io import wavfile as wf  # assuming wf is scipy.io.wavfile

def get_customer_voice_and_cutting_10_seconds_embedding(file):
    print('getting customer voice only')
    # read the uploaded audio; wav is a (sample_rate, samples) tuple
    wav = wf.read(file)
    ch = wav[1].shape[1]
    sr = wav[0]
    c1 = wav[1][:, 1]  # take the second channel (index 1) as the customer voice
    if ch == 1:
        exit()
    # keep only the voiced parts of the customer channel
    vad = VoiceActivityDetection()
    vad.process(c1)
    voice_samples = vad.get_voice_samples()
    # write the voiced audio to disk; note that `file` here is still a FileStorage object
    wf.write('%s_customer.wav' % file, sr, voice_samples)
    cur_path = os.getcwd()
    filename = [f for f in os.listdir(cur_path) if f.endswith('_customer.wav')][0]
    # trim to the first 10 seconds (pydub slices in milliseconds)
    voice = AudioSegment.from_wav(filename)
    new_voice = voice[0:10000]
    file = str(file) + '_10seconds.wav'
    new_voice.export(file, format='wav')
    filename = [f for f in os.listdir(cur_path) if f.endswith('_10seconds.wav')][0]
    return get_embedding(filename)
def get_embedding(wav):
    print('getting d vector')
    # load the pretrained speaker-recognition model
    model_path = 'pretrained.pth'
    embedder_net = SpeakerRecognition(512, 5994, use_attention=False)
    embedder_net = torch.nn.DataParallel(embedder_net)
    embedder_net = embedder_net.cuda()
    embedder_net.load_state_dict(torch.load(model_path))
    embedder_net.eval()
    # extract features from the wav file and run them through the network
    s1 = extract_all_feat(wav, mode='test').transpose()
    s1 = torch.Tensor(s1).unsqueeze(0)
    e1, _ = embedder_net(s1.cuda())
    print(e1)
    return e1
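I also wonder whether the intermediate files (the _customer.wav and _10seconds.wav ones) should go into a temporary directory instead of the current working directory, with names built from file.filename rather than from the FileStorage object itself. A rough sketch of what I mean (tempfile.mkdtemp and secure_filename are assumptions on my part):

import os
import tempfile
from werkzeug.utils import secure_filename

def temp_wav_path(file_storage, suffix):
    # e.g. <tmp dir>/full_convo1_customer.wav, derived from the upload's original name
    tmp_dir = tempfile.mkdtemp()
    base = os.path.splitext(secure_filename(file_storage.filename))[0]
    return os.path.join(tmp_dir, base + suffix)

# e.g. wf.write(temp_wav_path(file, '_customer.wav'), sr, voice_samples)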