Tuesday, August 18, 2020

Where does Postman save intermediate files? OSError: Invalid argument with FileStorage

I'm using Postman to test my REST API service. The function under test processes two WAV files: it extracts the voice channel and trims it to 10 seconds, reading and writing intermediate files along the way.

Does Postman work with this kind of workflow? Can it keep temporary files somewhere?
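For reference, the request I send from Postman is plain multipart form-data with two file fields; it is roughly equivalent to this (the URL and the file1/file2 field names just mirror the Flask code below, and full_convo2.wav is only a placeholder name):

import requests

files = {
    'file1': open('full_convo1.wav', 'rb'),
    'file2': open('full_convo2.wav', 'rb'),
}
response = requests.post('http://localhost:9090/compare_voices', files=files)
print(response.json())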

The error Postman shows is:

OSError: [Errno 22] Invalid argument: FileStorage: 'full_convo1.wav'

The Flask app accepts two WAV files and calls the processing function on each of them:

from flask import Flask, request, jsonify
import torch

app = Flask(__name__)

@app.route('/compare_voices', methods=['POST'])
def compare_voices():
    # both uploads arrive as werkzeug FileStorage objects
    file1 = request.files["file1"]
    file2 = request.files["file2"]

    embedding1 = get_customer_voice_and_cutting_10_seconds_embedding(file1)
    embedding2 = get_customer_voice_and_cutting_10_seconds_embedding(file2)

    # cosine similarity between the two speaker embeddings
    embedding1 = embedding1 / torch.norm(embedding1, dim=1).unsqueeze(1)
    embedding2 = embedding2 / torch.norm(embedding2, dim=1).unsqueeze(1)
    score = torch.dot(embedding1.squeeze(0), embedding2.squeeze(0)).item()
    print(score)

    answer = 'pass ' if score > 0.9 else 'no pass '
    answer += '%.2f' % (score * 100)
    return jsonify({'response': answer})


if __name__ == '__main__':
    app.run(debug=True, port=9090, use_reloader=False)

The function that actually does the work, saving and reloading temporary files, is below; it returns a tensor:

import os

from scipy.io import wavfile as wf
from pydub import AudioSegment


def get_customer_voice_and_cutting_10_seconds_embedding(file):
    # 'file' is the FileStorage object passed straight in from Flask
    print('getting customer voice only')

    wav = wf.read(file)      # scipy's wavfile.read accepts a file-like object
    ch = wav[1].shape[1]     # number of channels
    sr = wav[0]              # sample rate

    c1 = wav[1][:, 1]        # customer side is the second channel
    if ch == 1:
        exit()               # mono file: nothing to separate

    # keep only the voiced parts of the customer channel
    vad = VoiceActivityDetection()
    vad.process(c1)
    voice_samples = vad.get_voice_samples()
    # NOTE: 'file' is still the FileStorage object here, not a filename string
    wf.write('%s_customer.wav' % file, sr, voice_samples)

    cur_path = os.getcwd()
    filename = [f for f in os.listdir(cur_path) if f.endswith('_customer.wav')][0]

    # trim to the first 10 seconds
    voice = AudioSegment.from_wav(filename)
    new_voice = voice[0:10000]
    file = str(file) + '_10seconds.wav'
    new_voice.export(file, format='wav')

    filename = [f for f in os.listdir(cur_path) if f.endswith('_10seconds.wav')][0]

    return get_embedding(filename)


def get_embedding(wav):
    print('getting d vector')
    # load the pretrained speaker-recognition checkpoint
    model_path = 'pretrained.pth'
    embedder_net = SpeakerRecognition(512, 5994, use_attention=False)
    embedder_net = torch.nn.DataParallel(embedder_net)
    embedder_net = embedder_net.cuda()
    embedder_net.load_state_dict(torch.load(model_path))
    embedder_net.eval()

    # extract features from the wav file and compute the d-vector
    s1 = extract_all_feat(wav, mode='test').transpose()
    s1 = torch.Tensor(s1).unsqueeze(0)
    e1, _ = embedder_net(s1.cuda())
    print(e1)
    return e1
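As far as I can tell, the uploads reach Flask as werkzeug FileStorage objects rather than as files on disk. If the processing really needs a file on disk, saving the upload first might look roughly like this (a sketch; the tmp_uploads folder name is just an example):

import os
from werkzeug.utils import secure_filename

def save_upload(file_storage, folder='tmp_uploads'):
    # FileStorage.save() writes the uploaded stream to a real path on disk
    os.makedirs(folder, exist_ok=True)
    path = os.path.join(folder, secure_filename(file_storage.filename))
    file_storage.save(path)
    return path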

