
I've been putting this off for months.
Let's take a breath and pick one thing together.
import fs from 'fs';
// Read the clip from disk and base64-encode it so the bytes can be embedded
// in the JSON request body below.
const audio = fs.readFileSync('clip.wav').toString('base64');

// POST the encoded audio together with the transcription settings.
// The API key is read from the INWORLD_API_KEY environment variable and sent
// as-is in a Basic Authorization header.
const resp = await fetch('https://api.inworld.ai/stt/v1/transcribe', {
  method: 'POST',
  headers: {
    Authorization: `Basic ${process.env.INWORLD_API_KEY}`,
    'Content-Type': 'application/json',
  },
  body: JSON.stringify({
    transcribeConfig: {
      modelId: 'inworld/inworld-stt-1',
      audioEncoding: 'AUTO_DETECT',
      language: 'en-US',
    },
    audioData: { content: audio },
  }),
});

// fetch() only rejects on network failure; an HTTP error status still
// resolves. Fail loudly here instead of destructuring an error payload into
// undefined values (or hitting an opaque JSON parse error on a non-JSON body).
if (!resp.ok) {
  const detail = await resp.text();
  throw new Error(
    `Transcription request failed: ${resp.status} ${resp.statusText} - ${detail}`,
  );
}

const { transcription, voiceProfile } = await resp.json();
// voiceProfile = { emotion, age, accent, pitch, vocalStyle }
// each with { value, confidence }