
I'm so behind on everything and I don't even know where to start.
That's completely valid. Let's take a breath first, then we'll pick one thing together.
import WebSocket from 'ws';
// Open the realtime session socket.
// The `key` query parameter embeds the current timestamp, so each run of the
// script requests a differently-keyed session.
const realtimeSessionKey = `voice-${Date.now()}`;
const realtimeEndpoint = `wss://api.inworld.ai/api/v1/realtime/session?key=${realtimeSessionKey}&protocol=realtime`;

// Credentials are read from the INWORLD_API_KEY environment variable and sent
// verbatim after the `Basic ` prefix.
// NOTE(review): presumably the variable already holds the encoded Basic
// credential — confirm against the provider's auth docs.
const realtimeHeaders = {
  Authorization: `Basic ${process.env.INWORLD_API_KEY}`,
};

const ws = new WebSocket(realtimeEndpoint, { headers: realtimeHeaders });
// Handle events arriving on the session socket.
//  - `session.created`: reply with a `session.update` that configures the
//    model, instructions, input transcription / turn detection, and output voice.
//  - `response.output_audio.delta`: decode the audio delta, queue it, and kick
//    off playback when nothing is currently playing.
// Every other event type is ignored.
ws.on('message', (raw) => {
  const event = JSON.parse(raw.toString());

  switch (event.type) {
    case 'session.created': {
      // Input side: transcription model plus semantic voice-activity detection
      // that both creates responses and allows interrupting them.
      const audioInput = {
        transcription: { model: 'inworld/inworld-stt-1' },
        turn_detection: {
          type: 'semantic_vad',
          eagerness: 'medium',
          create_response: true,
          interrupt_response: true,
        },
      };

      // Output side: speech model and voice.
      const audioOutput = {
        model: 'inworld-tts-1.5-max',
        voice: 'Sarah',
      };

      const sessionConfig = {
        type: 'realtime',
        model: 'anthropic/claude-sonnet-4-6', // any Router model
        instructions: 'You are a warm conversational assistant.',
        output_modalities: ['audio', 'text'],
        audio: {
          input: audioInput,
          output: audioOutput,
        },
      };

      ws.send(JSON.stringify({ type: 'session.update', session: sessionConfig }));
      break;
    }

    case 'response.output_audio.delta': {
      // NOTE(review): base64ToPcm16 is defined elsewhere; presumably it decodes
      // the base64 delta into PCM16 samples — confirm.
      const decoded = base64ToPcm16(event.delta);
      audioQueue.push(decoded);
      // Only start the playback loop when it is not already running.
      if (!isPlaying) {
        playNext();
      }
      break;
    }

    default:
      break;
  }
});
// Forward each microphone chunk to the session as a base64-encoded
// `input_audio_buffer.append` event.
// NOTE(review): assumes `chunk` is a Buffer (it is serialized with
// `toString('base64')`) — confirm against the `mic` source.
mic.on('data', (chunk) => {
  // `ws.send()` throws while the socket is still CONNECTING, and data sent
  // after the socket starts closing is never delivered. Skip chunks that
  // arrive outside the OPEN state instead of crashing the process from
  // inside this event handler.
  if (ws.readyState !== WebSocket.OPEN) return;

  ws.send(JSON.stringify({
    type: 'input_audio_buffer.append',
    audio: chunk.toString('base64'),
  }));
});