
I need to move my dinner reservation to 8pm.
Done. Dinner for four moved to 8pm. I also bumped your table to the window.
// tools declared at session.update
// if (event.type === "response.function_call") {
//   const result = await getBooking(args);
//   ws.send({ type: "conversation.item.create", ... });
// }
// audio stream never closes during the tool call.
// 1. Connect (same endpoint for browser, server, SIP)
// Open the realtime session socket. The endpoint is built with the URL API so
// that reserved characters in `sessionId` (for example '+', '&' or '#') are
// percent-encoded instead of corrupting the query string, which plain string
// concatenation would allow.
const realtimeUrl = new URL('wss://api.inworld.ai/api/v1/realtime/session');
realtimeUrl.searchParams.set('key', sessionId);
const ws = new WebSocket(realtimeUrl.href, ['realtime']);

// Nothing to do on open: the session is configured when a `session.created`
// message arrives (handled in the message listener below).
ws.addEventListener('open', () => {});

// Dispatch incoming messages by their `type` field.
ws.addEventListener('message', (event) => {
  // A frame that is not valid JSON is logged and skipped rather than being
  // allowed to throw out of the listener.
  let msg;
  try {
    msg = JSON.parse(event.data);
  } catch (err) {
    console.error('Ignoring realtime frame that is not valid JSON:', err);
    return;
  }

  // `msg?.type` tolerates a JSON `null` payload, which would otherwise throw.
  switch (msg?.type) {
    // 2. Configure on session.created
    case 'session.created':
      ws.send(JSON.stringify({
        type: 'session.update',
        session: {
          type: 'realtime',
          model: 'openai/gpt-5.4', // any LLM
          instructions: 'You are a helpful voice agent.',
          output_modalities: ['audio', 'text'],
          audio: {
            input: {
              turn_detection: {
                type: 'semantic_vad',
                eagerness: 'medium',
                create_response: true,
                interrupt_response: true,
              },
            },
            output: {
              model: 'inworld-tts-1.5-max', // top-ranked voice
              voice: 'Clive',
            },
          },
        },
      }));
      break;

    // 3. Play audio deltas as they stream
    case 'response.output_audio.delta':
      // Queue the decoded chunk; start playback only if it is not already
      // running, so chunks are not played over one another.
      audioQueue.push(base64ToPcm16(msg.delta));
      if (!isPlaying) playNext();
      break;

    default:
      // Other message types are intentionally ignored here.
      break;
  }
});

// Stream mic audio in (semantic VAD handles turn detection)
mic.on('data', (chunk) => {
  // WebSocket.send() throws InvalidStateError while the socket is still
  // CONNECTING, and mic data can arrive before the connection opens. Audio
  // captured while the socket is not OPEN is dropped.
  if (ws.readyState !== WebSocket.OPEN) return;

  ws.send(JSON.stringify({
    type: 'input_audio_buffer.append',
    audio: chunk.toString('base64'),
  }));
});