Optimization, performance, and implementation best practices for the ASR WebSocket API
// Recommended recognition settings for this guide's examples.
// All values are strings, exactly as in the original snippet
// (presumably because they are sent as connection parameters — confirm
// against the API reference before changing their types).
const optimalConfig = {
  audioEncoding: 'linear16',
  audioSampleRate: '16000',
  audioChannels: '1', // Mono for efficiency
  addPunctuation: 'true',
};
// Audio preprocessing example

/**
 * Normalizes a chunk of audio samples and applies a first-order high-pass
 * (pre-emphasis) filter to reduce low-frequency noise.
 *
 * Each call is independent: the peak is measured per chunk and the filter
 * starts with a previous-input value of 0, so the first sample of every chunk
 * passes through unfiltered.
 *
 * @param {Float32Array|number[]} audioBuffer - Input samples (not modified).
 * @returns {Float32Array} New buffer of the same length. Values may exceed
 *   the [-1, 1] range after filtering; the Int16 conversion clamps them.
 */
function preprocessAudio(audioBuffer) {
  const processedBuffer = new Float32Array(audioBuffer.length);

  // 1. Normalize audio levels.
  // Scan for the peak with a loop: spreading the samples into Math.max()
  // passes every sample as a call argument and can throw a RangeError on
  // long buffers, besides allocating a temporary copy.
  let peak = 0;
  for (let i = 0; i < audioBuffer.length; i++) {
    const magnitude = Math.abs(audioBuffer[i]);
    if (magnitude > peak) {
      peak = magnitude;
    }
  }
  // Scale the peak to 0.8; leave silent (all-zero) or empty input untouched.
  const normalizationFactor = peak > 0 ? 0.8 / peak : 1;

  // 2. Apply normalization and basic filtering.
  // y[n] = x[n] - 0.95 * x[n - 1]. The subtraction must use the previous
  // *input* sample: feeding back the previous *output* turns this into a
  // recursive filter that strongly boosts the highest frequencies and rings
  // on constant input instead of removing low-frequency content.
  let previousInput = 0;
  for (let i = 0; i < audioBuffer.length; i++) {
    const current = audioBuffer[i] * normalizationFactor;
    processedBuffer[i] = current - 0.95 * previousInput;
    previousInput = current;
  }

  return processedBuffer;
}

/**
 * Converts float samples to 16-bit signed integers.
 *
 * @param {Float32Array} samples - Float samples, nominally in [-1, 1].
 * @returns {Int16Array} Samples scaled by 32768 and clamped to the Int16
 *   range, so out-of-range input saturates instead of wrapping around.
 */
function toInt16Samples(samples) {
  const int16Data = new Int16Array(samples.length);
  for (let i = 0; i < samples.length; i++) {
    int16Data[i] = Math.max(-32768, Math.min(32767, samples[i] * 32768));
  }
  return int16Data;
}

// Apply in audio processor
processor.onaudioprocess = (e) => {
  // Skip all processing when the chunk cannot be sent anyway.
  if (ws.readyState !== WebSocket.OPEN) {
    return;
  }

  const inputData = e.inputBuffer.getChannelData(0);
  const processedData = preprocessAudio(inputData);

  // Convert to Int16 and send
  ws.send(toInt16Samples(processedData).buffer);
};
Connection Refused or Timeout
No Transcription Results
High Latency or Delays
speechEndThreshold