ASR WebSocket Code Examples

This guide provides complete working examples for implementing the Waves ASR WebSocket API across different platforms and use cases.

JavaScript (Browser with Microphone)

Complete example for browser-based real-time transcription:
// Browser-based ASR with microphone input
let ws;
let audioContext;
let processor;
let source;
let stream;

async function startASR() {
    const apiKey = 'your-api-key';  // Replace with your API key
    const baseUrl = 'wss://waves-api.smallest.ai/api/v1/asr';
    
    // Configure parameters
    const params = new URLSearchParams({
        api_key: apiKey,
        audioEncoding: 'linear16',
        audioSampleRate: '16000',
        audioChannels: '1',
        addPunctuation: 'true',
        speechEndpointing: '300'
    });
    
    const url = `${baseUrl}?${params}`;
    ws = new WebSocket(url);

    ws.onopen = async () => {
        console.log('✅ Connected to ASR service');
        await setupMicrophone();
    };

    ws.onmessage = (event) => {
        try {
            const response = JSON.parse(event.data);
            handleTranscription(response);
        } catch (err) {
            console.error('❌ Parse error:', err);
        }
    };

    ws.onerror = (error) => {
        console.error('❌ WebSocket error:', error);
    };

    ws.onclose = (event) => {
        console.log(`🔌 Connection closed: ${event.code} - ${event.reason}`);
        stopASR();
    };
}

async function setupMicrophone() {
    try {
        // Request microphone access
        stream = await navigator.mediaDevices.getUserMedia({ 
            audio: {
                sampleRate: 16000,
                channelCount: 1,
                echoCancellation: true,
                noiseSuppression: true
            }
        });

        // Create audio context
        audioContext = new AudioContext({ sampleRate: 16000 });
        source = audioContext.createMediaStreamSource(stream);
        
        // Create audio processor
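        // Note: ScriptProcessorNode is deprecated in favor of AudioWorklet; it is used here to keep the example self-contained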
        processor = audioContext.createScriptProcessor(4096, 1, 1);
        
        processor.onaudioprocess = (e) => {
            if (ws.readyState === WebSocket.OPEN) {
                const inputData = e.inputBuffer.getChannelData(0);
                
                // Convert to 16-bit PCM
                const int16Data = new Int16Array(inputData.length);
                for (let i = 0; i < inputData.length; i++) {
                    int16Data[i] = Math.max(-32768, Math.min(32767, inputData[i] * 32768));
                }
                
                // Send audio data
                ws.send(int16Data.buffer);
            }
        };

        // Connect audio nodes
        source.connect(processor);
        processor.connect(audioContext.destination);
        
        console.log('🎤 Recording started. Speak now...');
        
    } catch (err) {
        console.error('❌ Microphone error:', err);
        alert('Microphone access required for ASR functionality');
    }
}

function handleTranscription(response) {
    console.log('📝 Response:', response);
    
    if (response.error) {
        console.error('❌ API Error:', response);
        return;
    }
    
    if (response.text) {
        const endOfTurn = response.isEndOfTurn ? ' [END_OF_TURN]' : '';
        console.log(`📝 ${response.text}${endOfTurn}`);
        updateTranscriptionDisplay(response.text);
    }
}

function updateTranscriptionDisplay(text) {
    const container = document.getElementById('transcription');
    if (!container) return;

    const finalDiv = document.createElement('div');
    finalDiv.className = 'final-transcription';
    finalDiv.textContent = text;
    container.appendChild(finalDiv);
}

function stopASR() {
    if (processor) {
        processor.disconnect();
        processor = null;
    }
    if (source) {
        source.disconnect();
        source = null;
    }
    if (audioContext) {
        audioContext.close();
        audioContext = null;
    }
    if (stream) {
        stream.getTracks().forEach(track => track.stop());
        stream = null;
    }
    if (ws) {
        ws.close();
        ws = null;
    }
    console.log('⏹️ ASR stopped');
}

// Usage
document.addEventListener('DOMContentLoaded', () => {
    const startBtn = document.getElementById('start-asr');
    const stopBtn = document.getElementById('stop-asr');
    
    startBtn?.addEventListener('click', startASR);
    stopBtn?.addEventListener('click', stopASR);
});
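
The example assumes the page provides a start button with id start-asr, a stop button with id stop-asr, and a container element with id transcription, where results are appended as they arrive.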

Server-Side Implementations
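
The Python script below handles both batch transcription of a local audio file and live transcription from the microphone (captured with Sox or FFmpeg). It depends on the third-party websockets package (pip install websockets).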

#!/usr/bin/env python3

import asyncio
import websockets
import json
import sys
import os
import signal
import platform
from pathlib import Path
from urllib.parse import urlencode
import argparse

class WavesASR:
    def __init__(self, api_key):
        self.api_key = api_key
        self.ws = None
        self.connected = False
        self.transcription_complete = False
        self.transcription_text = []

    async def connect(self, params=None):
        if params is None:
            params = {}
        
        base_url = "wss://waves-api.smallest.ai/api/v1/asr"
        
        default_params = {
            "audioLanguage": "en",
            "audioEncoding": "linear16",
            "audioSampleRate": "24000",
            "audioChannels": "1",
            "addPunctuation": "true",
        }
        
        all_params = {**default_params, **params, "api_key": self.api_key}
        
        # urlencode escapes any characters in the key or parameter values that need it
        url = f"{base_url}?{urlencode(all_params)}"
        
        print("🔌 Connecting to ASR service...")
        
        try:
            self.ws = await websockets.connect(url)
            print("✅ Connected to ASR service")
            self.connected = True
            
            # Start listening for messages (keep a reference so the task isn't garbage-collected)
            self._listen_task = asyncio.create_task(self._listen_for_messages())
            
        except Exception as error:
            print(f"❌ WebSocket error: {error}")
            self.connected = False
            raise error

    async def _listen_for_messages(self):
        try:
            async for message in self.ws:
                try:
                    response = json.loads(message)
                    self._handle_response(response)
                except json.JSONDecodeError as error:
                    print(f"❌ JSON parse error: {error}")
        except websockets.exceptions.ConnectionClosed:
            print("🔌 Connection closed")
            self.connected = False
            self.transcription_complete = True

    def _handle_response(self, response):
        if "error" in response:
            print(f"❌ API Error: {response}")
            return
        
        if "text" in response:
            end_marker = " [END]" if response.get("isEndOfTurn", False) else ""
            print(f"📝 {response['text']}{end_marker}")
            self.transcription_text.append(response["text"])

    async def send_audio_file(self, file_path, chunk_size=32000):
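        # Streams the file's raw bytes in fixed-size chunks; the audio should match the
        # encoding, sample rate, and channel count declared at connect time.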
        if not self.connected:
            raise Exception("Not connected to ASR service")
        
        if not Path(file_path).exists():
            raise Exception(f"Audio file not found: {file_path}")
        
        print(f"📂 Processing audio file: {file_path}")
        
        with open(file_path, "rb") as file:
            while True:
                chunk = file.read(chunk_size)
                if not chunk:
                    break
                
                if self.connected:
                    await self.ws.send(chunk)
                else:
                    raise Exception("Connection lost during transmission")
        
        print("✅ Audio file transmission complete")

    async def start_microphone_recording(self, duration=10):
        if not self.connected:
            raise Exception("Not connected to ASR service")
        
        print(f"🎤 Starting microphone recording for {duration} seconds...")
        print("Press Ctrl+C to stop recording early")
        
        # Try Sox first, then FFmpeg as fallback
        try:
            await self._try_recording_with_sox(duration)
        except Exception as sox_error:
            print("📝 Sox not available, trying FFmpeg...")
            try:
                await self._try_recording_with_ffmpeg(duration)
            except Exception as ffmpeg_error:
                raise Exception(f"""Recording failed. Please install either Sox or FFmpeg:

macOS:
  brew install sox
  # or
  brew install ffmpeg

Linux:
  sudo apt-get install sox
  # or  
  sudo apt-get install ffmpeg

Windows:
  Download from: https://sox.sourceforge.io/
  # or
  Download from: https://ffmpeg.org/

Original errors:
- Sox: {sox_error}
- FFmpeg: {ffmpeg_error}""")

    async def _try_recording_with_sox(self, duration):
        cmd = [
            "sox",
            "-d",  # default audio device (microphone)
            "-t", "raw",  # output format: raw
            "-r", "24000",  # sample rate: 24000 Hz
            "-e", "signed-integer",  # encoding: signed integer
            "-b", "16",  # bit depth: 16 bits
            "-c", "1",  # channels: mono
            "-",  # output to stdout
            "trim", "0", str(duration)  # record for specified duration
        ]
        
        await self._run_recording_process(cmd, "Sox")

    async def _try_recording_with_ffmpeg(self, duration):
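        # Pick a capture backend by platform: avfoundation on macOS, PulseAudio elsewhere.
        # (Windows would need a different input format, e.g. dshow, which this example doesn't handle.)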
        audio_format = "avfoundation" if platform.system() == "Darwin" else "pulse"
        audio_input = ":0" if platform.system() == "Darwin" else "default"
        
        cmd = [
            "ffmpeg",
            "-f", audio_format,  # audio input format
            "-i", audio_input,  # default microphone
            "-ar", "24000",  # sample rate
            "-ac", "1",  # mono channel
            "-f", "s16le",  # 16-bit little-endian format
            "-t", str(duration),  # duration
            "-"  # output to stdout
        ]
        
        await self._run_recording_process(cmd, "FFmpeg")

    async def _run_recording_process(self, cmd, tool_name):
        process = await asyncio.create_subprocess_exec(
            *cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE
        )
        
        # Handle Ctrl+C gracefully
        def signal_handler(sig, frame):
            print(f"\n🛑 Stopping recording...")
            process.terminate()
        
        signal.signal(signal.SIGINT, signal_handler)
        
        try:
            while True:
                chunk = await process.stdout.read(32000)
                if not chunk:
                    break
                
                if self.connected:
                    await self.ws.send(chunk)
                else:
                    raise Exception("Connection lost during recording")
            
            await process.wait()
            
            if process.returncode == 0:
                print("✅ Microphone recording complete")
            else:
                stderr = await process.stderr.read()
                raise Exception(f"{tool_name} process exited with code {process.returncode}: {stderr.decode()}")
                
        except Exception as e:
            process.terminate()
            raise e

    async def wait_for_transcription(self):
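        # Blocks until the server closes the connection, which is what sets
        # transcription_complete in _listen_for_messages.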
        while not self.transcription_complete:
            await asyncio.sleep(0.1)
        return " ".join(self.transcription_text)

    async def close(self):
        if self.ws:
            await self.ws.close()
        self.connected = False

async def transcribe_file(file_path, api_key):
    asr = WavesASR(api_key)
    
    try:
        await asr.connect({
            "audioLanguage": "en",
            "addPunctuation": "true",
        })
        
        await asr.send_audio_file(file_path)
        full_transcription = await asr.wait_for_transcription()
        
        print("\n🎯 Complete Transcription:")
        print(full_transcription)
        
        return full_transcription
    except Exception as error:
        print(f"❌ Transcription error: {error}")
        raise error
    finally:
        await asr.close()

async def transcribe_from_microphone(duration=10, api_key=None):
    asr = WavesASR(api_key)
    
    try:
        await asr.connect({
            "audioLanguage": "en",
            "addPunctuation": "true",
            "audioSampleRate": "24000",
        })
        
        await asr.start_microphone_recording(duration)
        full_transcription = await asr.wait_for_transcription()
        
        print("\n🎯 Complete Transcription:")
        print(full_transcription)
        
        return full_transcription
    except Exception as error:
        print(f"❌ Transcription error: {error}")
        raise error
    finally:
        await asr.close()

def main():
    parser = argparse.ArgumentParser(
        description="🎤 Audio Transcription Tool",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python simple_transcribe.py recording.wav
  python simple_transcribe.py --mic 5
  python simple_transcribe.py --mic
        """
    )
    
    parser.add_argument("file", nargs="?", help="Audio file to transcribe")
    parser.add_argument("--mic", "-m", type=int, nargs="?", const=10, 
                       help="Record from microphone (default: 10 seconds)")
    
    args = parser.parse_args()
    
    # API key: read from the environment, or paste your key here.
    # (WAVES_API_KEY is just this script's convention, not required by the API.)
    api_key = os.environ.get("WAVES_API_KEY", "")
    
    if not api_key:
        print("❌ API key is required. Set WAVES_API_KEY or assign api_key in main().")
        sys.exit(1)
    
    async def run():
        try:
            if args.mic is not None:
                duration = args.mic
                print(f"🚀 Starting microphone recording for {duration} seconds...")
                await transcribe_from_microphone(duration, api_key)
            elif args.file:
                file_path = args.file
                print(f"🚀 Starting transcription of: {file_path}")
                await transcribe_file(file_path, api_key)
            else:
                # Default to recording.wav if no arguments
                file_path = "recording (1).wav"
                print(f"🚀 Starting transcription of: {file_path}")
                await transcribe_file(file_path, api_key)
            
            print("\n✅ Transcription completed successfully!")
            
        except Exception as error:
            print(f"❌ Failed to transcribe: {error}")
            sys.exit(1)
    
    asyncio.run(run())

if __name__ == "__main__":
    main()
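
For CLI use, export your API key first (for example export WAVES_API_KEY=your-key, the variable name this script reads in main()) or assign it directly to api_key. The coroutines can also be reused from your own code; a minimal sketch, assuming the file is saved as simple_transcribe.py (the name used in the --help examples):

import asyncio

from simple_transcribe import transcribe_file

# Transcribe a local file and print the combined transcription text
text = asyncio.run(transcribe_file("recording.wav", api_key="your-api-key"))
print(text)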