import os
from jiwer import wer, cer
from whisper_normalizer.english import EnglishTextNormalizer
from smallestai.waves import WavesClient
# Module-level Waves client shared by every call in this file. The API key is
# read from the SMALLEST_AI_API_KEY environment variable at import time, so
# importing this module raises KeyError when the variable is unset.
client = WavesClient(api_key=os.environ["SMALLEST_AI_API_KEY"])
# Module-level text normalizer; run_sample applies it to both the reference
# text and the returned transcription before they are scored.
normalizer = EnglishTextNormalizer()
def run_sample(sample):
    """Transcribe one sample and score the result against its reference.

    The sample's audio is sent to the module-level ``client`` with word
    timestamps and diarization enabled. The reference text and the returned
    transcription are both passed through the module-level ``normalizer``
    before jiwer's ``wer`` and ``cer`` are computed on them.

    Args:
        sample: Mapping providing the keys ``"audio"`` (forwarded as
            ``audio_file``; presumably a file path, confirm against the
            caller), ``"language"`` and ``"reference"``.

    Returns:
        A dict with the keys ``"path"``, ``"wer"``, ``"cer"``,
        ``"latency_ms"``, ``"rtf"`` and ``"transcription"``. The latency and
        real-time-factor values are copied unchanged from the response's
        ``metrics`` mapping, and ``"transcription"`` is the raw
        (un-normalized) text from the response.

    NOTE(review): the normalizer is the English one regardless of
    ``sample["language"]``; confirm that non-English samples are not
    expected here.
    """
    result = client.transcribe(
        audio_file=sample["audio"],
        language=sample["language"],
        word_timestamps=True,
        diarize=True,
    )

    # Normalize both sides identically so scoring compares like with like.
    reference_text = normalizer(sample["reference"])
    hypothesis_text = normalizer(result.transcription)

    # Build the report key by key, preserving the original evaluation order.
    report = {"path": sample["audio"]}
    report["wer"] = wer(reference_text, hypothesis_text)
    report["cer"] = cer(reference_text, hypothesis_text)
    report["latency_ms"] = result.metrics["latency_ms"]
    report["rtf"] = result.metrics["real_time_factor"]
    report["transcription"] = result.transcription
    return report