diff --git a/Cargo.toml b/Cargo.toml
index 99096be..8329fb2 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,5 +1,6 @@
 [workspace]
 members = ["sys"]
+exclude = ["examples/full_usage"]
 
 [package]
 name = "whisper-rs"
diff --git a/examples/full_usage/2830-3980-0043.wav b/examples/full_usage/2830-3980-0043.wav
new file mode 100644
index 0000000..d6b8463
Binary files /dev/null and b/examples/full_usage/2830-3980-0043.wav differ
diff --git a/examples/full_usage/Cargo.toml b/examples/full_usage/Cargo.toml
new file mode 100644
index 0000000..97020b0
--- /dev/null
+++ b/examples/full_usage/Cargo.toml
@@ -0,0 +1,10 @@
+[package]
+name = "full_usage"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+hound = "3"
+whisper-rs = { path = "../.." }
diff --git a/examples/full_usage/src/main.rs b/examples/full_usage/src/main.rs
new file mode 100644
index 0000000..d7da7a9
--- /dev/null
+++ b/examples/full_usage/src/main.rs
@@ -0,0 +1,74 @@
+#![allow(clippy::uninlined_format_args)]
+
+use hound::{SampleFormat, WavReader};
+use std::path::Path;
+use whisper_rs::{FullParams, SamplingStrategy, WhisperContext};
+
+/// Reads every sample from the WAV file at `path`.
+///
+/// # Panics
+///
+/// Panics if the file cannot be opened, if it is not mono 16-bit integer
+/// audio at 16 kHz, or if a sample fails to decode.
+fn parse_wav_file(path: &Path) -> Vec<i16> {
+    let reader = WavReader::open(path).expect("failed to read file");
+
+    // Fetch the header once rather than once per check.
+    let spec = reader.spec();
+    if spec.channels != 1 {
+        panic!("expected mono audio file");
+    }
+    if spec.sample_format != SampleFormat::Int {
+        panic!("expected integer sample format");
+    }
+    if spec.sample_rate != 16000 {
+        panic!("expected 16KHz sample rate");
+    }
+    if spec.bits_per_sample != 16 {
+        panic!("expected 16 bits per sample");
+    }
+
+    reader
+        .into_samples::<i16>()
+        .map(|x| x.expect("sample"))
+        .collect::<Vec<_>>()
+}
+
+/// Transcribes the WAV file named by the first argument using the Whisper
+/// model named by the second argument, printing one line per segment.
+fn main() {
+    let arg1 = std::env::args()
+        .nth(1)
+        .expect("first argument should be path to WAV file");
+    let audio_path = Path::new(&arg1);
+    // `is_file` is already false for a missing path, so this single check
+    // rejects both missing paths and directories.
+    if !audio_path.is_file() {
+        panic!("expected a file");
+    }
+    let arg2 = std::env::args()
+        .nth(2)
+        .expect("second argument should be path to Whisper model");
+    let whisper_path = Path::new(&arg2);
+    if !whisper_path.is_file() {
+        panic!("expected a whisper model file");
+    }
+
+    let original_samples = parse_wav_file(audio_path);
+    let samples = whisper_rs::convert_integer_to_float_audio(&original_samples);
+
+    let mut ctx =
+        WhisperContext::new(&whisper_path.to_string_lossy()).expect("failed to open model");
+    let params = FullParams::new(SamplingStrategy::default());
+
+    ctx.full(params, &samples)
+        .expect("failed to convert samples");
+
+    let num_segments = ctx.full_n_segments();
+    for i in 0..num_segments {
+        let segment = ctx.full_get_segment_text(i).expect("failed to get segment");
+        let start_timestamp = ctx.full_get_segment_t0(i);
+        let end_timestamp = ctx.full_get_segment_t1(i);
+        println!("[{} - {}]: {}", start_timestamp, end_timestamp, segment);
+    }
+}