fastrepl · yujonglee · Aug 28, 2025 · Aug 28, 2025 · Aug 28, 2025 · Aug 29, 2025
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/crates/transcribe-whisper-local/src/service/streaming.rs b/crates/transcribe-whisper-local/src/service/streaming.rs
@@ -90,15 +90,18 @@ where
                 }
             };
 
+            let languages = params
+                .languages
+                .iter()
+                .filter_map(|lang| lang.clone().try_into().ok())
+                .collect::<Vec<hypr_whisper::Language>>();
+
+            let vocabulary = params.vocabulary.clone();
+
             let model = match hypr_whisper_local::Whisper::builder()
                 .model_path(model_path.to_str().unwrap())
-                .languages(
-                    params
-                        .languages
-                        .iter()
-                        .filter_map(|lang| lang.clone().try_into().ok())
-                        .collect::<Vec<hypr_whisper::Language>>(),
-                )
+                .languages(languages)
+                .vocabulary(vocabulary)
                 .build()
             {
                 Ok(model) => model,

diff --git a/crates/whisper-local/Cargo.toml b/crates/whisper-local/Cargo.toml
@@ -17,18 +17,23 @@ openmp = ["whisper-rs/openmp"]
 [dev-dependencies]
 hypr-data = { workspace = true }
 
+criterion = { workspace = true }
 dirs = { workspace = true }
 futures-util = { workspace = true }
 tokio = { workspace = true }
 
+[[bench]]
+name = "whisper_transcription"
+harness = false
+
 [dependencies]
 hypr-audio-utils = { workspace = true }
 hypr-whisper = { workspace = true }
 
 dasp = { workspace = true }
 kalosm-sound = { workspace = true, default-features = false }
 rodio = { workspace = true }
-whisper-rs = { version = "0.15.0", features = ["raw-api", "tracing_backend"] }
+whisper-rs = { git = "https://codeberg.org/tazz4843/whisper-rs", rev = "3e6d3da", features = ["raw-api", "tracing_backend"] }
 
 futures-util = { workspace = true }
 tracing = { workspace = true }
@@ -37,6 +42,7 @@ serde = { workspace = true }
 serde_json = { workspace = true }
 specta = { workspace = true, features = ["derive"] }
 thiserror = { workspace = true }
+trie-rs = "0.4.2"
 
 lazy_static = { workspace = true }
 regex = { workspace = true }
diff --git a/crates/whisper-local/benches/whisper_transcription.rs b/crates/whisper-local/benches/whisper_transcription.rs
@@ -0,0 +1,88 @@
+use std::hint::black_box;
+use std::time::Duration;
+
+use criterion::{criterion_group, criterion_main, Criterion};
+use hypr_whisper::Language;
+use whisper_local::Whisper;
+
+/// Benchmarks `Whisper::transcribe` on the `hypr_data::english_1::AUDIO` sample for two
+/// models loaded from the same `model.bin`: one built without a vocabulary and one built
+/// with a custom vocabulary list.
+///
+/// Both benchmarks are registered in the `whisper_comparison` group, which is configured
+/// with a 100-second measurement time and a sample size of 10.
+fn benchmark_whisper_transcription(c: &mut Criterion) {
+    // Words handed to the vocabulary-enabled model.
+    const VOCABULARY: &[&str] = &[
+        "profound",
+        "acquire",
+        "complementary",
+        "deeply",
+        "repositories",
+        "brilliant",
+        "pockets",
+        "thread",
+        "stumbling",
+        "stumble",
+        "communities",
+        "invested",
+        "undergrad",
+        "Googleable",
+        "exploring",
+        "neuroscientist",
+        "psychology",
+        "engineering",
+        "researcher",
+        "thinker",
+        "skill",
+        "invest",
+        "solved",
+        "entire",
+        "especially",
+        "actually",
+        "often",
+        "already",
+        "important",
+        "definitely",
+        "much",
+    ];
+
+    // Decode the raw bytes as little-endian i16 samples and scale each by 1/32768.
+    let raw = hypr_data::english_1::AUDIO;
+    let mut audio: Vec<f32> = Vec::with_capacity(raw.len() / 2);
+    for pair in raw.chunks_exact(2) {
+        let sample = i16::from_le_bytes([pair[0], pair[1]]);
+        audio.push(sample as f32 / 32768.0);
+    }
+
+    // The model file is expected next to this crate's Cargo.toml.
+    let model_path = concat!(env!("CARGO_MANIFEST_DIR"), "/model.bin");
+
+    let mut whisper_without_vocab = Whisper::builder()
+        .model_path(model_path)
+        .languages(vec![Language::En])
+        .build()
+        .unwrap();
+
+    let mut whisper_with_vocab = Whisper::builder()
+        .model_path(model_path)
+        .languages(vec![Language::En])
+        .vocabulary(VOCABULARY.iter().copied().map(|word| word.into()).collect())
+        .build()
+        .unwrap();
+
+    let mut group = c.benchmark_group("whisper_comparison");
+    group.measurement_time(Duration::from_secs(100));
+    group.sample_size(10);
+
+    // Register the two variants in the same order as before: baseline first.
+    for (label, model) in [
+        ("without_vocab", &mut whisper_without_vocab),
+        ("with_vocab", &mut whisper_with_vocab),
+    ] {
+        group.bench_function(label, |b| {
+            b.iter(|| black_box(model.transcribe(black_box(&audio)).unwrap()))
+        });
+    }
+
+    group.finish();
+}
+
+// Register the benchmark function in a Criterion group named `benches` and hand that
+// group to `criterion_main!` (the bench target sets `harness = false` in Cargo.toml).
+criterion_group!(benches, benchmark_whisper_transcription);
+criterion_main!(benches);
diff --git a/crates/whisper-local/src/bias.rs b/crates/whisper-local/src/bias.rs
@@ -0,0 +1,93 @@
+use trie_rs::map::{Trie, TrieBuilder};
+use whisper_rs::{WhisperContext, WhisperTokenId};
+
+/// Trie of Whisper token-id sequences, each stored with an `f32` bias value.
+///
+/// Built by `BiasTrie::new` from a custom vocabulary and consulted by
+/// `apply_bias_to_logits` to adjust logits.
+#[derive(Clone)]
+pub struct BiasTrie {
+    // Keys are sequences of `WhisperTokenId`; the value is the bias stored for that sequence.
+    trie: Trie<WhisperTokenId, f32>,
+}
+
+impl BiasTrie {
+    /// Builds the bias trie from a custom vocabulary.
+    ///
+    /// Every word is tokenized in several surface forms by
+    /// `generate_tokenization_variants`. For a token sequence of length `n`, each prefix of
+    /// length `i` (1..=n) is stored with the bias `10 + 90 * (i / n)^2`, so the value rises
+    /// from `10 + 90 / n^2` for the first token to `100` for the complete sequence.
+    ///
+    /// When several words or variants share a prefix, the largest bias computed for that
+    /// prefix is the one stored.
+    ///
+    /// # Errors
+    ///
+    /// Returns the error from `generate_tokenization_variants` if tokenizing any form of
+    /// any word fails.
+    pub fn new(ctx: &WhisperContext, custom_vocab: &[&str]) -> Result<Self, crate::Error> {
+        // Accumulate in a map first so that every prefix reaches the trie builder exactly
+        // once, carrying the maximum bias seen for it. Pushing the same key repeatedly
+        // would leave the stored value dependent on insertion order
+        // (NOTE(review): confirm duplicate-key semantics of `TrieBuilder::push`).
+        let mut max_bias_by_prefix: std::collections::HashMap<Vec<WhisperTokenId>, f32> =
+            std::collections::HashMap::new();
+
+        for word in custom_vocab {
+            for tokens in Self::generate_tokenization_variants(ctx, word)? {
+                // An empty tokenization yields an empty range below and contributes nothing.
+                let total = tokens.len() as f32;
+
+                for prefix_len in 1..=tokens.len() {
+                    let progress = prefix_len as f32 / total;
+                    let bias = 10.0 + 90.0 * progress.powi(2);
+
+                    max_bias_by_prefix
+                        .entry(tokens[..prefix_len].to_vec())
+                        .and_modify(|current| *current = current.max(bias))
+                        .or_insert(bias);
+                }
+            }
+        }
+
+        let mut builder = TrieBuilder::new();
+        for (prefix, bias) in max_bias_by_prefix {
+            builder.push(&prefix, bias);
+        }
+
+        Ok(BiasTrie {
+            trie: builder.build(),
+        })
+    }
+
+    /// Tokenizes the surface forms of `word` that the bias trie should recognise.
+    ///
+    /// The forms, in output order, are:
+    /// 1. `word` as given,
+    /// 2. `word` with a leading space,
+    /// 3. the lowercase form, then the lowercase form with a leading space
+    ///    (both only when lowercasing changes the word),
+    /// 4. the uppercase form (only when uppercasing changes the word),
+    /// 5. `word` preceded by `'`,
+    /// 6. `word` preceded by `"`.
+    ///
+    /// Each form is passed to `ctx.tokenize` with `99` as the second argument
+    /// (presumably the maximum token count; confirm against the pinned whisper-rs revision).
+    ///
+    /// # Errors
+    ///
+    /// Stops at, and returns, the first tokenization failure.
+    fn generate_tokenization_variants(
+        ctx: &WhisperContext,
+        word: &str,
+    ) -> Result<Vec<Vec<WhisperTokenId>>, crate::Error> {
+        // Assemble every text form up front, preserving the order listed above.
+        let mut forms: Vec<String> = vec![word.to_string(), format!(" {}", word)];
+
+        let lowercase = word.to_lowercase();
+        if lowercase != word {
+            let spaced_lowercase = format!(" {}", lowercase);
+            forms.push(lowercase);
+            forms.push(spaced_lowercase);
+        }
+
+        let uppercase = word.to_uppercase();
+        if uppercase != word {
+            forms.push(uppercase);
+        }
+
+        forms.push(format!("'{}", word));
+        forms.push(format!("\"{}", word));
+
+        let mut variants = Vec::with_capacity(forms.len());
+        for form in &forms {
+            variants.push(ctx.tokenize(form, 99)?);
+        }
+
+        Ok(variants)
+    }
+
+    /// Raises the logits of tokens that would continue a sequence stored in the trie.
+    ///
+    /// For every suffix of the `tokens` array up to 10 tokens long, each trie entry that
+    /// starts with that suffix and is longer than it adds `ln(bias) * 2.0` to the logit of
+    /// the token that follows the suffix in that entry. Several entries (and several
+    /// suffix lengths) can name the same next token, in which case their boosts add up.
+    ///
+    /// Does nothing when `tokens` or `logits` is null or when `n_tokens <= 0`. Entries
+    /// whose next token id is negative are skipped, because a negative id cannot index
+    /// the logits buffer.
+    ///
+    /// # Safety
+    ///
+    /// - `tokens` must point to `n_tokens` readable, initialized `whisper_token_data`
+    ///   values.
+    /// - `logits` must point to a writable `f32` buffer that can be indexed by every
+    ///   non-negative token id stored in the trie (presumably one slot per vocabulary
+    ///   entry; verify against the callback that invokes this method).
+    pub unsafe fn apply_bias_to_logits(
+        &self,
+        tokens: *const whisper_rs::whisper_rs_sys::whisper_token_data,
+        n_tokens: std::os::raw::c_int,
+        logits: *mut f32,
+    ) {
+        if tokens.is_null() || logits.is_null() || n_tokens <= 0 {
+            return;
+        }
+
+        // SAFETY: `tokens` is non-null and `n_tokens` is positive (checked above); the
+        // caller guarantees it points to `n_tokens` initialized elements.
+        let current_tokens: Vec<WhisperTokenId> =
+            unsafe { std::slice::from_raw_parts(tokens, n_tokens as usize) }
+                .iter()
+                .map(|t| t.id)
+                .collect();
+
+        for suffix_len in 1..=std::cmp::min(10, current_tokens.len()) {
+            let suffix = &current_tokens[current_tokens.len() - suffix_len..];
+
+            for (full_sequence, bias_value_ref) in self.trie.predictive_search(suffix) {
+                // The annotation selects the collection type the search results are built into.
+                let full_sequence: Vec<WhisperTokenId> = full_sequence;
+
+                // Entries no longer than the suffix have no continuation token to boost.
+                let Some(&next_token) = full_sequence.get(suffix.len()) else {
+                    continue;
+                };
+
+                // A negative id would turn into a negative pointer offset; skip it.
+                let Ok(index) = usize::try_from(next_token) else {
+                    continue;
+                };
+
+                let bias_value = *bias_value_ref;
+                let boost = bias_value.ln() * 2.0;
+
+                // SAFETY: `logits` is non-null (checked above) and the caller guarantees
+                // the buffer covers every non-negative token id stored in the trie.
+                unsafe {
+                    *logits.add(index) += boost;
+                }
+            }
+        }
+    }
+}
diff --git a/crates/whisper-local/src/lib.rs b/crates/whisper-local/src/lib.rs
@@ -9,6 +9,9 @@ pub use model::*;
 mod error;
 pub use error::*;
 
+mod bias;
+use bias::*;
+
 #[derive(Debug, Clone, serde::Serialize, serde::Deserialize, specta::Type)]
 pub struct GgmlBackend {
     pub kind: String,