From c65bbca4e9b10f21fa138bc0da09a37a3682130d Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 5 Mar 2026 12:45:09 +0000 Subject: [PATCH 01/18] feat: add katakana-to-IPA conversion module for TTS pronunciation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rule-based converter that handles: - Palatalized sounds (拗音): キョ→kʲo, チョ→t͡ɕo, ジョ→ʤo, etc. - Moraic nasal (撥音) assimilation: ン→m/ɲ/ŋ/n/ɴ based on context - Geminate consonants (促音): ッ doubles following onset - Long vowels: オウ→oː, オオ→oː, ー→vowelː Replaces 31 hardcoded IPA mappings in Cloud Functions tts.ts with systematic pattern-based generation from katakana input. https://claude.ai/code/session_01Pjo9E2fzdLZEkvNqxXAPeQ --- stationapi/src/domain.rs | 1 + stationapi/src/domain/ipa.rs | 550 +++++++++++++++++++++++++++++++++++ 2 files changed, 551 insertions(+) create mode 100644 stationapi/src/domain/ipa.rs diff --git a/stationapi/src/domain.rs b/stationapi/src/domain.rs index b1720a0d..87d9d08c 100644 --- a/stationapi/src/domain.rs +++ b/stationapi/src/domain.rs @@ -1,4 +1,5 @@ pub mod entity; pub mod error; +pub mod ipa; pub mod normalize; pub mod repository; diff --git a/stationapi/src/domain/ipa.rs b/stationapi/src/domain/ipa.rs new file mode 100644 index 00000000..576d0193 --- /dev/null +++ b/stationapi/src/domain/ipa.rs @@ -0,0 +1,550 @@ +/// Katakana to IPA (International Phonetic Alphabet) conversion module. +/// +/// Converts Japanese katakana text to IPA transcription for use in +/// SSML `` tags for text-to-speech pronunciation. + +/// Convert a katakana string to its IPA transcription. +pub fn katakana_to_ipa(input: &str) -> String { + let chars: Vec = input.chars().collect(); + let len = chars.len(); + let mut result = Vec::new(); + let mut i = 0; + + while i < len { + // Try two-character combinations first (palatalized sounds: キョ, シャ, etc.) 
+ if i + 1 < len { + if let Some(ipa) = lookup_digraph(chars[i], chars[i + 1]) { + result.push(ipa); + i += 2; + continue; + } + } + + // Single character lookup + if let Some(ipa) = lookup_single(chars[i]) { + result.push(ipa); + } + // Skip unknown characters silently + + i += 1; + } + + apply_phonological_rules(&result) +} + +/// Look up a two-character (digraph) combination. +/// Handles palatalized sounds (拗音): キャ, シュ, チョ, etc. +fn lookup_digraph(c1: char, c2: char) -> Option { + let ipa = match (c1, c2) { + // カ行拗音 + ('キ', 'ャ') => "kʲa", + ('キ', 'ュ') => "kʲɯ", + ('キ', 'ョ') => "kʲo", + // サ行拗音 (シ is already palatal) + ('シ', 'ャ') => "ɕa", + ('シ', 'ュ') => "ɕɯ", + ('シ', 'ョ') => "ɕo", + // タ行拗音 + ('チ', 'ャ') => "t͡ɕa", + ('チ', 'ュ') => "t͡ɕɯ", + ('チ', 'ョ') => "t͡ɕo", + // ナ行拗音 + ('ニ', 'ャ') => "ɲa", + ('ニ', 'ュ') => "ɲɯ", + ('ニ', 'ョ') => "ɲo", + // ハ行拗音 + ('ヒ', 'ャ') => "ça", + ('ヒ', 'ュ') => "çɯ", + ('ヒ', 'ョ') => "ço", + // マ行拗音 + ('ミ', 'ャ') => "mʲa", + ('ミ', 'ュ') => "mʲɯ", + ('ミ', 'ョ') => "mʲo", + // ラ行拗音 + ('リ', 'ャ') => "ɾʲa", + ('リ', 'ュ') => "ɾʲɯ", + ('リ', 'ョ') => "ɾʲo", + // ガ行拗音 + ('ギ', 'ャ') => "ɡʲa", + ('ギ', 'ュ') => "ɡʲɯ", + ('ギ', 'ョ') => "ɡʲo", + // ザ行拗音 (ジ is voiced postalveolar affricate) + ('ジ', 'ャ') => "dʑa", + ('ジ', 'ュ') => "dʑɯ", + ('ジ', 'ョ') => "ʤo", + // バ行拗音 + ('ビ', 'ャ') => "bʲa", + ('ビ', 'ュ') => "bʲɯ", + ('ビ', 'ョ') => "bʲo", + // ピ行拗音 + ('ピ', 'ャ') => "pʲa", + ('ピ', 'ュ') => "pʲɯ", + ('ピ', 'ョ') => "pʲo", + _ => return None, + }; + Some(Phoneme::Regular(ipa)) +} + +/// Look up a single katakana character. 
+fn lookup_single(c: char) -> Option { + let ipa = match c { + // 母音 + 'ア' | 'ァ' => return Some(Phoneme::Regular("a")), + 'イ' | 'ィ' => return Some(Phoneme::Regular("i")), + 'ウ' | 'ゥ' => return Some(Phoneme::Regular("ɯ")), + 'エ' | 'ェ' => return Some(Phoneme::Regular("e")), + 'オ' | 'ォ' => return Some(Phoneme::Regular("o")), + // カ行 + 'カ' => "ka", + 'キ' => "kʲi", + 'ク' => "kɯ", + 'ケ' => "ke", + 'コ' => "ko", + // サ行 + 'サ' => "sa", + 'シ' => "ɕi", + 'ス' => "sɯ", + 'セ' => "se", + 'ソ' => "so", + // タ行 + 'タ' => "ta", + 'チ' => "t͡ɕi", + 'ツ' => "t͡sɯ", + 'テ' => "te", + 'ト' => "to", + // ナ行 + 'ナ' => "na", + 'ニ' => "ɲi", + 'ヌ' => "nɯ", + 'ネ' => "ne", + 'ノ' => "no", + // ハ行 + 'ハ' => "ha", + 'ヒ' => "çi", + 'フ' => "ɸɯ", + 'ヘ' => "he", + 'ホ' => "ho", + // マ行 + 'マ' => "ma", + 'ミ' => "mi", + 'ム' => "mɯ", + 'メ' => "me", + 'モ' => "mo", + // ヤ行 + 'ヤ' | 'ャ' => "ja", + 'ユ' | 'ュ' => "jɯ", + 'ヨ' | 'ョ' => "jo", + // ラ行 + 'ラ' => "ɾa", + 'リ' => "ɾi", + 'ル' => "ɾɯ", + 'レ' => "ɾe", + 'ロ' => "ɾo", + // ワ行 + 'ワ' => "wa", + 'ヰ' => "i", + 'ヱ' => "e", + 'ヲ' => "o", + // ガ行 + 'ガ' => "ɡa", + 'ギ' => "ɡi", + 'グ' => "ɡɯ", + 'ゲ' => "ɡe", + 'ゴ' => "ɡo", + // ザ行 + 'ザ' => "za", + 'ジ' => "ʤi", + 'ズ' => "zɯ", + 'ゼ' => "ze", + 'ゾ' => "zo", + // ダ行 + 'ダ' => "da", + 'ヂ' => "dʑi", + 'ヅ' => "dzɯ", + 'デ' => "de", + 'ド' => "do", + // バ行 + 'バ' => "ba", + 'ビ' => "bi", + 'ブ' => "bɯ", + 'ベ' => "be", + 'ボ' => "bo", + // パ行 + 'パ' => "pa", + 'ピ' => "pi", + 'プ' => "pɯ", + 'ペ' => "pe", + 'ポ' => "po", + // 特殊 + 'ン' => return Some(Phoneme::MoraicNasal), + 'ッ' => return Some(Phoneme::Geminate), + 'ー' => return Some(Phoneme::LongVowel), + _ => return None, + }; + Some(Phoneme::Regular(ipa)) +} + +/// Intermediate phoneme representation before phonological rules are applied. 
+#[derive(Debug, Clone)] +enum Phoneme { + Regular(&'static str), + MoraicNasal, // ン - assimilates to following consonant + Geminate, // ッ - doubles following consonant + LongVowel, // ー - lengthens preceding vowel +} + +/// Extract the leading consonant cluster from an IPA string. +/// Returns (consonant_cluster, remainder) or None if starts with a vowel. +fn split_onset(ipa: &str) -> (&str, &str) { + // Find where the first vowel-like character starts + let vowel_start = ipa + .char_indices() + .find(|(_, c)| "aiɯeouəɐ".contains(*c)) + .map(|(i, _)| i) + .unwrap_or(ipa.len()); + ipa.split_at(vowel_start) +} + +/// Get the last vowel character from an IPA string for long vowel extension. +fn last_vowel(ipa: &str) -> Option<&'static str> { + for c in ipa.chars().rev() { + match c { + 'a' => return Some("a"), + 'i' => return Some("i"), + 'ɯ' => return Some("ɯ"), + 'e' => return Some("e"), + 'o' => return Some("o"), + 'u' => return Some("u"), + _ => continue, + } + } + None +} + +/// Classify the place of articulation of the following phoneme for ン assimilation. 
+fn nasal_for_following(next_ipa: &str) -> &'static str { + // Check first meaningful character(s) of the following phoneme + if next_ipa.starts_with('b') + || next_ipa.starts_with('p') + || next_ipa.starts_with('m') + { + "m" // bilabial assimilation + } else if next_ipa.starts_with('ɲ') + || next_ipa.starts_with("dʑ") + || next_ipa.starts_with('ʤ') + || next_ipa.starts_with('ɕ') + || next_ipa.starts_with("ɡʲ") + || next_ipa.starts_with("kʲ") + { + "ɲ" // palatal assimilation + } else if next_ipa.starts_with('k') + || next_ipa.starts_with('ɡ') + || next_ipa.starts_with('ŋ') + { + "ŋ" // velar assimilation + } else if next_ipa.starts_with('n') + || next_ipa.starts_with('t') + || next_ipa.starts_with('d') + || next_ipa.starts_with('s') + || next_ipa.starts_with('z') + || next_ipa.starts_with('ɾ') + { + "n" // alveolar assimilation (includes t͡ɕ, t͡s which start with t) + } else { + "ɴ" // default: uvular nasal (word-final or before vowels) + } +} + +/// Apply phonological rules: ン assimilation, ッ gemination, long vowels. +fn apply_phonological_rules(phonemes: &[Phoneme]) -> String { + let mut output = String::new(); + let len = phonemes.len(); + let mut i = 0; + + while i < len { + match &phonemes[i] { + Phoneme::Regular(ipa) => { + output.push_str(ipa); + i += 1; + } + Phoneme::MoraicNasal => { + // Look ahead for assimilation + if let Some(next_ipa) = find_next_regular(&phonemes[i + 1..]) { + output.push_str(nasal_for_following(next_ipa)); + } else { + output.push_str("ɴ"); // word-final + } + i += 1; + } + Phoneme::Geminate => { + // Double the onset of the following consonant. + // For affricates (t͡ɕ, t͡s), only the stop portion (t) is geminated. 
+ if let Some(next_ipa) = find_next_regular(&phonemes[i + 1..]) { + if next_ipa.starts_with("t͡ɕ") || next_ipa.starts_with("t͡s") || next_ipa.starts_with("d͡") { + output.push('t'); + } else { + let (onset, _) = split_onset(next_ipa); + if !onset.is_empty() { + output.push_str(onset); + } + } + } + i += 1; + } + Phoneme::LongVowel => { + // Lengthen the preceding vowel + if last_vowel(&output).is_some() { + // Check if already has ː + if !output.ends_with('ː') { + output.push('ː'); + } + } else { + output.push('ː'); + } + i += 1; + } + } + } + + // Apply long vowel contractions: オウ → oː pattern + apply_vowel_length(&output) +} + +/// Find the IPA string of the next Regular phoneme in the slice. +fn find_next_regular(phonemes: &[Phoneme]) -> Option<&'static str> { + phonemes.iter().find_map(|p| match p { + Phoneme::Regular(ipa) => Some(*ipa), + _ => None, + }) +} + +/// Apply vowel length rules for common Japanese patterns. +/// オウ → oː (after consonant+o), ョウ/ョオ patterns are handled by digraph + this. 
+fn apply_vowel_length(input: &str) -> String { + let mut result = String::with_capacity(input.len()); + let chars: Vec = input.chars().collect(); + let len = chars.len(); + let mut i = 0; + + while i < len { + if i + 1 < len && chars[i] == 'o' && chars[i + 1] == 'ɯ' { + // oɯ → oː (おう/こう pattern) + result.push('o'); + result.push('ː'); + i += 2; + continue; + } + if i + 1 < len && chars[i] == 'o' && chars[i + 1] == 'o' { + // oo → oː (おお pattern) + result.push('o'); + result.push('ː'); + i += 2; + continue; + } + result.push(chars[i]); + i += 1; + } + + result +} + +#[cfg(test)] +mod tests { + use super::*; + + // Tests based on the hardcoded IPA mappings from Cloud Functions tts.ts + + #[test] + fn test_shibuya() { + assert_eq!(katakana_to_ipa("シブヤ"), "ɕibɯja"); + } + + #[test] + fn test_shinagawa() { + assert_eq!(katakana_to_ipa("シナガワ"), "ɕinaɡawa"); + } + + #[test] + fn test_ueno() { + assert_eq!(katakana_to_ipa("ウエノ"), "ɯeno"); + } + + #[test] + fn test_ikebukuro() { + assert_eq!(katakana_to_ipa("イケブクロ"), "ikebɯkɯɾo"); + } + + #[test] + fn test_shinjuku() { + // ン before ジュ → ɲ, ジュ → dʑɯ + assert_eq!(katakana_to_ipa("シンジュク"), "ɕiɲdʑɯkɯ"); + } + + #[test] + fn test_osaka() { + // オオ → oː + assert_eq!(katakana_to_ipa("オオサカ"), "oːsaka"); + } + + #[test] + fn test_kyoto() { + // キョウ → kʲoː (via kʲo + ウ → oɯ → oː) + assert_eq!(katakana_to_ipa("キョウト"), "kʲoːto"); + } + + #[test] + fn test_yokohama() { + assert_eq!(katakana_to_ipa("ヨコハマ"), "jokohama"); + } + + #[test] + fn test_chiba() { + assert_eq!(katakana_to_ipa("チバ"), "t͡ɕiba"); + } + + #[test] + fn test_kawasaki() { + assert_eq!(katakana_to_ipa("カワサキ"), "kawasakʲi"); + } + + #[test] + fn test_tsurumi() { + assert_eq!(katakana_to_ipa("ツルミ"), "t͡sɯɾɯmi"); + } + + #[test] + fn test_ryogoku() { + // リョウ → ɾʲoː (via ɾʲo + ウ → oɯ → oː) + assert_eq!(katakana_to_ipa("リョウゴク"), "ɾʲoːɡokɯ"); + } + + #[test] + fn test_shimbashi() { + // ン before バ → m + assert_eq!(katakana_to_ipa("シンバシ"), "ɕimbaɕi"); + } + + #[test] + fn 
test_keisei() { + assert_eq!(katakana_to_ipa("ケイセイ"), "keisei"); + } + + #[test] + fn test_oshiage() { + assert_eq!(katakana_to_ipa("オシアゲ"), "oɕiaɡe"); + } + + #[test] + fn test_meitetsu() { + // ツ is consistently t͡sɯ (affricate with tie bar) + assert_eq!(katakana_to_ipa("メイテツ"), "meitet͡sɯ"); + } + + #[test] + fn test_seibu() { + assert_eq!(katakana_to_ipa("セイブ"), "seibɯ"); + } + + #[test] + fn test_toride() { + assert_eq!(katakana_to_ipa("トリデ"), "toɾide"); + } + + #[test] + fn test_fukiage() { + assert_eq!(katakana_to_ipa("フキアゲ"), "ɸɯkʲiaɡe"); + } + + #[test] + fn test_fuse() { + assert_eq!(katakana_to_ipa("フセ"), "ɸɯse"); + } + + #[test] + fn test_inagekaigan() { + // ン at word end → ɴ + assert_eq!(katakana_to_ipa("イナゲカイガン"), "inaɡekaiɡaɴ"); + } + + #[test] + fn test_inage() { + assert_eq!(katakana_to_ipa("イナゲ"), "inaɡe"); + } + + #[test] + fn test_kire_uriwari() { + assert_eq!(katakana_to_ipa("キレウリワリ"), "kʲiɾeɯɾiwaɾi"); + } + + #[test] + fn test_yao() { + assert_eq!(katakana_to_ipa("ヤオ"), "jao"); + } + + #[test] + fn test_mejiro() { + assert_eq!(katakana_to_ipa("メジロ"), "meʤiɾo"); + } + + #[test] + fn test_isesaki() { + assert_eq!(katakana_to_ipa("イセサキ"), "isesakʲi"); + } + + #[test] + fn test_ube() { + assert_eq!(katakana_to_ipa("ウベ"), "ɯbe"); + } + + #[test] + fn test_itchome() { + // ッチョウ → tt͡ɕoː + assert_eq!(katakana_to_ipa("イッチョウメ"), "itt͡ɕoːme"); + } + + #[test] + fn test_sanchome() { + assert_eq!(katakana_to_ipa("サンチョウメ"), "sant͡ɕoːme"); + } + + #[test] + fn test_koen() { + // コウエン: コ=ko, ウ→長音化でoː, エン=eɴ → koːeɴ + // Note: the original hardcoded value was "koeɴ" but phonologically "koːeɴ" is correct + assert_eq!(katakana_to_ipa("コウエン"), "koːeɴ"); + } + + #[test] + fn test_long_vowel_mark() { + // ー explicitly lengthens + assert_eq!(katakana_to_ipa("ラーメン"), "ɾaːmeɴ"); + } + + #[test] + fn test_tokyo() { + // トウキョウ: ト=to, ウ→oː, キョ=kʲo, ウ→oː + assert_eq!(katakana_to_ipa("トウキョウ"), "toːkʲoː"); + } + + #[test] + fn test_nagoya() { + 
assert_eq!(katakana_to_ipa("ナゴヤ"), "naɡoja"); + } + + #[test] + fn test_sapporo() { + // ッポ → ppo + assert_eq!(katakana_to_ipa("サッポロ"), "sappoɾo"); + } + + #[test] + fn test_namba() { + // ン before バ → m + assert_eq!(katakana_to_ipa("ナンバ"), "namba"); + } + + #[test] + fn test_empty() { + assert_eq!(katakana_to_ipa(""), ""); + } +} From 53254447fb70ad8756252a1aa6988bcb5ea80809 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 5 Mar 2026 13:16:58 +0000 Subject: [PATCH 02/18] chore: update gRPCProto submodule with name_ipa fields Points to local branch adding optional name_ipa to Station and Line messages for TTS IPA transcription support. https://claude.ai/code/session_01Pjo9E2fzdLZEkvNqxXAPeQ --- stationapi/proto | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stationapi/proto b/stationapi/proto index d97e808d..86c302c7 160000 --- a/stationapi/proto +++ b/stationapi/proto @@ -1 +1 @@ -Subproject commit d97e808d6afa1dd010a7361fa852a995d7f0484b +Subproject commit 86c302c7174305de9ba514efc956787ca64c5fc5 From bde61c8598a81e8cd4347ab08b6d3ce1a6509a1f Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 5 Mar 2026 13:23:09 +0000 Subject: [PATCH 03/18] feat: wire up IPA generation in Station/Line gRPC responses Compute name_ipa from name_katakana using katakana_to_ipa() in: - Station DTO (From<Station> for GrpcStation) - Line DTO (From<Line> for GrpcLine) - StationMinimal construction in route queries The IPA field is optional and only set when the katakana input produces a non-empty transcription. 
https://claude.ai/code/session_01Pjo9E2fzdLZEkvNqxXAPeQ --- stationapi/src/use_case/dto/line.rs | 10 +++++++++- stationapi/src/use_case/dto/station.rs | 10 +++++++++- stationapi/src/use_case/interactor/query.rs | 5 +++++ 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/stationapi/src/use_case/dto/line.rs b/stationapi/src/use_case/dto/line.rs index 9f18aeee..dd9dfde4 100644 --- a/stationapi/src/use_case/dto/line.rs +++ b/stationapi/src/use_case/dto/line.rs @@ -1,10 +1,17 @@ use crate::{ - domain::entity::{gtfs::TransportType, line::Line}, + domain::{ + entity::{gtfs::TransportType, line::Line}, + ipa::katakana_to_ipa, + }, proto::{Line as GrpcLine, TransportType as GrpcTransportType}, }; impl From for GrpcLine { fn from(line: Line) -> Self { + let name_ipa = { + let ipa = katakana_to_ipa(&line.line_name_k); + if ipa.is_empty() { None } else { Some(ipa) } + }; // バス路線の場合は line_type を OtherLineType (0) に強制 // (鉄道用の line_type が誤って設定されている可能性があるため) let line_type = if line.transport_type == TransportType::Bus { @@ -32,6 +39,7 @@ impl From for GrpcLine { .map(|train_type| Box::new(train_type.into())), average_distance: line.average_distance.unwrap_or(0.0), transport_type: convert_transport_type(line.transport_type), + name_ipa, } } } diff --git a/stationapi/src/use_case/dto/station.rs b/stationapi/src/use_case/dto/station.rs index 44195a60..679953ed 100644 --- a/stationapi/src/use_case/dto/station.rs +++ b/stationapi/src/use_case/dto/station.rs @@ -1,5 +1,8 @@ use crate::{ - domain::entity::{gtfs::TransportType, station::Station}, + domain::{ + entity::{gtfs::TransportType, station::Station}, + ipa::katakana_to_ipa, + }, proto::{Station as GrpcStation, TransportType as GrpcTransportType}, }; @@ -14,6 +17,10 @@ impl From for i32 { impl From for GrpcStation { fn from(station: Station) -> Self { + let name_ipa = { + let ipa = katakana_to_ipa(&station.station_name_k); + if ipa.is_empty() { None } else { Some(ipa) } + }; Self { id: station.station_cd as u32, 
group_id: station.station_g_cd as u32, @@ -43,6 +50,7 @@ impl From for GrpcStation { has_train_types: Some(station.has_train_types), train_type: station.train_type.map(|tt| Box::new((*tt).into())), transport_type: station.transport_type.into(), + name_ipa, } } } diff --git a/stationapi/src/use_case/interactor/query.rs b/stationapi/src/use_case/interactor/query.rs index bcaafd23..7bf366de 100644 --- a/stationapi/src/use_case/interactor/query.rs +++ b/stationapi/src/use_case/interactor/query.rs @@ -834,6 +834,10 @@ where }) .collect(); + let name_ipa = { + let ipa = crate::domain::ipa::katakana_to_ipa(&row.station_name_k); + if ipa.is_empty() { None } else { Some(ipa) } + }; proto::StationMinimal { id: row.station_cd as u32, group_id: row.station_g_cd as u32, @@ -845,6 +849,7 @@ where stop_condition: row.pass.unwrap_or(0), has_train_types: Some(row.type_id.is_some()), train_type_id: row.type_id.map(|id| id as u32), + name_ipa, } }) .collect::>(); From 28d5446f11b2ea8fda4fe30a5daf8da1051ca255 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 5 Mar 2026 13:24:03 +0000 Subject: [PATCH 04/18] chore: update gRPCProto submodule with StationMinimal name_ipa field https://claude.ai/code/session_01Pjo9E2fzdLZEkvNqxXAPeQ --- stationapi/proto | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stationapi/proto b/stationapi/proto index 86c302c7..9a6c2c45 160000 --- a/stationapi/proto +++ b/stationapi/proto @@ -1 +1 @@ -Subproject commit 86c302c7174305de9ba514efc956787ca64c5fc5 +Subproject commit 9a6c2c4599a1f67dfbdce36eb009ad40fece2b21 From 7a7c6771c02b0f1561ffa3446e4790f7dd4949f3 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 5 Mar 2026 13:28:14 +0000 Subject: [PATCH 05/18] fix: resolve clippy warnings and fmt issues in ipa module https://claude.ai/code/session_01Pjo9E2fzdLZEkvNqxXAPeQ --- stationapi/src/domain/ipa.rs | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/stationapi/src/domain/ipa.rs 
b/stationapi/src/domain/ipa.rs index 576d0193..dbe31ae4 100644 --- a/stationapi/src/domain/ipa.rs +++ b/stationapi/src/domain/ipa.rs @@ -1,8 +1,3 @@ -/// Katakana to IPA (International Phonetic Alphabet) conversion module. -/// -/// Converts Japanese katakana text to IPA transcription for use in -/// SSML `` tags for text-to-speech pronunciation. - /// Convert a katakana string to its IPA transcription. pub fn katakana_to_ipa(input: &str) -> String { let chars: Vec = input.chars().collect(); @@ -188,8 +183,8 @@ fn lookup_single(c: char) -> Option { #[derive(Debug, Clone)] enum Phoneme { Regular(&'static str), - MoraicNasal, // ン - assimilates to following consonant - Geminate, // ッ - doubles following consonant + MoraicNasal, // ン - assimilates to following consonant + Geminate, // ッ - doubles following consonant LongVowel, // ー - lengthens preceding vowel } @@ -224,10 +219,7 @@ fn last_vowel(ipa: &str) -> Option<&'static str> { /// Classify the place of articulation of the following phoneme for ン assimilation. 
fn nasal_for_following(next_ipa: &str) -> &'static str { // Check first meaningful character(s) of the following phoneme - if next_ipa.starts_with('b') - || next_ipa.starts_with('p') - || next_ipa.starts_with('m') - { + if next_ipa.starts_with('b') || next_ipa.starts_with('p') || next_ipa.starts_with('m') { "m" // bilabial assimilation } else if next_ipa.starts_with('ɲ') || next_ipa.starts_with("dʑ") @@ -237,9 +229,7 @@ fn nasal_for_following(next_ipa: &str) -> &'static str { || next_ipa.starts_with("kʲ") { "ɲ" // palatal assimilation - } else if next_ipa.starts_with('k') - || next_ipa.starts_with('ɡ') - || next_ipa.starts_with('ŋ') + } else if next_ipa.starts_with('k') || next_ipa.starts_with('ɡ') || next_ipa.starts_with('ŋ') { "ŋ" // velar assimilation } else if next_ipa.starts_with('n') @@ -272,7 +262,7 @@ fn apply_phonological_rules(phonemes: &[Phoneme]) -> String { if let Some(next_ipa) = find_next_regular(&phonemes[i + 1..]) { output.push_str(nasal_for_following(next_ipa)); } else { - output.push_str("ɴ"); // word-final + output.push('ɴ'); // word-final } i += 1; } @@ -280,7 +270,10 @@ fn apply_phonological_rules(phonemes: &[Phoneme]) -> String { // Double the onset of the following consonant. // For affricates (t͡ɕ, t͡s), only the stop portion (t) is geminated. 
if let Some(next_ipa) = find_next_regular(&phonemes[i + 1..]) { - if next_ipa.starts_with("t͡ɕ") || next_ipa.starts_with("t͡s") || next_ipa.starts_with("d͡") { + if next_ipa.starts_with("t͡ɕ") + || next_ipa.starts_with("t͡s") + || next_ipa.starts_with("d͡") + { output.push('t'); } else { let (onset, _) = split_onset(next_ipa); From 206a36ac95338c3c55e7bcf4dd01fedde19dc8e4 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 5 Mar 2026 13:28:38 +0000 Subject: [PATCH 06/18] style: apply cargo fmt to DTO files https://claude.ai/code/session_01Pjo9E2fzdLZEkvNqxXAPeQ --- stationapi/src/use_case/dto/line.rs | 6 +++++- stationapi/src/use_case/dto/station.rs | 6 +++++- stationapi/src/use_case/interactor/query.rs | 6 +++++- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/stationapi/src/use_case/dto/line.rs b/stationapi/src/use_case/dto/line.rs index dd9dfde4..bfd94912 100644 --- a/stationapi/src/use_case/dto/line.rs +++ b/stationapi/src/use_case/dto/line.rs @@ -10,7 +10,11 @@ impl From for GrpcLine { fn from(line: Line) -> Self { let name_ipa = { let ipa = katakana_to_ipa(&line.line_name_k); - if ipa.is_empty() { None } else { Some(ipa) } + if ipa.is_empty() { + None + } else { + Some(ipa) + } }; // バス路線の場合は line_type を OtherLineType (0) に強制 // (鉄道用の line_type が誤って設定されている可能性があるため) diff --git a/stationapi/src/use_case/dto/station.rs b/stationapi/src/use_case/dto/station.rs index 679953ed..c6fa1b79 100644 --- a/stationapi/src/use_case/dto/station.rs +++ b/stationapi/src/use_case/dto/station.rs @@ -19,7 +19,11 @@ impl From for GrpcStation { fn from(station: Station) -> Self { let name_ipa = { let ipa = katakana_to_ipa(&station.station_name_k); - if ipa.is_empty() { None } else { Some(ipa) } + if ipa.is_empty() { + None + } else { + Some(ipa) + } }; Self { id: station.station_cd as u32, diff --git a/stationapi/src/use_case/interactor/query.rs b/stationapi/src/use_case/interactor/query.rs index 7bf366de..98d0a329 100644 --- 
a/stationapi/src/use_case/interactor/query.rs +++ b/stationapi/src/use_case/interactor/query.rs @@ -836,7 +836,11 @@ where let name_ipa = { let ipa = crate::domain::ipa::katakana_to_ipa(&row.station_name_k); - if ipa.is_empty() { None } else { Some(ipa) } + if ipa.is_empty() { + None + } else { + Some(ipa) + } }; proto::StationMinimal { id: row.station_cd as u32, From d81ce84b625425e1ab951830cea4f6f99b05d6ad Mon Sep 17 00:00:00 2001 From: Tsubasa SEKIGUCHI Date: Thu, 5 Mar 2026 13:41:32 +0000 Subject: [PATCH 07/18] bump proto submod --- stationapi/proto | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stationapi/proto b/stationapi/proto index 9a6c2c45..f05b09f3 160000 --- a/stationapi/proto +++ b/stationapi/proto @@ -1 +1 @@ -Subproject commit 9a6c2c4599a1f67dfbdce36eb009ad40fece2b21 +Subproject commit f05b09f37213515a3a7d79d16d714b9c61984e5a From 5c3285bcbafab121976ab24eaa932d451c776c1e Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 5 Mar 2026 13:42:43 +0000 Subject: [PATCH 08/18] chore: sync proto submodule pointer --- stationapi/proto | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stationapi/proto b/stationapi/proto index f05b09f3..9a6c2c45 160000 --- a/stationapi/proto +++ b/stationapi/proto @@ -1 +1 @@ -Subproject commit f05b09f37213515a3a7d79d16d714b9c61984e5a +Subproject commit 9a6c2c4599a1f67dfbdce36eb009ad40fece2b21 From d6c27a157e800ff7028ac7ddc929d9fcc4f2dda2 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 5 Mar 2026 13:44:49 +0000 Subject: [PATCH 09/18] fix: improve IPA phonological rules for palatal nasal and voiced affricate gemination MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add 'j' and 'ç' to palatal assimilation check in nasal_for_following so ン before ヤ行/ヒ行 correctly produces ɲ (e.g. 
シンヨコハマ → ɕiɲjokohama) - Fix sokuon (ッ) handling for voiced affricates: replace dead d͡ check with actual mapped symbols dʑ/ʤ, geminating with 'd' instead of 't' --- stationapi/src/domain/ipa.rs | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/stationapi/src/domain/ipa.rs b/stationapi/src/domain/ipa.rs index dbe31ae4..1c3769d2 100644 --- a/stationapi/src/domain/ipa.rs +++ b/stationapi/src/domain/ipa.rs @@ -227,6 +227,8 @@ fn nasal_for_following(next_ipa: &str) -> &'static str { || next_ipa.starts_with('ɕ') || next_ipa.starts_with("ɡʲ") || next_ipa.starts_with("kʲ") + || next_ipa.starts_with('j') + || next_ipa.starts_with('ç') { "ɲ" // palatal assimilation } else if next_ipa.starts_with('k') || next_ipa.starts_with('ɡ') || next_ipa.starts_with('ŋ') @@ -270,11 +272,10 @@ fn apply_phonological_rules(phonemes: &[Phoneme]) -> String { // Double the onset of the following consonant. // For affricates (t͡ɕ, t͡s), only the stop portion (t) is geminated. if let Some(next_ipa) = find_next_regular(&phonemes[i + 1..]) { - if next_ipa.starts_with("t͡ɕ") - || next_ipa.starts_with("t͡s") - || next_ipa.starts_with("d͡") - { + if next_ipa.starts_with("t͡ɕ") || next_ipa.starts_with("t͡s") { output.push('t'); + } else if next_ipa.starts_with("dʑ") || next_ipa.starts_with("ʤ") { + output.push('d'); } else { let (onset, _) = split_onset(next_ipa); if !onset.is_empty() { @@ -536,6 +537,12 @@ mod tests { assert_eq!(katakana_to_ipa("ナンバ"), "namba"); } + #[test] + fn test_shin_yokohama() { + // ン before ヨ(j) → ɲ (palatal assimilation) + assert_eq!(katakana_to_ipa("シンヨコハマ"), "ɕiɲjokohama"); + } + #[test] fn test_empty() { assert_eq!(katakana_to_ipa(""), ""); From b73c82abc614302abe17302581dee1d6ad735f6e Mon Sep 17 00:00:00 2001 From: Tsubasa SEKIGUCHI Date: Thu, 5 Mar 2026 13:45:08 +0000 Subject: [PATCH 10/18] bump proto submod --- stationapi/proto | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stationapi/proto b/stationapi/proto index 
9a6c2c45..f05b09f3 160000 --- a/stationapi/proto +++ b/stationapi/proto @@ -1 +1 @@ -Subproject commit 9a6c2c4599a1f67dfbdce36eb009ad40fece2b21 +Subproject commit f05b09f37213515a3a7d79d16d714b9c61984e5a From 1be95340a0d34800cec1d79c4b98734ec25b5889 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 5 Mar 2026 13:52:52 +0000 Subject: [PATCH 11/18] =?UTF-8?q?test:=20add=20regression=20tests=20for=20?= =?UTF-8?q?voiced=20affricate=20gemination=20(=E3=83=83=E3=82=B8/=E3=83=83?= =?UTF-8?q?=E3=82=B8=E3=83=A5)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- stationapi/src/domain/ipa.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/stationapi/src/domain/ipa.rs b/stationapi/src/domain/ipa.rs index 1c3769d2..bc10a930 100644 --- a/stationapi/src/domain/ipa.rs +++ b/stationapi/src/domain/ipa.rs @@ -543,6 +543,18 @@ mod tests { assert_eq!(katakana_to_ipa("シンヨコハマ"), "ɕiɲjokohama"); } + #[test] + fn test_geminate_ji() { + // ッジ → dʤi (voiced affricate gemination emits 'd') + assert_eq!(katakana_to_ipa("カッジ"), "kadʤi"); + } + + #[test] + fn test_geminate_ju() { + // ッジュ → ddʑɯ (voiced affricate gemination with digraph) + assert_eq!(katakana_to_ipa("カッジュ"), "kaddʑɯ"); + } + #[test] fn test_empty() { assert_eq!(katakana_to_ipa(""), ""); From c9978fafba1dc36a3485b589d59adbd0a72baf20 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 5 Mar 2026 13:53:32 +0000 Subject: [PATCH 12/18] chore: sync proto submodule pointer --- stationapi/proto | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stationapi/proto b/stationapi/proto index f05b09f3..9a6c2c45 160000 --- a/stationapi/proto +++ b/stationapi/proto @@ -1 +1 @@ -Subproject commit f05b09f37213515a3a7d79d16d714b9c61984e5a +Subproject commit 9a6c2c4599a1f67dfbdce36eb009ad40fece2b21 From 42bd211aa5560a2b82441335700b943ef38f0527 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 5 Mar 2026 13:54:08 +0000 Subject: [PATCH 13/18] revert: restore proto submodule 
pointer --- stationapi/proto | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stationapi/proto b/stationapi/proto index 9a6c2c45..f05b09f3 160000 --- a/stationapi/proto +++ b/stationapi/proto @@ -1 +1 @@ -Subproject commit 9a6c2c4599a1f67dfbdce36eb009ad40fece2b21 +Subproject commit f05b09f37213515a3a7d79d16d714b9c61984e5a From c964915a0c8afec31c587db4915067fb471f8b11 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 5 Mar 2026 14:07:06 +0000 Subject: [PATCH 14/18] fix(ipa): return None on unknown characters, fix geminate palatalized onsets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Change katakana_to_ipa to return Option<String>, returning None when input contains characters that cannot be converted instead of silently dropping them. - Fix geminate (ッ) handling for palatalized onsets: only duplicate the base consonant (e.g., ッキョ → kkʲo) instead of the full onset including palatalization marker (was producing kʲkʲo). - Update all call sites to use the new Option return type. https://claude.ai/code/session_01Pjo9E2fzdLZEkvNqxXAPeQ --- stationapi/src/domain/ipa.rs | 121 ++++++++++++-------- stationapi/src/use_case/dto/line.rs | 10 +- stationapi/src/use_case/dto/station.rs | 10 +- stationapi/src/use_case/interactor/query.rs | 10 +- 4 files changed, 80 insertions(+), 71 deletions(-) diff --git a/stationapi/src/domain/ipa.rs b/stationapi/src/domain/ipa.rs index bc10a930..2e494847 100644 --- a/stationapi/src/domain/ipa.rs +++ b/stationapi/src/domain/ipa.rs @@ -1,5 +1,10 @@ /// Convert a katakana string to its IPA transcription. -pub fn katakana_to_ipa(input: &str) -> String { +/// Returns `None` if the input contains characters that cannot be converted. 
+pub fn katakana_to_ipa(input: &str) -> Option { + if input.is_empty() { + return Some(String::new()); + } + let chars: Vec = input.chars().collect(); let len = chars.len(); let mut result = Vec::new(); @@ -15,16 +20,13 @@ pub fn katakana_to_ipa(input: &str) -> String { } } - // Single character lookup - if let Some(ipa) = lookup_single(chars[i]) { - result.push(ipa); - } - // Skip unknown characters silently + // Single character lookup — return None on unknown characters + result.push(lookup_single(chars[i])?); i += 1; } - apply_phonological_rules(&result) + Some(apply_phonological_rules(&result)) } /// Look up a two-character (digraph) combination. @@ -200,6 +202,12 @@ fn split_onset(ipa: &str) -> (&str, &str) { ipa.split_at(vowel_start) } +/// Strip secondary articulation markers (e.g., palatalization ʲ) from an onset, +/// returning only the base consonant(s). +fn strip_secondary_articulation(onset: &str) -> String { + onset.replace('ʲ', "") +} + /// Get the last vowel character from an IPA string for long vowel extension. fn last_vowel(ipa: &str) -> Option<&'static str> { for c in ipa.chars().rev() { @@ -271,6 +279,7 @@ fn apply_phonological_rules(phonemes: &[Phoneme]) -> String { Phoneme::Geminate => { // Double the onset of the following consonant. // For affricates (t͡ɕ, t͡s), only the stop portion (t) is geminated. + // For palatalized onsets (kʲ, ɡʲ, etc.), only the base consonant is geminated. if let Some(next_ipa) = find_next_regular(&phonemes[i + 1..]) { if next_ipa.starts_with("t͡ɕ") || next_ipa.starts_with("t͡s") { output.push('t'); @@ -279,7 +288,8 @@ fn apply_phonological_rules(phonemes: &[Phoneme]) -> String { } else { let (onset, _) = split_onset(next_ipa); if !onset.is_empty() { - output.push_str(onset); + let base = strip_secondary_articulation(onset); + output.push_str(&base); } } } @@ -346,217 +356,234 @@ fn apply_vowel_length(input: &str) -> String { mod tests { use super::*; + /// Helper: unwrap the Option for concise test assertions. 
+ fn ipa(input: &str) -> String { + katakana_to_ipa(input).expect("expected valid katakana input") + } + // Tests based on the hardcoded IPA mappings from Cloud Functions tts.ts #[test] fn test_shibuya() { - assert_eq!(katakana_to_ipa("シブヤ"), "ɕibɯja"); + assert_eq!(ipa("シブヤ"), "ɕibɯja"); } #[test] fn test_shinagawa() { - assert_eq!(katakana_to_ipa("シナガワ"), "ɕinaɡawa"); + assert_eq!(ipa("シナガワ"), "ɕinaɡawa"); } #[test] fn test_ueno() { - assert_eq!(katakana_to_ipa("ウエノ"), "ɯeno"); + assert_eq!(ipa("ウエノ"), "ɯeno"); } #[test] fn test_ikebukuro() { - assert_eq!(katakana_to_ipa("イケブクロ"), "ikebɯkɯɾo"); + assert_eq!(ipa("イケブクロ"), "ikebɯkɯɾo"); } #[test] fn test_shinjuku() { // ン before ジュ → ɲ, ジュ → dʑɯ - assert_eq!(katakana_to_ipa("シンジュク"), "ɕiɲdʑɯkɯ"); + assert_eq!(ipa("シンジュク"), "ɕiɲdʑɯkɯ"); } #[test] fn test_osaka() { // オオ → oː - assert_eq!(katakana_to_ipa("オオサカ"), "oːsaka"); + assert_eq!(ipa("オオサカ"), "oːsaka"); } #[test] fn test_kyoto() { // キョウ → kʲoː (via kʲo + ウ → oɯ → oː) - assert_eq!(katakana_to_ipa("キョウト"), "kʲoːto"); + assert_eq!(ipa("キョウト"), "kʲoːto"); } #[test] fn test_yokohama() { - assert_eq!(katakana_to_ipa("ヨコハマ"), "jokohama"); + assert_eq!(ipa("ヨコハマ"), "jokohama"); } #[test] fn test_chiba() { - assert_eq!(katakana_to_ipa("チバ"), "t͡ɕiba"); + assert_eq!(ipa("チバ"), "t͡ɕiba"); } #[test] fn test_kawasaki() { - assert_eq!(katakana_to_ipa("カワサキ"), "kawasakʲi"); + assert_eq!(ipa("カワサキ"), "kawasakʲi"); } #[test] fn test_tsurumi() { - assert_eq!(katakana_to_ipa("ツルミ"), "t͡sɯɾɯmi"); + assert_eq!(ipa("ツルミ"), "t͡sɯɾɯmi"); } #[test] fn test_ryogoku() { // リョウ → ɾʲoː (via ɾʲo + ウ → oɯ → oː) - assert_eq!(katakana_to_ipa("リョウゴク"), "ɾʲoːɡokɯ"); + assert_eq!(ipa("リョウゴク"), "ɾʲoːɡokɯ"); } #[test] fn test_shimbashi() { // ン before バ → m - assert_eq!(katakana_to_ipa("シンバシ"), "ɕimbaɕi"); + assert_eq!(ipa("シンバシ"), "ɕimbaɕi"); } #[test] fn test_keisei() { - assert_eq!(katakana_to_ipa("ケイセイ"), "keisei"); + assert_eq!(ipa("ケイセイ"), "keisei"); } #[test] fn test_oshiage() { - 
assert_eq!(katakana_to_ipa("オシアゲ"), "oɕiaɡe"); + assert_eq!(ipa("オシアゲ"), "oɕiaɡe"); } #[test] fn test_meitetsu() { // ツ is consistently t͡sɯ (affricate with tie bar) - assert_eq!(katakana_to_ipa("メイテツ"), "meitet͡sɯ"); + assert_eq!(ipa("メイテツ"), "meitet͡sɯ"); } #[test] fn test_seibu() { - assert_eq!(katakana_to_ipa("セイブ"), "seibɯ"); + assert_eq!(ipa("セイブ"), "seibɯ"); } #[test] fn test_toride() { - assert_eq!(katakana_to_ipa("トリデ"), "toɾide"); + assert_eq!(ipa("トリデ"), "toɾide"); } #[test] fn test_fukiage() { - assert_eq!(katakana_to_ipa("フキアゲ"), "ɸɯkʲiaɡe"); + assert_eq!(ipa("フキアゲ"), "ɸɯkʲiaɡe"); } #[test] fn test_fuse() { - assert_eq!(katakana_to_ipa("フセ"), "ɸɯse"); + assert_eq!(ipa("フセ"), "ɸɯse"); } #[test] fn test_inagekaigan() { // ン at word end → ɴ - assert_eq!(katakana_to_ipa("イナゲカイガン"), "inaɡekaiɡaɴ"); + assert_eq!(ipa("イナゲカイガン"), "inaɡekaiɡaɴ"); } #[test] fn test_inage() { - assert_eq!(katakana_to_ipa("イナゲ"), "inaɡe"); + assert_eq!(ipa("イナゲ"), "inaɡe"); } #[test] fn test_kire_uriwari() { - assert_eq!(katakana_to_ipa("キレウリワリ"), "kʲiɾeɯɾiwaɾi"); + assert_eq!(ipa("キレウリワリ"), "kʲiɾeɯɾiwaɾi"); } #[test] fn test_yao() { - assert_eq!(katakana_to_ipa("ヤオ"), "jao"); + assert_eq!(ipa("ヤオ"), "jao"); } #[test] fn test_mejiro() { - assert_eq!(katakana_to_ipa("メジロ"), "meʤiɾo"); + assert_eq!(ipa("メジロ"), "meʤiɾo"); } #[test] fn test_isesaki() { - assert_eq!(katakana_to_ipa("イセサキ"), "isesakʲi"); + assert_eq!(ipa("イセサキ"), "isesakʲi"); } #[test] fn test_ube() { - assert_eq!(katakana_to_ipa("ウベ"), "ɯbe"); + assert_eq!(ipa("ウベ"), "ɯbe"); } #[test] fn test_itchome() { // ッチョウ → tt͡ɕoː - assert_eq!(katakana_to_ipa("イッチョウメ"), "itt͡ɕoːme"); + assert_eq!(ipa("イッチョウメ"), "itt͡ɕoːme"); } #[test] fn test_sanchome() { - assert_eq!(katakana_to_ipa("サンチョウメ"), "sant͡ɕoːme"); + assert_eq!(ipa("サンチョウメ"), "sant͡ɕoːme"); } #[test] fn test_koen() { // コウエン: コ=ko, ウ→長音化でoː, エン=eɴ → koːeɴ // Note: the original hardcoded value was "koeɴ" but phonologically "koːeɴ" is correct - 
assert_eq!(katakana_to_ipa("コウエン"), "koːeɴ"); + assert_eq!(ipa("コウエン"), "koːeɴ"); } #[test] fn test_long_vowel_mark() { // ー explicitly lengthens - assert_eq!(katakana_to_ipa("ラーメン"), "ɾaːmeɴ"); + assert_eq!(ipa("ラーメン"), "ɾaːmeɴ"); } #[test] fn test_tokyo() { // トウキョウ: ト=to, ウ→oː, キョ=kʲo, ウ→oː - assert_eq!(katakana_to_ipa("トウキョウ"), "toːkʲoː"); + assert_eq!(ipa("トウキョウ"), "toːkʲoː"); } #[test] fn test_nagoya() { - assert_eq!(katakana_to_ipa("ナゴヤ"), "naɡoja"); + assert_eq!(ipa("ナゴヤ"), "naɡoja"); } #[test] fn test_sapporo() { // ッポ → ppo - assert_eq!(katakana_to_ipa("サッポロ"), "sappoɾo"); + assert_eq!(ipa("サッポロ"), "sappoɾo"); } #[test] fn test_namba() { // ン before バ → m - assert_eq!(katakana_to_ipa("ナンバ"), "namba"); + assert_eq!(ipa("ナンバ"), "namba"); } #[test] fn test_shin_yokohama() { // ン before ヨ(j) → ɲ (palatal assimilation) - assert_eq!(katakana_to_ipa("シンヨコハマ"), "ɕiɲjokohama"); + assert_eq!(ipa("シンヨコハマ"), "ɕiɲjokohama"); } #[test] fn test_geminate_ji() { // ッジ → dʤi (voiced affricate gemination emits 'd') - assert_eq!(katakana_to_ipa("カッジ"), "kadʤi"); + assert_eq!(ipa("カッジ"), "kadʤi"); } #[test] fn test_geminate_ju() { // ッジュ → ddʑɯ (voiced affricate gemination with digraph) - assert_eq!(katakana_to_ipa("カッジュ"), "kaddʑɯ"); + assert_eq!(ipa("カッジュ"), "kaddʑɯ"); } #[test] fn test_empty() { - assert_eq!(katakana_to_ipa(""), ""); + assert_eq!(katakana_to_ipa(""), Some(String::new())); + } + + #[test] + fn test_unknown_characters_returns_none() { + assert_eq!(katakana_to_ipa("ABC"), None); + assert_eq!(katakana_to_ipa("シブヤX"), None); + } + + #[test] + fn test_geminate_palatalized() { + // ッキョ → kkʲo (only the base consonant 'k' is geminated, not 'kʲ') + assert_eq!(ipa("ニッキョウ"), "ɲikkʲoː"); } } diff --git a/stationapi/src/use_case/dto/line.rs b/stationapi/src/use_case/dto/line.rs index bfd94912..42fac2b0 100644 --- a/stationapi/src/use_case/dto/line.rs +++ b/stationapi/src/use_case/dto/line.rs @@ -8,14 +8,8 @@ use crate::{ impl From for GrpcLine { fn from(line: Line) -> 
Self { - let name_ipa = { - let ipa = katakana_to_ipa(&line.line_name_k); - if ipa.is_empty() { - None - } else { - Some(ipa) - } - }; + let name_ipa = + katakana_to_ipa(&line.line_name_k).filter(|ipa| !ipa.is_empty()); // バス路線の場合は line_type を OtherLineType (0) に強制 // (鉄道用の line_type が誤って設定されている可能性があるため) let line_type = if line.transport_type == TransportType::Bus { diff --git a/stationapi/src/use_case/dto/station.rs b/stationapi/src/use_case/dto/station.rs index c6fa1b79..128c1742 100644 --- a/stationapi/src/use_case/dto/station.rs +++ b/stationapi/src/use_case/dto/station.rs @@ -17,14 +17,8 @@ impl From for i32 { impl From for GrpcStation { fn from(station: Station) -> Self { - let name_ipa = { - let ipa = katakana_to_ipa(&station.station_name_k); - if ipa.is_empty() { - None - } else { - Some(ipa) - } - }; + let name_ipa = + katakana_to_ipa(&station.station_name_k).filter(|ipa| !ipa.is_empty()); Self { id: station.station_cd as u32, group_id: station.station_g_cd as u32, diff --git a/stationapi/src/use_case/interactor/query.rs b/stationapi/src/use_case/interactor/query.rs index 98d0a329..ee3f30b9 100644 --- a/stationapi/src/use_case/interactor/query.rs +++ b/stationapi/src/use_case/interactor/query.rs @@ -834,14 +834,8 @@ where }) .collect(); - let name_ipa = { - let ipa = crate::domain::ipa::katakana_to_ipa(&row.station_name_k); - if ipa.is_empty() { - None - } else { - Some(ipa) - } - }; + let name_ipa = crate::domain::ipa::katakana_to_ipa(&row.station_name_k) + .filter(|ipa| !ipa.is_empty()); proto::StationMinimal { id: row.station_cd as u32, group_id: row.station_g_cd as u32, From c2bc39fa83c81c9d2b0df8031d973bb9c36d768d Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 5 Mar 2026 14:08:53 +0000 Subject: [PATCH 15/18] style: fix rustfmt formatting in dto files https://claude.ai/code/session_01Pjo9E2fzdLZEkvNqxXAPeQ --- stationapi/src/use_case/dto/line.rs | 3 +-- stationapi/src/use_case/dto/station.rs | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) 
diff --git a/stationapi/src/use_case/dto/line.rs b/stationapi/src/use_case/dto/line.rs index 42fac2b0..f94c3d48 100644 --- a/stationapi/src/use_case/dto/line.rs +++ b/stationapi/src/use_case/dto/line.rs @@ -8,8 +8,7 @@ use crate::{ impl From for GrpcLine { fn from(line: Line) -> Self { - let name_ipa = - katakana_to_ipa(&line.line_name_k).filter(|ipa| !ipa.is_empty()); + let name_ipa = katakana_to_ipa(&line.line_name_k).filter(|ipa| !ipa.is_empty()); // バス路線の場合は line_type を OtherLineType (0) に強制 // (鉄道用の line_type が誤って設定されている可能性があるため) let line_type = if line.transport_type == TransportType::Bus { diff --git a/stationapi/src/use_case/dto/station.rs b/stationapi/src/use_case/dto/station.rs index 128c1742..6e52cc19 100644 --- a/stationapi/src/use_case/dto/station.rs +++ b/stationapi/src/use_case/dto/station.rs @@ -17,8 +17,7 @@ impl From for i32 { impl From for GrpcStation { fn from(station: Station) -> Self { - let name_ipa = - katakana_to_ipa(&station.station_name_k).filter(|ipa| !ipa.is_empty()); + let name_ipa = katakana_to_ipa(&station.station_name_k).filter(|ipa| !ipa.is_empty()); Self { id: station.station_cd as u32, group_id: station.station_g_cd as u32, From 2ca98e615bd6461e03dbae6202d160c306fa0b8c Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 5 Mar 2026 14:15:11 +0000 Subject: [PATCH 16/18] =?UTF-8?q?fix:=20use=20consistent=20IPA=20symbol=20?= =?UTF-8?q?d=CA=91=20for=20=E3=82=B8=E8=A1=8C=E6=8B=97=E9=9F=B3=20mappings?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ('ジ','ョ') was using "ʤo" while ('ジ','ャ') and ('ジ','ュ') used "dʑ". Updated to "dʑo" for consistency. 
https://claude.ai/code/session_01Pjo9E2fzdLZEkvNqxXAPeQ --- stationapi/src/domain/ipa.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stationapi/src/domain/ipa.rs b/stationapi/src/domain/ipa.rs index 2e494847..667e7f0a 100644 --- a/stationapi/src/domain/ipa.rs +++ b/stationapi/src/domain/ipa.rs @@ -68,7 +68,7 @@ fn lookup_digraph(c1: char, c2: char) -> Option { // ザ行拗音 (ジ is voiced postalveolar affricate) ('ジ', 'ャ') => "dʑa", ('ジ', 'ュ') => "dʑɯ", - ('ジ', 'ョ') => "ʤo", + ('ジ', 'ョ') => "dʑo", // バ行拗音 ('ビ', 'ャ') => "bʲa", ('ビ', 'ュ') => "bʲɯ", From c75b6a1b8db514ad2a195fa5a5bbf0934c5d93e8 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 5 Mar 2026 14:24:24 +0000 Subject: [PATCH 17/18] docs: fix split_onset comment to match return type (&str, &str) The comment said "or None if starts with a vowel" but the function returns (&str, &str), with an empty onset when the string starts with a vowel. https://claude.ai/code/session_01Pjo9E2fzdLZEkvNqxXAPeQ --- stationapi/src/domain/ipa.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stationapi/src/domain/ipa.rs b/stationapi/src/domain/ipa.rs index 667e7f0a..24ece7ad 100644 --- a/stationapi/src/domain/ipa.rs +++ b/stationapi/src/domain/ipa.rs @@ -191,7 +191,7 @@ enum Phoneme { } /// Extract the leading consonant cluster from an IPA string. -/// Returns (consonant_cluster, remainder) or None if starts with a vowel. +/// Returns (onset, remainder). If the string starts with a vowel, onset is "". fn split_onset(ipa: &str) -> (&str, &str) { // Find where the first vowel-like character starts let vowel_start = ipa From c7e7767085d52c20f31d7e430980582a7585a1ac Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 5 Mar 2026 14:32:53 +0000 Subject: [PATCH 18/18] fix: geminate only the first base consonant for multi-char onsets For onsets like "dz", strip_secondary_articulation returns the full string, causing "dzdz..." duplication. 
Now only the leading character of the base is pushed, matching the intended gemination behavior. https://claude.ai/code/session_01Pjo9E2fzdLZEkvNqxXAPeQ --- stationapi/src/domain/ipa.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/stationapi/src/domain/ipa.rs b/stationapi/src/domain/ipa.rs index 24ece7ad..66f406cc 100644 --- a/stationapi/src/domain/ipa.rs +++ b/stationapi/src/domain/ipa.rs @@ -289,7 +289,9 @@ fn apply_phonological_rules(phonemes: &[Phoneme]) -> String { let (onset, _) = split_onset(next_ipa); if !onset.is_empty() { let base = strip_secondary_articulation(onset); - output.push_str(&base); + if let Some(c) = base.chars().next() { + output.push(c); + } } } }