// Katakana → IPA transcription (`stationapi/src/domain/ipa.rs`).
//
// The change that introduces this module also:
//   * registers it in `stationapi/src/domain.rs` with `pub mod ipa;`;
//   * moves the `stationapi/proto` submodule from commit
//     d97e808d6afa1dd010a7361fa852a995d7f0484b to f05b09f37213515a3a7d79d16d714b9c61984e5a;
//   * fills the `name_ipa` field of `GrpcLine` (from `line.line_name_k`), `GrpcStation`
//     (from `station.station_name_k`) and `proto::StationMinimal` (from `row.station_name_k`)
//     with `katakana_to_ipa(..).filter(|ipa| !ipa.is_empty())`, so an empty transcription is
//     sent as "no value".

/// Characters treated as vowels when splitting an IPA mora into onset and nucleus.
const VOWELS: &str = "aiɯeouəɐ";

/// Convert a katakana string to its IPA transcription.
///
/// The input is scanned left to right. Two-character palatalized combinations (キョ, シャ, …)
/// are matched first, then single characters. The resulting phoneme list is post-processed
/// for ン assimilation, ッ gemination and vowel length.
///
/// Returns `None` as soon as a character that is not in the lookup tables is met (Latin
/// letters, hiragana, kanji, …). An empty input yields `Some(String::new())`.
pub fn katakana_to_ipa(input: &str) -> Option<String> {
    let chars: Vec<char> = input.chars().collect();
    let mut phonemes = Vec::with_capacity(chars.len());
    let mut i = 0;

    while i < chars.len() {
        // Digraphs take priority so that キョ becomes "kʲo" rather than "kʲi" + "jo".
        if let Some(&second) = chars.get(i + 1) {
            if let Some(phoneme) = lookup_digraph(chars[i], second) {
                phonemes.push(phoneme);
                i += 2;
                continue;
            }
        }

        // Unknown characters abort the whole conversion.
        phonemes.push(lookup_single(chars[i])?);
        i += 1;
    }

    Some(apply_phonological_rules(&phonemes))
}

/// Look up a two-character (digraph) combination.
///
/// Handles the palatalized sounds (yōon) written as an イ-column kana followed by a small
/// ャ/ュ/ョ. Returns `None` for every other pair.
fn lookup_digraph(c1: char, c2: char) -> Option<Phoneme> {
    let ipa = match (c1, c2) {
        // カ row
        ('キ', 'ャ') => "kʲa",
        ('キ', 'ュ') => "kʲɯ",
        ('キ', 'ョ') => "kʲo",
        // サ row (シ is already palatal)
        ('シ', 'ャ') => "ɕa",
        ('シ', 'ュ') => "ɕɯ",
        ('シ', 'ョ') => "ɕo",
        // タ row
        ('チ', 'ャ') => "t͡ɕa",
        ('チ', 'ュ') => "t͡ɕɯ",
        ('チ', 'ョ') => "t͡ɕo",
        // ナ row
        ('ニ', 'ャ') => "ɲa",
        ('ニ', 'ュ') => "ɲɯ",
        ('ニ', 'ョ') => "ɲo",
        // ハ row
        ('ヒ', 'ャ') => "ça",
        ('ヒ', 'ュ') => "çɯ",
        ('ヒ', 'ョ') => "ço",
        // マ row
        ('ミ', 'ャ') => "mʲa",
        ('ミ', 'ュ') => "mʲɯ",
        ('ミ', 'ョ') => "mʲo",
        // ラ row
        ('リ', 'ャ') => "ɾʲa",
        ('リ', 'ュ') => "ɾʲɯ",
        ('リ', 'ョ') => "ɾʲo",
        // ガ row
        ('ギ', 'ャ') => "ɡʲa",
        ('ギ', 'ュ') => "ɡʲɯ",
        ('ギ', 'ョ') => "ɡʲo",
        // ザ row
        ('ジ', 'ャ') => "dʑa",
        ('ジ', 'ュ') => "dʑɯ",
        ('ジ', 'ョ') => "dʑo",
        // バ row
        ('ビ', 'ャ') => "bʲa",
        ('ビ', 'ュ') => "bʲɯ",
        ('ビ', 'ョ') => "bʲo",
        // パ row
        ('ピ', 'ャ') => "pʲa",
        ('ピ', 'ュ') => "pʲɯ",
        ('ピ', 'ョ') => "pʲo",
        _ => return None,
    };
    Some(Phoneme::Regular(ipa))
}

/// Look up a single katakana character.
///
/// Small vowels (ァ, ィ, …) map to the same IPA as their full-size forms, and a small
/// ャ/ュ/ョ that did not form a digraph maps to "ja"/"jɯ"/"jo". Returns `None` for any
/// character that is not listed.
fn lookup_single(c: char) -> Option<Phoneme> {
    let ipa = match c {
        // Special morae are resolved later, once their neighbours are known.
        'ン' => return Some(Phoneme::MoraicNasal),
        'ッ' => return Some(Phoneme::Geminate),
        'ー' => return Some(Phoneme::LongVowel),
        // Vowels
        'ア' | 'ァ' => "a",
        'イ' | 'ィ' => "i",
        'ウ' | 'ゥ' => "ɯ",
        'エ' | 'ェ' => "e",
        'オ' | 'ォ' => "o",
        // カ row
        'カ' => "ka",
        'キ' => "kʲi",
        'ク' => "kɯ",
        'ケ' => "ke",
        'コ' => "ko",
        // サ row
        'サ' => "sa",
        'シ' => "ɕi",
        'ス' => "sɯ",
        'セ' => "se",
        'ソ' => "so",
        // タ row
        'タ' => "ta",
        'チ' => "t͡ɕi",
        'ツ' => "t͡sɯ",
        'テ' => "te",
        'ト' => "to",
        // ナ row
        'ナ' => "na",
        'ニ' => "ɲi",
        'ヌ' => "nɯ",
        'ネ' => "ne",
        'ノ' => "no",
        // ハ row
        'ハ' => "ha",
        'ヒ' => "çi",
        'フ' => "ɸɯ",
        'ヘ' => "he",
        'ホ' => "ho",
        // マ row
        'マ' => "ma",
        'ミ' => "mi",
        'ム' => "mɯ",
        'メ' => "me",
        'モ' => "mo",
        // ヤ row
        'ヤ' | 'ャ' => "ja",
        'ユ' | 'ュ' => "jɯ",
        'ヨ' | 'ョ' => "jo",
        // ラ row
        'ラ' => "ɾa",
        'リ' => "ɾi",
        'ル' => "ɾɯ",
        'レ' => "ɾe",
        'ロ' => "ɾo",
        // ワ row
        'ワ' => "wa",
        'ヰ' => "i",
        'ヱ' => "e",
        'ヲ' => "o",
        // ガ row
        'ガ' => "ɡa",
        'ギ' => "ɡi",
        'グ' => "ɡɯ",
        'ゲ' => "ɡe",
        'ゴ' => "ɡo",
        // ザ row. A lone ジ is spelled "ʤi" while the ジャ/ジュ/ジョ digraphs use "dʑ";
        // the module's tests (`test_mejiro`, `test_geminate_ji`) pin this spelling.
        'ザ' => "za",
        'ジ' => "ʤi",
        'ズ' => "zɯ",
        'ゼ' => "ze",
        'ゾ' => "zo",
        // ダ row
        'ダ' => "da",
        'ヂ' => "dʑi",
        'ヅ' => "dzɯ",
        'デ' => "de",
        'ド' => "do",
        // バ row
        'バ' => "ba",
        'ビ' => "bi",
        'ブ' => "bɯ",
        'ベ' => "be",
        'ボ' => "bo",
        // パ row
        'パ' => "pa",
        'ピ' => "pi",
        'プ' => "pɯ",
        'ペ' => "pe",
        'ポ' => "po",
        _ => return None,
    };
    Some(Phoneme::Regular(ipa))
}

/// Intermediate phoneme representation before phonological rules are applied.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Phoneme {
    /// A mora whose IPA is fixed by the lookup tables.
    Regular(&'static str),
    /// ン — assimilates to the following consonant.
    MoraicNasal,
    /// ッ — doubles the following consonant.
    Geminate,
    /// ー — lengthens the preceding vowel.
    LongVowel,
}

/// Whether `c` counts as a vowel for onset splitting and vowel lengthening.
fn is_vowel(c: char) -> bool {
    VOWELS.contains(c)
}

/// Split an IPA mora into its leading consonant cluster and the rest.
///
/// Returns `(onset, remainder)`. The onset is `""` when the string starts with a vowel, and
/// the remainder is `""` when the string contains no vowel at all.
fn split_onset(ipa: &str) -> (&str, &str) {
    let vowel_start = ipa
        .char_indices()
        .find(|&(_, c)| is_vowel(c))
        .map_or(ipa.len(), |(idx, _)| idx);
    ipa.split_at(vowel_start)
}

/// Strip secondary articulation markers (the palatalization sign ʲ) from an onset,
/// leaving only the base consonant(s).
fn strip_secondary_articulation(onset: &str) -> String {
    onset.replace('ʲ', "")
}

/// Choose the realisation of ン from the IPA of the mora that follows it.
///
/// The prefix groups are checked in order (bilabial, palatal, velar, alveolar). The order
/// matters: "dʑ" must be recognised as palatal before the plain "d" alveolar check.
/// Anything else, including a following vowel, gives "ɴ".
fn nasal_for_following(next_ipa: &str) -> &'static str {
    const BILABIAL: &[&str] = &["b", "p", "m"];
    const PALATAL: &[&str] = &["ɲ", "dʑ", "ʤ", "ɕ", "ɡʲ", "kʲ", "j", "ç"];
    const VELAR: &[&str] = &["k", "ɡ", "ŋ"];
    // The affricates t͡ɕ and t͡s start with "t" and therefore land here.
    const ALVEOLAR: &[&str] = &["n", "t", "d", "s", "z", "ɾ"];

    let starts_with_any =
        |prefixes: &[&str]| prefixes.iter().any(|prefix| next_ipa.starts_with(*prefix));

    if starts_with_any(BILABIAL) {
        "m"
    } else if starts_with_any(PALATAL) {
        "ɲ"
    } else if starts_with_any(VELAR) {
        "ŋ"
    } else if starts_with_any(ALVEOLAR) {
        "n"
    } else {
        "ɴ"
    }
}

/// Consonant that ッ contributes in front of the mora `next_ipa`, if any.
///
/// Affricates double only their stop portion (`t` or `d`). Palatalized onsets double only
/// the base consonant (`k` for "kʲ"). A mora that starts with a vowel yields `None`.
fn geminated_consonant(next_ipa: &str) -> Option<char> {
    if next_ipa.starts_with("t͡ɕ") || next_ipa.starts_with("t͡s") {
        Some('t')
    } else if next_ipa.starts_with("dʑ") || next_ipa.starts_with('ʤ') {
        Some('d')
    } else {
        let (onset, _) = split_onset(next_ipa);
        strip_secondary_articulation(onset).chars().next()
    }
}

/// Apply phonological rules: ン assimilation, ッ gemination, long vowels.
///
/// ン and ッ look ahead to the next `Regular` phoneme (skipping other special morae).
/// ー appends `ː` only when the text produced so far ends in a vowel, so a repeated or
/// leading ー never produces a dangling length mark. The `oɯ`/`oo` contractions are applied
/// to the final string.
fn apply_phonological_rules(phonemes: &[Phoneme]) -> String {
    let mut output = String::new();

    for (idx, phoneme) in phonemes.iter().enumerate() {
        match *phoneme {
            Phoneme::Regular(ipa) => output.push_str(ipa),
            Phoneme::MoraicNasal => {
                // With nothing left to assimilate to, ン is realised as "ɴ".
                let nasal =
                    find_next_regular(&phonemes[idx + 1..]).map_or("ɴ", nasal_for_following);
                output.push_str(nasal);
            }
            Phoneme::Geminate => {
                let doubled =
                    find_next_regular(&phonemes[idx + 1..]).and_then(geminated_consonant);
                if let Some(consonant) = doubled {
                    output.push(consonant);
                }
            }
            Phoneme::LongVowel => {
                if output.chars().next_back().map_or(false, is_vowel) {
                    output.push('ː');
                }
            }
        }
    }

    apply_vowel_length(&output)
}

/// Find the IPA string of the next `Regular` phoneme in the slice.
fn find_next_regular(phonemes: &[Phoneme]) -> Option<&'static str> {
    phonemes.iter().find_map(|phoneme| match *phoneme {
        Phoneme::Regular(ipa) => Some(ipa),
        Phoneme::MoraicNasal | Phoneme::Geminate | Phoneme::LongVowel => None,
    })
}

/// Contract the vowel sequences `oɯ` (オウ) and `oo` (オオ) to `oː`.
///
/// The scan is left to right and non-overlapping: "ooo" becomes "oːo".
fn apply_vowel_length(input: &str) -> String {
    let mut result = String::with_capacity(input.len());
    let mut chars = input.chars().peekable();

    while let Some(c) = chars.next() {
        result.push(c);
        if c == 'o' && matches!(chars.peek(), Some(&'ɯ') | Some(&'o')) {
            chars.next();
            result.push('ː');
        }
    }

    result
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Helper: unwrap the `Option` for concise test assertions.
    fn ipa(input: &str) -> String {
        katakana_to_ipa(input).expect("expected valid katakana input")
    }

    // Tests based on the hardcoded IPA mappings from Cloud Functions tts.ts

    #[test]
    fn test_shibuya() {
        assert_eq!(ipa("シブヤ"), "ɕibɯja");
    }

    #[test]
    fn test_shinagawa() {
        assert_eq!(ipa("シナガワ"), "ɕinaɡawa");
    }

    #[test]
    fn test_ueno() {
        assert_eq!(ipa("ウエノ"), "ɯeno");
    }

    #[test]
    fn test_ikebukuro() {
        assert_eq!(ipa("イケブクロ"), "ikebɯkɯɾo");
    }

    #[test]
    fn test_shinjuku() {
        // ン before ジュ → ɲ, ジュ → dʑɯ
        assert_eq!(ipa("シンジュク"), "ɕiɲdʑɯkɯ");
    }

    #[test]
    fn test_osaka() {
        // オオ → oː
        assert_eq!(ipa("オオサカ"), "oːsaka");
    }

    #[test]
    fn test_kyoto() {
        // キョウ → kʲoː (via kʲo + ウ → oɯ → oː)
        assert_eq!(ipa("キョウト"), "kʲoːto");
    }

    #[test]
    fn test_yokohama() {
        assert_eq!(ipa("ヨコハマ"), "jokohama");
    }

    #[test]
    fn test_chiba() {
        assert_eq!(ipa("チバ"), "t͡ɕiba");
    }

    #[test]
    fn test_kawasaki() {
        assert_eq!(ipa("カワサキ"), "kawasakʲi");
    }

    #[test]
    fn test_tsurumi() {
        assert_eq!(ipa("ツルミ"), "t͡sɯɾɯmi");
    }

    #[test]
    fn test_ryogoku() {
        // リョウ → ɾʲoː (via ɾʲo + ウ → oɯ → oː)
        assert_eq!(ipa("リョウゴク"), "ɾʲoːɡokɯ");
    }

    #[test]
    fn test_shimbashi() {
        // ン before バ → m
        assert_eq!(ipa("シンバシ"), "ɕimbaɕi");
    }

    #[test]
    fn test_keisei() {
        assert_eq!(ipa("ケイセイ"), "keisei");
    }

    #[test]
    fn test_oshiage() {
        assert_eq!(ipa("オシアゲ"), "oɕiaɡe");
    }

    #[test]
    fn test_meitetsu() {
        // ツ is consistently t͡sɯ (affricate with tie bar)
        assert_eq!(ipa("メイテツ"), "meitet͡sɯ");
    }

    #[test]
    fn test_seibu() {
        assert_eq!(ipa("セイブ"), "seibɯ");
    }

    #[test]
    fn test_toride() {
        assert_eq!(ipa("トリデ"), "toɾide");
    }

    #[test]
    fn test_fukiage() {
        assert_eq!(ipa("フキアゲ"), "ɸɯkʲiaɡe");
    }

    #[test]
    fn test_fuse() {
        assert_eq!(ipa("フセ"), "ɸɯse");
    }

    #[test]
    fn test_inagekaigan() {
        // ン at word end → ɴ
        assert_eq!(ipa("イナゲカイガン"), "inaɡekaiɡaɴ");
    }

    #[test]
    fn test_inage() {
        assert_eq!(ipa("イナゲ"), "inaɡe");
    }

    #[test]
    fn test_kire_uriwari() {
        assert_eq!(ipa("キレウリワリ"), "kʲiɾeɯɾiwaɾi");
    }

    #[test]
    fn test_yao() {
        assert_eq!(ipa("ヤオ"), "jao");
    }

    #[test]
    fn test_mejiro() {
        assert_eq!(ipa("メジロ"), "meʤiɾo");
    }

    #[test]
    fn test_isesaki() {
        assert_eq!(ipa("イセサキ"), "isesakʲi");
    }

    #[test]
    fn test_ube() {
        assert_eq!(ipa("ウベ"), "ɯbe");
    }

    #[test]
    fn test_itchome() {
        // ッチョウ → tt͡ɕoː
        assert_eq!(ipa("イッチョウメ"), "itt͡ɕoːme");
    }

    #[test]
    fn test_sanchome() {
        assert_eq!(ipa("サンチョウメ"), "sant͡ɕoːme");
    }

    #[test]
    fn test_koen() {
        // コウエン: コ = ko, ウ lengthens it to oː, エン = eɴ → koːeɴ
        // Note: the original hardcoded value was "koeɴ" but phonologically "koːeɴ" is correct
        assert_eq!(ipa("コウエン"), "koːeɴ");
    }

    #[test]
    fn test_long_vowel_mark() {
        // ー explicitly lengthens
        assert_eq!(ipa("ラーメン"), "ɾaːmeɴ");
    }

    #[test]
    fn test_long_vowel_mark_without_vowel() {
        // ー with nothing to lengthen must not leave a dangling ː
        assert_eq!(ipa("ー"), "");
        assert_eq!(ipa("コーー"), "koː");
    }

    #[test]
    fn test_tokyo() {
        // トウキョウ: ト = to, ウ → oː, キョ = kʲo, ウ → oː
        assert_eq!(ipa("トウキョウ"), "toːkʲoː");
    }

    #[test]
    fn test_nagoya() {
        assert_eq!(ipa("ナゴヤ"), "naɡoja");
    }

    #[test]
    fn test_sapporo() {
        // ッポ → ppo
        assert_eq!(ipa("サッポロ"), "sappoɾo");
    }

    #[test]
    fn test_namba() {
        // ン before バ → m
        assert_eq!(ipa("ナンバ"), "namba");
    }

    #[test]
    fn test_shin_yokohama() {
        // ン before ヨ (j) → ɲ (palatal assimilation)
        assert_eq!(ipa("シンヨコハマ"), "ɕiɲjokohama");
    }

    #[test]
    fn test_geminate_ji() {
        // ッジ → dʤi (voiced affricate gemination emits 'd')
        assert_eq!(ipa("カッジ"), "kadʤi");
    }

    #[test]
    fn test_geminate_ju() {
        // ッジュ → ddʑɯ (voiced affricate gemination with digraph)
        assert_eq!(ipa("カッジュ"), "kaddʑɯ");
    }

    #[test]
    fn test_empty() {
        assert_eq!(katakana_to_ipa(""), Some(String::new()));
    }

    #[test]
    fn test_unknown_characters_returns_none() {
        assert_eq!(katakana_to_ipa("ABC"), None);
        assert_eq!(katakana_to_ipa("シブヤX"), None);
    }

    #[test]
    fn test_geminate_palatalized() {
        // ッキョ → kkʲo (only the base consonant 'k' is geminated, not 'kʲ')
        assert_eq!(ipa("ニッキョウ"), "ɲikkʲoː");
    }
}