Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ This guide explains how automation agents and human contributors should work wit
- **Lines** – `GetLineById`, `GetLinesByIdList`, `GetLinesByName`. Results include company data and computed line symbols based on repository helpers.
- **Routes** – `GetRoutes`, `GetRoutesMinimal`. The minimal variant returns `RouteMinimalResponse` with deduplicated `LineMinimal` data; paging tokens are currently empty (pagination not implemented).
- **Train types** – `GetTrainTypesByStationId`, `GetRouteTypes`. Train types aggregate by line group and include related lines plus optional train type metadata.
- **TTS metadata** – `Station`, `StationMinimal`, `Line`, and `TrainType` expose `name_ipa` / `name_roman_ipa` plus `name_tts_segments` for multi-segment pronunciation output. Use `name_tts_segments` when clients need per-token SSML construction for mixed-language names such as `Kasai-Rinkai Park`.
- **Connected routes** – `GetConnectedRoutes`. `QueryInteractor::get_connected_stations` is not implemented yet and returns an empty vector; update the use-case and infrastructure layers together when adding real logic.
- Changes to the service contract require coordinated updates to `proto/stationapi.proto`, regenerated code via `tonic-build`, and corresponding adjustments in both presentation and use-case layers.

Expand Down
8 changes: 8 additions & 0 deletions stationapi/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,14 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
"StationNumber",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
"TtsAlphabet",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute(
"TtsSegment",
"#[derive(serde::Serialize, serde::Deserialize)]",
)
.type_attribute("Line", "#[derive(serde::Serialize, serde::Deserialize)]")
.type_attribute("Station", "#[derive(serde::Serialize, serde::Deserialize)]")
.type_attribute(
Expand Down
2 changes: 1 addition & 1 deletion stationapi/proto
Submodule proto updated 1 files
+25 −1 stationapi.proto
234 changes: 158 additions & 76 deletions stationapi/src/domain/ipa.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,14 +64,8 @@ pub fn katakana_to_ipa(input: &str) -> Option<String> {
/// Prefers the official romanized/English name when present so mixed names like
/// "Kasai-Rinkai Park" use English pronunciation for translated segments.
pub fn station_name_to_ipa(name_katakana: &str, name_roman: Option<&str>) -> Option<String> {
non_empty_ipa(
name_roman
.map(str::trim)
.filter(|name| !name.is_empty())
.and_then(romanized_name_to_ipa)
.filter(|ipa| !ipa.is_empty())
.or_else(|| katakana_to_ipa(name_katakana)),
)
let segments = station_name_to_tts_segments(name_katakana, name_roman);
non_empty_ipa(join_tts_segment_pronunciations(&segments))
}

pub fn katakana_name_to_ipa(input: &str) -> Option<String> {
Expand All @@ -82,108 +76,219 @@ pub fn non_empty_ipa(ipa: Option<String>) -> Option<String> {
ipa.filter(|ipa| !ipa.is_empty())
}

fn romanized_name_to_ipa(input: &str) -> Option<String> {
/// Notation in which a [`TtsNameSegment`]'s `pronunciation` string is written.
///
/// Every segment constructed in the visible part of this module uses
/// [`TtsAlphabetKind::Ipa`].
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum TtsAlphabetKind {
/// The pronunciation is an IPA transcription.
Ipa,
/// NOTE(review): presumably a kana reading; no visible code constructs this
/// variant, so confirm against the proto `TtsAlphabet` definition.
Yomigana,
/// NOTE(review): presumably plain text with no phonetic notation; no visible
/// code constructs this variant, so confirm against the proto definition.
Plain,
}

/// One pronounceable piece of a station or line name.
///
/// A name is represented as an ordered list of segments so that each piece can
/// carry its own language tag and pronunciation.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct TtsNameSegment {
/// Source text the segment was built from: the katakana input or the
/// romanized token.
pub surface: String,
/// Text alternative to `pronunciation`. The visible constructors store the
/// token unchanged for English words and digits, and the hiragana form of
/// the katakana reading for Japanese segments.
/// NOTE(review): presumably spoken when phonetic markup is unsupported; confirm with clients.
pub fallback_text: String,
/// Pronunciation of `surface`, written in the notation named by `alphabet`.
pub pronunciation: String,
/// Notation used by `pronunciation`.
pub alphabet: TtsAlphabetKind,
/// Language tag for the segment; the visible constructors use `"en-US"` or
/// `"ja-JP"`.
pub lang: &'static str,
/// Text appended after this segment's pronunciation when segments are
/// joined: a single space between words, empty after the final segment.
pub separator: String,
}

pub fn station_name_to_tts_segments(
name_katakana: &str,
name_roman: Option<&str>,
) -> Vec<TtsNameSegment> {
name_roman
.map(str::trim)
.filter(|name| !name.is_empty())
.and_then(romanized_name_to_tts_segments)
.filter(|segments| !segments.is_empty())
.or_else(|| katakana_name_to_tts_segments(name_katakana))
.unwrap_or_default()
}

/// Concatenates the pronunciations of `segments` into a single string.
///
/// Segments with an empty pronunciation are skipped together with their
/// separator. Every other segment contributes its pronunciation followed by
/// its separator. The result is trimmed, and `None` is returned when nothing
/// remains.
fn join_tts_segment_pronunciations(segments: &[TtsNameSegment]) -> Option<String> {
    let joined = segments
        .iter()
        .filter(|segment| !segment.pronunciation.is_empty())
        .fold(String::new(), |mut acc, segment| {
            acc.push_str(&segment.pronunciation);
            acc.push_str(&segment.separator);
            acc
        });

    non_empty_ipa(Some(joined.trim().to_string()))
}

/// Wraps a katakana name in a single Japanese (`ja-JP`) IPA segment.
///
/// Returns `None` when `katakana_name_to_ipa` yields no pronunciation for
/// `input`. The segment keeps the katakana as its surface and uses the
/// hiragana form as fallback text.
fn katakana_name_to_tts_segments(input: &str) -> Option<Vec<TtsNameSegment>> {
    katakana_name_to_ipa(input).map(|ipa| {
        let whole_name = TtsNameSegment {
            surface: input.to_string(),
            fallback_text: katakana_to_hiragana(input),
            pronunciation: ipa,
            alphabet: TtsAlphabetKind::Ipa,
            lang: "ja-JP",
            separator: String::new(),
        };
        vec![whole_name]
    })
}

/// Reports whether a camel-case boundary lies between `prev` and `current`.
///
/// A boundary exists only when the previous character is an ASCII lowercase
/// letter and the current one is an ASCII uppercase letter. With no previous
/// character there is never a boundary.
fn should_split_camel_case_token(prev: Option<char>, current: char) -> bool {
    match prev {
        Some(before) => before.is_ascii_lowercase() && current.is_ascii_uppercase(),
        None => false,
    }
}

fn romanized_name_to_tts_segments(input: &str) -> Option<Vec<TtsNameSegment>> {
let mut tokens: Vec<String> = Vec::new();
let mut token = String::new();
let mut emitted_word = false;
let mut prev_token_char: Option<char> = None;

for c in input.chars() {
if is_name_token_char(c) {
if should_split_camel_case_token(prev_token_char, c) {
flush_name_token(&mut output, &mut token, &mut emitted_word)?;
flush_name_token(&mut tokens, &mut token);
}
token.push(c);
prev_token_char = Some(c);
continue;
}

flush_name_token(&mut output, &mut token, &mut emitted_word)?;
flush_name_token(&mut tokens, &mut token);
prev_token_char = None;

if is_separator_like(c) && emitted_word && !output.ends_with(' ') {
output.push(' ');
}
}

flush_name_token(&mut output, &mut token, &mut emitted_word)?;
flush_name_token(&mut tokens, &mut token);

Some(output.trim().to_string())
}
if tokens.is_empty() {
return Some(vec![]);
}

fn should_split_camel_case_token(prev: Option<char>, current: char) -> bool {
matches!(prev, Some(prev) if prev.is_ascii_lowercase() && current.is_ascii_uppercase())
let mut segments = Vec::new();
for (index, token) in tokens.iter().enumerate() {
let mut word_segments = word_to_tts_segments(token)?;
if let Some(last) = word_segments.last_mut() {
last.separator = if index + 1 < tokens.len() {
" ".to_string()
} else {
String::new()
};
}
segments.extend(word_segments);
}

Some(segments)
}

fn flush_name_token(
output: &mut String,
token: &mut String,
emitted_word: &mut bool,
) -> Option<()> {
fn flush_name_token(tokens: &mut Vec<String>, token: &mut String) {
if token.is_empty() {
return Some(());
return;
}

let ipa = word_to_ipa(token)?;
if *emitted_word && !output.ends_with(' ') {
output.push(' ');
}
output.push_str(&ipa);
*emitted_word = true;
tokens.push(token.clone());
token.clear();
Some(())
}

fn word_to_ipa(token: &str) -> Option<String> {
fn word_to_tts_segments(token: &str) -> Option<Vec<TtsNameSegment>> {
let normalized = normalize_name_token(token);
if normalized.is_empty() {
return Some(String::new());
return Some(vec![]);
}

if let Some(ipa) = split_compound_token_to_ipa(&normalized) {
return Some(ipa);
if let Some(segments) = split_compound_token_to_tts_segments(token, &normalized) {
return Some(segments);
}

if let Some(ipa) = lookup_english_word_ipa(&normalized) {
return Some(ipa.to_string());
return Some(vec![TtsNameSegment {
surface: token.to_string(),
fallback_text: token.to_string(),
pronunciation: ipa.to_string(),
alphabet: TtsAlphabetKind::Ipa,
lang: "en-US",
separator: String::new(),
}]);
}

if normalized.chars().all(|c| c.is_ascii_digit()) {
if let Some(ipa) = number_to_ipa(&normalized) {
return Some(ipa.to_string());
return Some(vec![TtsNameSegment {
surface: token.to_string(),
fallback_text: token.to_string(),
pronunciation: ipa.to_string(),
alphabet: TtsAlphabetKind::Ipa,
lang: "en-US",
separator: String::new(),
}]);
}

let mut output = String::new();
let mut pronunciation = String::new();
for digit in normalized.chars() {
let ipa = number_to_ipa(&digit.to_string())?;
output.push_str(ipa);
pronunciation.push_str(ipa);
}
return Some(output);
}

romaji_to_katakana(&normalized).and_then(|katakana| katakana_to_ipa(&katakana))
return Some(vec![TtsNameSegment {
surface: token.to_string(),
fallback_text: token.to_string(),
pronunciation,
alphabet: TtsAlphabetKind::Ipa,
lang: "en-US",
separator: String::new(),
}]);
}

let katakana = romaji_to_katakana(&normalized)?;
let pronunciation = katakana_to_ipa(&katakana)?;
Some(vec![TtsNameSegment {
surface: token.to_string(),
fallback_text: katakana_to_hiragana(&katakana),
pronunciation,
alphabet: TtsAlphabetKind::Ipa,
lang: "ja-JP",
separator: String::new(),
}])
}

fn split_compound_token_to_ipa(token: &str) -> Option<String> {
fn split_compound_token_to_tts_segments(
original: &str,
normalized: &str,
) -> Option<Vec<TtsNameSegment>> {
const JAPANESE_SUFFIXES: &[&str] = &["kaigan"];

for suffix in JAPANESE_SUFFIXES {
if token.len() <= suffix.len() || !token.ends_with(suffix) {
if normalized.len() <= suffix.len() || !normalized.ends_with(suffix) {
continue;
}

let stem = &token[..token.len() - suffix.len()];
let stem_ipa = word_to_ipa(stem)?;
let suffix_ipa = word_to_ipa(suffix)?;
if stem_ipa.is_empty() || suffix_ipa.is_empty() {
let stem_char_count = normalized.chars().count() - suffix.chars().count();
let stem_byte_offset = original
.char_indices()
.nth(stem_char_count)
.map(|(index, _)| index)
.unwrap_or(original.len());
let stem = &original[..stem_byte_offset];
let mut stem_segments = word_to_tts_segments(stem)?;
let suffix_segments = word_to_tts_segments(suffix)?;
Comment thread
coderabbitai[bot] marked this conversation as resolved.
if stem_segments.is_empty() || suffix_segments.is_empty() {
return None;
}
return Some(format!("{stem_ipa} {suffix_ipa}"));
if let Some(last) = stem_segments.last_mut() {
last.separator = " ".to_string();
}
stem_segments.extend(suffix_segments);
return Some(stem_segments);
}

None
}

/// Converts katakana in `input` to the corresponding hiragana.
///
/// Characters from `ァ` through `ヶ` are shifted down by `0x60` code points,
/// which lands on their hiragana counterparts. Every other character is
/// copied unchanged, including the long-vowel mark `ー`, Latin text and kanji.
fn katakana_to_hiragana(input: &str) -> String {
    // Distance between a katakana code point and its hiragana counterpart.
    const KANA_OFFSET: u32 = 0x60;

    let mut hiragana = String::with_capacity(input.len());
    for ch in input.chars() {
        let converted = if ('ァ'..='ヶ').contains(&ch) {
            char::from_u32(ch as u32 - KANA_OFFSET).unwrap_or(ch)
        } else {
            ch
        };
        hiragana.push(converted);
    }
    hiragana
}

fn is_name_token_char(c: char) -> bool {
c.is_ascii_alphanumeric()
|| matches!(
Expand All @@ -192,29 +297,6 @@ fn is_name_token_char(c: char) -> bool {
)
}

fn is_separator_like(c: char) -> bool {
c.is_whitespace()
|| matches!(
c,
'-' | '‐'
| '‑'
| '‒'
| '–'
| '—'
| '―'
| '/'
| '・'
| '・'
| '·'
| '('
| ')'
| '('
| ')'
| ','
| '、'
)
}

fn normalize_name_token(token: &str) -> String {
token
.trim_matches(|c: char| !is_name_token_char(c))
Expand Down
1 change: 1 addition & 0 deletions stationapi/src/use_case/dto.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ pub mod line_symbol;
pub mod station;
pub mod station_number;
pub mod train_type;
pub mod tts;
11 changes: 10 additions & 1 deletion stationapi/src/use_case/dto/line.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
use crate::{
domain::{
entity::{gtfs::TransportType, line::Line},
ipa::{katakana_name_to_ipa, non_empty_ipa, replace_line_name_suffix, station_name_to_ipa},
ipa::{
katakana_name_to_ipa, non_empty_ipa, replace_line_name_suffix, station_name_to_ipa,
station_name_to_tts_segments,
},
},
proto::{Line as GrpcLine, TransportType as GrpcTransportType},
use_case::dto::tts::to_proto_tts_segments,
};

impl From<Line> for GrpcLine {
Expand All @@ -13,6 +17,10 @@ impl From<Line> for GrpcLine {
non_empty_ipa(katakana_name_to_ipa(stem).map(|ipa| format!("{ipa}{suffix_ipa}")))
};
let name_roman_ipa = station_name_to_ipa("", line.line_name_r.as_deref());
let name_tts_segments = to_proto_tts_segments(station_name_to_tts_segments(
&line.line_name_k,
line.line_name_r.as_deref(),
));
Comment thread
coderabbitai[bot] marked this conversation as resolved.
// For bus routes, force line_type to OtherLineType (0)
// (a line_type intended for railways may have been set by mistake)
let line_type = if line.transport_type == TransportType::Bus {
Expand Down Expand Up @@ -42,6 +50,7 @@ impl From<Line> for GrpcLine {
transport_type: convert_transport_type(line.transport_type),
name_ipa,
name_roman_ipa,
name_tts_segments,
}
}
}
Expand Down
Loading
Loading