1use std::collections::BTreeMap;
2use std::error::Error;
3use std::fmt;
4use std::sync::OnceLock;
5
6use crate::{
7 FormalizationLevel, Link, LinkId, LinkMetadata, LinkNetwork, LinkQuery, LinkType,
8 LinoSerializationError, NaturalizationDirection, ParseConfiguration, QueryMatch,
9 QueryParseError,
10};
11
/// Term carried by the root link of a serialized rule set.
const RULE_SET_TERM: &str = "translation-rule-set";
/// Term carried by every rule link that references the rule-set root.
const RULE_TERM: &str = "translation-rule";
/// Term of the rule child link whose definition stores the JSON query spec.
const MATCH_TERM: &str = "translation-rule-match";
/// Language marker that identifies a rule child link as a reference capture.
const REFERENCE_CAPTURE_LANGUAGE: &str = "translation-rule-reference-capture";
/// Definition marker that identifies a rule child link as a template.
const TEMPLATE_DEFINITION: &str = "translation-rule-template";
/// Template key used for the lexical formalization level.
const FORMAL_LEXICAL_TARGET: &str = "formal:lexical";
/// Template key used for the concept formalization level.
const FORMAL_CONCEPT_TARGET: &str = "formal:concept";
/// Template key used for the logical formalization level.
const FORMAL_LOGICAL_TARGET: &str = "formal:logical";
20
21#[derive(Clone, Debug, PartialEq, Eq)]
23pub struct TranslationRuleSet {
24 name: String,
25 rules: Vec<TranslationRule>,
26}
27
28impl TranslationRuleSet {
29 #[must_use]
31 pub fn new(name: impl Into<String>) -> Self {
32 Self {
33 name: name.into(),
34 rules: Vec::new(),
35 }
36 }
37
38 #[must_use]
40 pub fn name(&self) -> &str {
41 &self.name
42 }
43
44 #[must_use]
46 pub fn rules(&self) -> &[TranslationRule] {
47 &self.rules
48 }
49
50 #[must_use]
52 pub fn with_rule(mut self, rule: TranslationRule) -> Self {
53 self.add_rule(rule);
54 self
55 }
56
57 pub fn add_rule(&mut self, rule: TranslationRule) {
59 self.rules.push(rule);
60 }
61
62 #[must_use]
64 pub fn to_lino(&self) -> String {
65 let mut network = LinkNetwork::new();
66 let root = network.insert_link(
67 [],
68 LinkMetadata::new()
69 .with_link_type(LinkType::Semantic)
70 .with_named(true)
71 .with_term(RULE_SET_TERM)
72 .with_definition(&self.name),
73 );
74
75 for rule in &self.rules {
76 let rule_link = network.insert_link(
77 [root],
78 LinkMetadata::new()
79 .with_link_type(LinkType::Semantic)
80 .with_named(true)
81 .with_term(RULE_TERM)
82 .with_definition(rule.name()),
83 );
84 network.insert_link(
85 [rule_link],
86 LinkMetadata::new()
87 .with_link_type(LinkType::Semantic)
88 .with_named(true)
89 .with_term(MATCH_TERM)
90 .with_definition(query_to_rule_spec(&rule.query)),
91 );
92 for (capture, reference_index) in &rule.reference_captures {
93 network.insert_link(
94 [rule_link],
95 LinkMetadata::new()
96 .with_link_type(LinkType::Semantic)
97 .with_named(true)
98 .with_term(capture)
99 .with_language(REFERENCE_CAPTURE_LANGUAGE)
100 .with_definition(reference_index.to_string()),
101 );
102 }
103 for (target, template) in &rule.templates {
104 network.insert_link(
105 [rule_link],
106 LinkMetadata::new()
107 .with_link_type(LinkType::Semantic)
108 .with_named(true)
109 .with_term(template.source())
110 .with_language(target)
111 .with_definition(TEMPLATE_DEFINITION),
112 );
113 }
114 }
115
116 network.to_lino()
117 }
118
119 pub fn from_lino(text: &str) -> Result<Self, TranslationRuleSetLoadError> {
121 let network = LinkNetwork::from_lino(text)?;
122 let root = network
123 .links()
124 .find(|link| {
125 link.metadata().link_type() == Some(LinkType::Semantic)
126 && link.metadata().term() == Some(RULE_SET_TERM)
127 })
128 .ok_or_else(|| {
129 TranslationRuleSetLoadError::Structure(
130 "missing translation-rule-set root".to_string(),
131 )
132 })?;
133 let mut rules = Vec::new();
134 let mut rule_links = network
135 .links()
136 .filter(|link| {
137 link.references().first().copied() == Some(root.id())
138 && link.metadata().term() == Some(RULE_TERM)
139 })
140 .collect::<Vec<_>>();
141 rule_links.sort_by_key(|link| link.id());
142
143 for rule_link in rule_links {
144 rules.push(load_rule(&network, rule_link)?);
145 }
146
147 Ok(Self {
148 name: root
149 .metadata()
150 .definition()
151 .unwrap_or(RULE_SET_TERM)
152 .to_string(),
153 rules,
154 })
155 }
156
157 #[must_use]
159 pub fn statehood_demo_lino() -> &'static str {
160 static LINO: OnceLock<String> = OnceLock::new();
161 LINO.get_or_init(|| statehood_demo_rule_set().to_lino())
162 }
163
164 #[must_use]
166 pub fn statehood_demo() -> Self {
167 Self::from_lino(Self::statehood_demo_lino())
168 .expect("statehood demo translation rule set must load")
169 }
170
171 pub(crate) fn render(
172 &self,
173 network: &LinkNetwork,
174 target_language: &str,
175 configuration: ParseConfiguration,
176 ) -> Option<String> {
177 let source = network.reconstruct_text();
178 for rule in &self.rules {
179 let Some(template) = rule.template_for(target_language, configuration) else {
180 continue;
181 };
182 let rendered = network
183 .query_matches(&rule.query)
184 .into_iter()
185 .map(|query_match| template.render(network, rule, &query_match, target_language))
186 .collect::<Vec<_>>();
187
188 if !rendered.is_empty() {
189 return Some(with_source_trailing_newline(rendered.join("\n"), &source));
190 }
191 }
192
193 None
194 }
195}
196
197#[derive(Clone, Debug, PartialEq, Eq)]
199pub struct TranslationRule {
200 name: String,
201 query: LinkQuery,
202 reference_captures: BTreeMap<String, usize>,
203 templates: BTreeMap<String, TranslationTemplate>,
204}
205
206impl TranslationRule {
207 #[must_use]
209 pub fn new(name: impl Into<String>, query: LinkQuery) -> Self {
210 Self {
211 name: name.into(),
212 query,
213 reference_captures: BTreeMap::new(),
214 templates: BTreeMap::new(),
215 }
216 }
217
218 #[must_use]
220 pub fn name(&self) -> &str {
221 &self.name
222 }
223
224 #[must_use]
226 pub const fn query(&self) -> &LinkQuery {
227 &self.query
228 }
229
230 #[must_use]
232 pub const fn reference_captures(&self) -> &BTreeMap<String, usize> {
233 &self.reference_captures
234 }
235
236 #[must_use]
238 pub const fn templates(&self) -> &BTreeMap<String, TranslationTemplate> {
239 &self.templates
240 }
241
242 #[must_use]
244 pub fn with_reference_capture(
245 mut self,
246 name: impl Into<String>,
247 reference_index: usize,
248 ) -> Self {
249 self.reference_captures.insert(name.into(), reference_index);
250 self
251 }
252
253 #[must_use]
255 pub fn with_template(
256 mut self,
257 target_language: impl Into<String>,
258 template: impl Into<String>,
259 ) -> Self {
260 self.templates.insert(
261 target_language.into(),
262 TranslationTemplate::new(template.into()),
263 );
264 self
265 }
266
267 #[must_use]
269 pub fn with_formal_template(
270 mut self,
271 level: FormalizationLevel,
272 template: impl Into<String>,
273 ) -> Self {
274 self.templates.insert(
275 formal_template_target(level).to_string(),
276 TranslationTemplate::new(template.into()),
277 );
278 self
279 }
280
281 fn template_for(
282 &self,
283 target_language: &str,
284 configuration: ParseConfiguration,
285 ) -> Option<&TranslationTemplate> {
286 let level = effective_formalization_level(configuration);
287 if level != FormalizationLevel::Natural {
288 return self.templates.get(formal_template_target(level));
289 }
290
291 self.templates.get(target_language).or_else(|| {
292 canonical_reconstruction_language(target_language)
293 .and_then(|language| self.templates.get(language))
294 })
295 }
296}
297
/// Template text in which `{placeholder}` spans are substituted at render time.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct TranslationTemplate {
    /// Raw template text, exactly as supplied.
    source: String,
}
303
304impl TranslationTemplate {
305 #[must_use]
307 pub fn new(source: impl Into<String>) -> Self {
308 Self {
309 source: source.into(),
310 }
311 }
312
313 #[must_use]
315 pub fn source(&self) -> &str {
316 &self.source
317 }
318
319 fn render(
320 &self,
321 network: &LinkNetwork,
322 rule: &TranslationRule,
323 query_match: &QueryMatch,
324 target_language: &str,
325 ) -> String {
326 let mut output = String::new();
327 let mut chars = self.source.chars().peekable();
328 while let Some(character) = chars.next() {
329 match character {
330 '{' if chars.peek() == Some(&'{') => {
331 chars.next();
332 output.push('{');
333 }
334 '{' => {
335 let mut placeholder = String::new();
336 let mut closed = false;
337 for next in chars.by_ref() {
338 if next == '}' {
339 closed = true;
340 break;
341 }
342 placeholder.push(next);
343 }
344 if closed {
345 output.push_str(&render_placeholder(
346 network,
347 rule,
348 query_match,
349 target_language,
350 &placeholder,
351 ));
352 } else {
353 output.push('{');
354 output.push_str(&placeholder);
355 }
356 }
357 '}' if chars.peek() == Some(&'}') => {
358 chars.next();
359 output.push('}');
360 }
361 other => output.push(other),
362 }
363 }
364 output
365 }
366}
367
368#[derive(Clone, Debug, Default, PartialEq, Eq)]
370pub struct TranslationRuleRegistry {
371 rule_sets: BTreeMap<String, TranslationRuleSet>,
372 active_rule_set: Option<String>,
373}
374
375impl TranslationRuleRegistry {
376 #[must_use]
378 pub fn new() -> Self {
379 Self::default()
380 }
381
382 #[must_use]
384 pub fn with_statehood_demo() -> Self {
385 Self::new().with_rule_set(TranslationRuleSet::statehood_demo())
386 }
387
388 #[must_use]
390 pub fn with_rule_set(mut self, rule_set: TranslationRuleSet) -> Self {
391 self.replace_rule_set(rule_set);
392 self
393 }
394
395 pub fn replace_rule_set(&mut self, rule_set: TranslationRuleSet) {
397 let name = rule_set.name().to_string();
398 if self.active_rule_set.is_none() {
399 self.active_rule_set = Some(name.clone());
400 }
401 self.rule_sets.insert(name, rule_set);
402 }
403
404 pub fn set_active_rule_set(&mut self, name: &str) -> bool {
406 if self.rule_sets.contains_key(name) {
407 self.active_rule_set = Some(name.to_string());
408 true
409 } else {
410 false
411 }
412 }
413
414 #[must_use]
416 pub fn active_rule_set(&self) -> Option<&TranslationRuleSet> {
417 self.active_rule_set
418 .as_deref()
419 .and_then(|name| self.rule_sets.get(name))
420 }
421
422 #[must_use]
424 pub fn rule_set(&self, name: &str) -> Option<&TranslationRuleSet> {
425 self.rule_sets.get(name)
426 }
427
428 #[must_use]
430 pub fn len(&self) -> usize {
431 self.rule_sets.len()
432 }
433
434 #[must_use]
436 pub fn is_empty(&self) -> bool {
437 self.rule_sets.is_empty()
438 }
439}
440
441#[derive(Debug, Clone, PartialEq, Eq)]
443pub enum TranslationRuleSetLoadError {
444 Lino(LinoSerializationError),
446 Structure(String),
448 Query(QueryParseError),
450}
451
452impl fmt::Display for TranslationRuleSetLoadError {
453 fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
454 match self {
455 Self::Lino(error) => write!(formatter, "{error}"),
456 Self::Structure(message) => {
457 write!(formatter, "translation rule structure error: {message}")
458 }
459 Self::Query(error) => write!(formatter, "translation rule query error: {error}"),
460 }
461 }
462}
463
464impl Error for TranslationRuleSetLoadError {}
465
466impl From<LinoSerializationError> for TranslationRuleSetLoadError {
467 fn from(error: LinoSerializationError) -> Self {
468 Self::Lino(error)
469 }
470}
471
472impl From<QueryParseError> for TranslationRuleSetLoadError {
473 fn from(error: QueryParseError) -> Self {
474 Self::Query(error)
475 }
476}
477
478fn load_rule(
479 network: &LinkNetwork,
480 rule_link: &Link,
481) -> Result<TranslationRule, TranslationRuleSetLoadError> {
482 let name = rule_link.metadata().definition().ok_or_else(|| {
483 TranslationRuleSetLoadError::Structure("rule is missing a name".to_string())
484 })?;
485 let query_source = network
486 .links()
487 .find(|link| {
488 link.references().first().copied() == Some(rule_link.id())
489 && link.metadata().term() == Some(MATCH_TERM)
490 })
491 .and_then(|link| link.metadata().definition())
492 .ok_or_else(|| {
493 TranslationRuleSetLoadError::Structure("rule is missing a match query".to_string())
494 })?;
495 let mut rule = TranslationRule::new(name, query_from_rule_spec(query_source)?);
496 let mut children = network
497 .links()
498 .filter(|link| link.references().first().copied() == Some(rule_link.id()))
499 .collect::<Vec<_>>();
500 children.sort_by_key(|link| link.id());
501
502 for child in children {
503 let metadata = child.metadata();
504 if metadata.term() == Some(MATCH_TERM) {
505 continue;
506 }
507 if metadata.language() == Some(REFERENCE_CAPTURE_LANGUAGE) {
508 let capture = metadata.term().ok_or_else(|| {
509 TranslationRuleSetLoadError::Structure(
510 "reference capture is missing a capture name".to_string(),
511 )
512 })?;
513 let index = metadata
514 .definition()
515 .ok_or_else(|| {
516 TranslationRuleSetLoadError::Structure(
517 "reference capture is missing an index".to_string(),
518 )
519 })?
520 .parse::<usize>()
521 .map_err(|error| {
522 TranslationRuleSetLoadError::Structure(format!(
523 "invalid reference capture index: {error}"
524 ))
525 })?;
526 rule = rule.with_reference_capture(capture, index);
527 } else if metadata.definition() == Some(TEMPLATE_DEFINITION) {
528 let target = metadata.language().ok_or_else(|| {
529 TranslationRuleSetLoadError::Structure("template is missing a target".to_string())
530 })?;
531 let template = metadata.term().ok_or_else(|| {
532 TranslationRuleSetLoadError::Structure(
533 "template is missing source text".to_string(),
534 )
535 })?;
536 rule = rule.with_template(target, template);
537 }
538 }
539
540 Ok(rule)
541}
542
543fn query_to_rule_spec(query: &LinkQuery) -> String {
544 let mut object = serde_json::Map::new();
545 if let Some(link_type) = query.link_type_filter() {
546 object.insert("link_type".to_string(), link_type.to_string().into());
547 }
548 if let Some(term) = query.term_filter() {
549 object.insert("term".to_string(), term.into());
550 }
551 if let Some(language) = query.language_filter() {
552 object.insert("language".to_string(), language.into());
553 }
554 if let Some(named) = query.named_filter() {
555 object.insert("named".to_string(), named.into());
556 }
557 if let Some(pattern_source) = query.pattern_source() {
558 object.insert("sexpression".to_string(), pattern_source.into());
559 }
560
561 serde_json::Value::Object(object).to_string()
562}
563
564fn query_from_rule_spec(source: &str) -> Result<LinkQuery, QueryParseError> {
565 let value = serde_json::from_str::<serde_json::Value>(source)
566 .map_err(|error| QueryParseError::new(format!("invalid query spec: {error}")))?;
567 let object = value
568 .as_object()
569 .ok_or_else(|| QueryParseError::new("query spec must be a JSON object"))?;
570
571 let mut query =
572 if let Some(sexpression) = object.get("sexpression").and_then(|value| value.as_str()) {
573 LinkQuery::from_sexpression(sexpression)?
574 } else {
575 LinkQuery::new()
576 };
577
578 if let Some(link_type) = object.get("link_type").and_then(|value| value.as_str()) {
579 query = query.with_link_type(parse_query_link_type(link_type)?);
580 }
581 if let Some(term) = object.get("term").and_then(|value| value.as_str()) {
582 query = query.with_term(term);
583 }
584 if let Some(language) = object.get("language").and_then(|value| value.as_str()) {
585 query = query.with_language(language);
586 }
587 if let Some(named) = object.get("named").and_then(serde_json::Value::as_bool) {
588 query = query.with_named(named);
589 }
590
591 Ok(query)
592}
593
594fn parse_query_link_type(token: &str) -> Result<LinkType, QueryParseError> {
595 Ok(match token {
596 "link" => LinkType::Link,
597 "reference" => LinkType::Reference,
598 "relation" => LinkType::Relation,
599 "language" => LinkType::Language,
600 "grammar" => LinkType::Grammar,
601 "type" => LinkType::Type,
602 "concept" => LinkType::Concept,
603 "syntax" => LinkType::Syntax,
604 "field" => LinkType::Field,
605 "trivia" => LinkType::Trivia,
606 "token" => LinkType::Token,
607 "document" => LinkType::Document,
608 "semantic" => LinkType::Semantic,
609 "region" => LinkType::Region,
610 "object" => LinkType::Object,
611 other => {
612 return Err(QueryParseError::new(format!(
613 "unknown query link type `{other}`"
614 )))
615 }
616 })
617}
618
619fn render_placeholder(
620 network: &LinkNetwork,
621 rule: &TranslationRule,
622 query_match: &QueryMatch,
623 target_language: &str,
624 placeholder: &str,
625) -> String {
626 let (name, mode) = placeholder.split_once(':').map_or_else(
627 || (placeholder.trim(), "language"),
628 |(name, mode)| (name.trim(), mode.trim()),
629 );
630 let Some(link_id) = placeholder_link(network, rule, query_match, name) else {
631 return format!("{{{placeholder}}}");
632 };
633
634 render_link(network, link_id, target_language, mode)
635}
636
637fn placeholder_link(
638 network: &LinkNetwork,
639 rule: &TranslationRule,
640 query_match: &QueryMatch,
641 name: &str,
642) -> Option<LinkId> {
643 if let Some(link_id) = query_match.captures().first(name) {
644 return Some(link_id);
645 }
646
647 let reference_index = *rule.reference_captures.get(name)?;
648 network
649 .link(query_match.link_id())?
650 .references()
651 .get(reference_index)
652 .copied()
653}
654
655fn render_link(
656 network: &LinkNetwork,
657 link_id: LinkId,
658 target_language: &str,
659 mode: &str,
660) -> String {
661 let Some(link) = network.link(link_id) else {
662 return link_id.to_string();
663 };
664 let concept = concept_id_for_link(network, link);
665 match mode {
666 "concept" => concept
667 .or_else(|| link.metadata().term())
668 .map_or_else(|| link_id.to_string(), str::to_string),
669 "term" => link
670 .metadata()
671 .term()
672 .map_or_else(|| link_id.to_string(), str::to_string),
673 _ => concept
674 .and_then(|concept| reconstruct_concept_for_language(network, concept, target_language))
675 .or_else(|| link.metadata().term())
676 .map_or_else(|| link_id.to_string(), str::to_string),
677 }
678}
679
680fn concept_id_for_link<'a>(network: &'a LinkNetwork, link: &'a Link) -> Option<&'a str> {
681 if link.metadata().link_type() == Some(LinkType::Concept) {
682 return link.metadata().term();
683 }
684 let first_reference = link.references().first().copied()?;
685 let concept = network.link(first_reference)?;
686 (concept.metadata().link_type() == Some(LinkType::Concept))
687 .then(|| concept.metadata().term())
688 .flatten()
689}
690
691fn reconstruct_concept_for_language<'a>(
692 network: &'a LinkNetwork,
693 concept: &str,
694 language: &str,
695) -> Option<&'a str> {
696 network.reconstruct_concept(concept, language).or_else(|| {
697 canonical_reconstruction_language(language)
698 .and_then(|canonical| network.reconstruct_concept(concept, canonical))
699 })
700}
701
702fn statehood_demo_rule_set() -> TranslationRuleSet {
703 TranslationRuleSet::new("statehood-demo").with_rule(
704 TranslationRule::new(
705 "statehood proposition",
706 LinkQuery::by_type(LinkType::Semantic).with_term("proposition:statehood"),
707 )
708 .with_reference_capture("subject", 2)
709 .with_reference_capture("object", 3)
710 .with_template("English", "{subject} is a {object}.")
711 .with_template("en", "{subject} is a {object}.")
712 .with_template("Russian", "{subject} это {object}.")
713 .with_template("ru", "{subject} это {object}.")
714 .with_formal_template(
715 FormalizationLevel::Lexical,
716 "statehood({subject}, {object})",
717 )
718 .with_formal_template(
719 FormalizationLevel::Concept,
720 [
721 "statehood(",
722 "{subject:concept}",
723 ", ",
724 "{object:concept}",
725 ")",
726 ]
727 .concat(),
728 )
729 .with_formal_template(
730 FormalizationLevel::Logical,
731 [
732 "(proposition: statehood (subject: ",
733 "{subject:concept}",
734 ") (object: ",
735 "{object:concept}",
736 ") (truth: true))",
737 ]
738 .concat(),
739 ),
740 )
741}
742
743const fn effective_formalization_level(configuration: ParseConfiguration) -> FormalizationLevel {
744 match (
745 configuration.naturalization_direction(),
746 configuration.formalization_level(),
747 ) {
748 (NaturalizationDirection::Formalize, FormalizationLevel::Natural) => {
749 FormalizationLevel::Lexical
750 }
751 (_, level) => level,
752 }
753}
754
755const fn formal_template_target(level: FormalizationLevel) -> &'static str {
756 match level {
757 FormalizationLevel::Natural => "",
758 FormalizationLevel::Lexical => FORMAL_LEXICAL_TARGET,
759 FormalizationLevel::Concept => FORMAL_CONCEPT_TARGET,
760 FormalizationLevel::Logical => FORMAL_LOGICAL_TARGET,
761 }
762}
763
/// Maps a language name or code to the canonical name used for lookups.
///
/// Matching ignores ASCII case: `english`/`en` map to `"English"` and
/// `russian`/`ru` map to `"Russian"`. Any other input, including input with
/// surrounding whitespace, yields `None`.
fn canonical_reconstruction_language(language: &str) -> Option<&'static str> {
    // (alias, canonical name). Comparing with `eq_ignore_ascii_case` avoids
    // allocating a lowercased copy of `language` on every call.
    const ALIASES: [(&str, &str); 4] = [
        ("english", "English"),
        ("en", "English"),
        ("russian", "Russian"),
        ("ru", "Russian"),
    ];

    ALIASES
        .iter()
        .find(|(alias, _)| language.eq_ignore_ascii_case(alias))
        .map(|&(_, canonical)| canonical)
}
771
/// Appends a single `'\n'` to `body` when `source` ends with a newline, so the
/// rendered text mirrors the source's trailing newline.
fn with_source_trailing_newline(mut body: String, source: &str) -> String {
    // A trailing b'\n' byte is always a real newline: UTF-8 continuation bytes
    // never take that value.
    let source_ends_with_newline = matches!(source.as_bytes().last(), Some(b'\n'));
    if source_ends_with_newline {
        body.push('\n');
    }
    body
}