Skip to main content

meta_language/
translation_rules.rs

1use std::collections::BTreeMap;
2use std::error::Error;
3use std::fmt;
4use std::sync::OnceLock;
5
6use crate::{
7    FormalizationLevel, Link, LinkId, LinkMetadata, LinkNetwork, LinkQuery, LinkType,
8    LinoSerializationError, NaturalizationDirection, ParseConfiguration, QueryMatch,
9    QueryParseError,
10};
11
// Markers used when a rule set is persisted as a `LiNo` link network.
// `TranslationRuleSet::to_lino` writes them and `from_lino` / `load_rule` read them back.

// Term of the root link; the root's definition carries the rule-set name.
const RULE_SET_TERM: &str = "translation-rule-set";
// Term of each rule link; its definition carries the rule name.
const RULE_TERM: &str = "translation-rule";
// Term of the child link whose definition holds the JSON-encoded match query.
const MATCH_TERM: &str = "translation-rule-match";
// Language marker identifying a child link as a reference capture.
const REFERENCE_CAPTURE_LANGUAGE: &str = "translation-rule-reference-capture";
// Definition marker identifying a child link as a template.
const TEMPLATE_DEFINITION: &str = "translation-rule-template";
// Template-map keys used for the non-natural formalization levels.
const FORMAL_LEXICAL_TARGET: &str = "formal:lexical";
const FORMAL_CONCEPT_TARGET: &str = "formal:concept";
const FORMAL_LOGICAL_TARGET: &str = "formal:logical";
20
/// Ordered collection of named translation rules.
///
/// Order matters: rendering walks the rules front to back and stops at the
/// first rule that has a usable template and at least one query match.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct TranslationRuleSet {
    /// Name of the set; also the key under which a registry stores it.
    name: String,
    /// Rules in insertion order.
    rules: Vec<TranslationRule>,
}
27
28impl TranslationRuleSet {
29    /// Creates an empty named rule set.
30    #[must_use]
31    pub fn new(name: impl Into<String>) -> Self {
32        Self {
33            name: name.into(),
34            rules: Vec::new(),
35        }
36    }
37
38    /// Human-readable rule-set name.
39    #[must_use]
40    pub fn name(&self) -> &str {
41        &self.name
42    }
43
44    /// Ordered rules in this set.
45    #[must_use]
46    pub fn rules(&self) -> &[TranslationRule] {
47        &self.rules
48    }
49
50    /// Returns a copy with one rule appended.
51    #[must_use]
52    pub fn with_rule(mut self, rule: TranslationRule) -> Self {
53        self.add_rule(rule);
54        self
55    }
56
57    /// Appends a rule to the end of the ordered rule set.
58    pub fn add_rule(&mut self, rule: TranslationRule) {
59        self.rules.push(rule);
60    }
61
62    /// Serializes this rule set through the existing canonical `LiNo` network format.
63    #[must_use]
64    pub fn to_lino(&self) -> String {
65        let mut network = LinkNetwork::new();
66        let root = network.insert_link(
67            [],
68            LinkMetadata::new()
69                .with_link_type(LinkType::Semantic)
70                .with_named(true)
71                .with_term(RULE_SET_TERM)
72                .with_definition(&self.name),
73        );
74
75        for rule in &self.rules {
76            let rule_link = network.insert_link(
77                [root],
78                LinkMetadata::new()
79                    .with_link_type(LinkType::Semantic)
80                    .with_named(true)
81                    .with_term(RULE_TERM)
82                    .with_definition(rule.name()),
83            );
84            network.insert_link(
85                [rule_link],
86                LinkMetadata::new()
87                    .with_link_type(LinkType::Semantic)
88                    .with_named(true)
89                    .with_term(MATCH_TERM)
90                    .with_definition(query_to_rule_spec(&rule.query)),
91            );
92            for (capture, reference_index) in &rule.reference_captures {
93                network.insert_link(
94                    [rule_link],
95                    LinkMetadata::new()
96                        .with_link_type(LinkType::Semantic)
97                        .with_named(true)
98                        .with_term(capture)
99                        .with_language(REFERENCE_CAPTURE_LANGUAGE)
100                        .with_definition(reference_index.to_string()),
101                );
102            }
103            for (target, template) in &rule.templates {
104                network.insert_link(
105                    [rule_link],
106                    LinkMetadata::new()
107                        .with_link_type(LinkType::Semantic)
108                        .with_named(true)
109                        .with_term(template.source())
110                        .with_language(target)
111                        .with_definition(TEMPLATE_DEFINITION),
112                );
113            }
114        }
115
116        network.to_lino()
117    }
118
119    /// Loads a rule set from `LiNo` text produced by [`TranslationRuleSet::to_lino`].
120    pub fn from_lino(text: &str) -> Result<Self, TranslationRuleSetLoadError> {
121        let network = LinkNetwork::from_lino(text)?;
122        let root = network
123            .links()
124            .find(|link| {
125                link.metadata().link_type() == Some(LinkType::Semantic)
126                    && link.metadata().term() == Some(RULE_SET_TERM)
127            })
128            .ok_or_else(|| {
129                TranslationRuleSetLoadError::Structure(
130                    "missing translation-rule-set root".to_string(),
131                )
132            })?;
133        let mut rules = Vec::new();
134        let mut rule_links = network
135            .links()
136            .filter(|link| {
137                link.references().first().copied() == Some(root.id())
138                    && link.metadata().term() == Some(RULE_TERM)
139            })
140            .collect::<Vec<_>>();
141        rule_links.sort_by_key(|link| link.id());
142
143        for rule_link in rule_links {
144            rules.push(load_rule(&network, rule_link)?);
145        }
146
147        Ok(Self {
148            name: root
149                .metadata()
150                .definition()
151                .unwrap_or(RULE_SET_TERM)
152                .to_string(),
153            rules,
154        })
155    }
156
157    /// `LiNo` text for the built-in statehood demo rule set.
158    #[must_use]
159    pub fn statehood_demo_lino() -> &'static str {
160        static LINO: OnceLock<String> = OnceLock::new();
161        LINO.get_or_init(|| statehood_demo_rule_set().to_lino())
162    }
163
164    /// Loads the statehood demo from its `LiNo` rule-set representation.
165    #[must_use]
166    pub fn statehood_demo() -> Self {
167        Self::from_lino(Self::statehood_demo_lino())
168            .expect("statehood demo translation rule set must load")
169    }
170
171    pub(crate) fn render(
172        &self,
173        network: &LinkNetwork,
174        target_language: &str,
175        configuration: ParseConfiguration,
176    ) -> Option<String> {
177        let source = network.reconstruct_text();
178        for rule in &self.rules {
179            let Some(template) = rule.template_for(target_language, configuration) else {
180                continue;
181            };
182            let rendered = network
183                .query_matches(&rule.query)
184                .into_iter()
185                .map(|query_match| template.render(network, rule, &query_match, target_language))
186                .collect::<Vec<_>>();
187
188            if !rendered.is_empty() {
189                return Some(with_source_trailing_newline(rendered.join("\n"), &source));
190            }
191        }
192
193        None
194    }
195}
196
/// A named translation rule with one match query and target-language templates.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct TranslationRule {
    /// Rule name, persisted as the rule link's definition.
    name: String,
    /// Query selecting the links this rule renders.
    query: LinkQuery,
    /// Placeholder name -> index into the matched link's references.
    reference_captures: BTreeMap<String, usize>,
    /// Target key (language name or `formal:*` target) -> template.
    templates: BTreeMap<String, TranslationTemplate>,
}
205
206impl TranslationRule {
207    /// Creates a named rule that matches links selected by `query`.
208    #[must_use]
209    pub fn new(name: impl Into<String>, query: LinkQuery) -> Self {
210        Self {
211            name: name.into(),
212            query,
213            reference_captures: BTreeMap::new(),
214            templates: BTreeMap::new(),
215        }
216    }
217
218    /// Rule name.
219    #[must_use]
220    pub fn name(&self) -> &str {
221        &self.name
222    }
223
224    /// Link query used by the rule.
225    #[must_use]
226    pub const fn query(&self) -> &LinkQuery {
227        &self.query
228    }
229
230    /// Reference-index captures available to templates.
231    #[must_use]
232    pub const fn reference_captures(&self) -> &BTreeMap<String, usize> {
233        &self.reference_captures
234    }
235
236    /// Target templates keyed by language or formal target.
237    #[must_use]
238    pub const fn templates(&self) -> &BTreeMap<String, TranslationTemplate> {
239        &self.templates
240    }
241
242    /// Adds a placeholder binding that captures one reference from the matched link.
243    #[must_use]
244    pub fn with_reference_capture(
245        mut self,
246        name: impl Into<String>,
247        reference_index: usize,
248    ) -> Self {
249        self.reference_captures.insert(name.into(), reference_index);
250        self
251    }
252
253    /// Adds a template for a natural language target such as `English` or `Russian`.
254    #[must_use]
255    pub fn with_template(
256        mut self,
257        target_language: impl Into<String>,
258        template: impl Into<String>,
259    ) -> Self {
260        self.templates.insert(
261            target_language.into(),
262            TranslationTemplate::new(template.into()),
263        );
264        self
265    }
266
267    /// Adds a template for a formalization level.
268    #[must_use]
269    pub fn with_formal_template(
270        mut self,
271        level: FormalizationLevel,
272        template: impl Into<String>,
273    ) -> Self {
274        self.templates.insert(
275            formal_template_target(level).to_string(),
276            TranslationTemplate::new(template.into()),
277        );
278        self
279    }
280
281    fn template_for(
282        &self,
283        target_language: &str,
284        configuration: ParseConfiguration,
285    ) -> Option<&TranslationTemplate> {
286        let level = effective_formalization_level(configuration);
287        if level != FormalizationLevel::Natural {
288            return self.templates.get(formal_template_target(level));
289        }
290
291        self.templates.get(target_language).or_else(|| {
292            canonical_reconstruction_language(target_language)
293                .and_then(|language| self.templates.get(language))
294        })
295    }
296}
297
/// A quasiquote-style target template with `{placeholder}` substitutions.
///
/// `{{` and `}}` render as literal `{` and `}`. A placeholder may carry a
/// mode suffix after a colon, e.g. `{subject:concept}`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct TranslationTemplate {
    /// Raw, unrendered template text.
    source: String,
}
303
304impl TranslationTemplate {
305    /// Creates a template from source text.
306    #[must_use]
307    pub fn new(source: impl Into<String>) -> Self {
308        Self {
309            source: source.into(),
310        }
311    }
312
313    /// Template source text.
314    #[must_use]
315    pub fn source(&self) -> &str {
316        &self.source
317    }
318
319    fn render(
320        &self,
321        network: &LinkNetwork,
322        rule: &TranslationRule,
323        query_match: &QueryMatch,
324        target_language: &str,
325    ) -> String {
326        let mut output = String::new();
327        let mut chars = self.source.chars().peekable();
328        while let Some(character) = chars.next() {
329            match character {
330                '{' if chars.peek() == Some(&'{') => {
331                    chars.next();
332                    output.push('{');
333                }
334                '{' => {
335                    let mut placeholder = String::new();
336                    let mut closed = false;
337                    for next in chars.by_ref() {
338                        if next == '}' {
339                            closed = true;
340                            break;
341                        }
342                        placeholder.push(next);
343                    }
344                    if closed {
345                        output.push_str(&render_placeholder(
346                            network,
347                            rule,
348                            query_match,
349                            target_language,
350                            &placeholder,
351                        ));
352                    } else {
353                        output.push('{');
354                        output.push_str(&placeholder);
355                    }
356                }
357                '}' if chars.peek() == Some(&'}') => {
358                    chars.next();
359                    output.push('}');
360                }
361                other => output.push(other),
362            }
363        }
364        output
365    }
366}
367
/// Runtime registry for selecting and replacing active translation rule sets.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct TranslationRuleRegistry {
    /// Registered rule sets keyed by their name.
    rule_sets: BTreeMap<String, TranslationRuleSet>,
    /// Name of the currently active rule set, if any has been registered.
    active_rule_set: Option<String>,
}
374
375impl TranslationRuleRegistry {
376    /// Creates an empty registry.
377    #[must_use]
378    pub fn new() -> Self {
379        Self::default()
380    }
381
382    /// Creates a registry containing the statehood demo rule set.
383    #[must_use]
384    pub fn with_statehood_demo() -> Self {
385        Self::new().with_rule_set(TranslationRuleSet::statehood_demo())
386    }
387
388    /// Returns a copy with a rule set registered.
389    #[must_use]
390    pub fn with_rule_set(mut self, rule_set: TranslationRuleSet) -> Self {
391        self.replace_rule_set(rule_set);
392        self
393    }
394
395    /// Inserts or replaces a rule set. The first registered set becomes active.
396    pub fn replace_rule_set(&mut self, rule_set: TranslationRuleSet) {
397        let name = rule_set.name().to_string();
398        if self.active_rule_set.is_none() {
399            self.active_rule_set = Some(name.clone());
400        }
401        self.rule_sets.insert(name, rule_set);
402    }
403
404    /// Selects the active rule set by name.
405    pub fn set_active_rule_set(&mut self, name: &str) -> bool {
406        if self.rule_sets.contains_key(name) {
407            self.active_rule_set = Some(name.to_string());
408            true
409        } else {
410            false
411        }
412    }
413
414    /// Returns the active rule set.
415    #[must_use]
416    pub fn active_rule_set(&self) -> Option<&TranslationRuleSet> {
417        self.active_rule_set
418            .as_deref()
419            .and_then(|name| self.rule_sets.get(name))
420    }
421
422    /// Looks up a rule set by name.
423    #[must_use]
424    pub fn rule_set(&self, name: &str) -> Option<&TranslationRuleSet> {
425        self.rule_sets.get(name)
426    }
427
428    /// Number of registered rule sets.
429    #[must_use]
430    pub fn len(&self) -> usize {
431        self.rule_sets.len()
432    }
433
434    /// Whether the registry has no rule sets.
435    #[must_use]
436    pub fn is_empty(&self) -> bool {
437        self.rule_sets.is_empty()
438    }
439}
440
/// Error returned when a rule set cannot be loaded from `LiNo`.
///
/// `Lino` and `Query` wrap errors raised by lower layers (both have `From`
/// conversions so `?` can be used); `Structure` carries a message describing
/// which part of the translation-rule schema was missing or malformed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TranslationRuleSetLoadError {
    /// The underlying network `LiNo` failed to load.
    Lino(LinoSerializationError),
    /// The loaded network does not match the translation rule schema.
    Structure(String),
    /// A persisted `LinkQuery` failed to parse.
    Query(QueryParseError),
}
451
452impl fmt::Display for TranslationRuleSetLoadError {
453    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
454        match self {
455            Self::Lino(error) => write!(formatter, "{error}"),
456            Self::Structure(message) => {
457                write!(formatter, "translation rule structure error: {message}")
458            }
459            Self::Query(error) => write!(formatter, "translation rule query error: {error}"),
460        }
461    }
462}
463
// Marker impl: relies on the `Debug` derive and the `Display` impl; no
// `source()` override is provided, so wrapped errors are reachable only
// through the enum variants.
impl Error for TranslationRuleSetLoadError {}
465
466impl From<LinoSerializationError> for TranslationRuleSetLoadError {
467    fn from(error: LinoSerializationError) -> Self {
468        Self::Lino(error)
469    }
470}
471
472impl From<QueryParseError> for TranslationRuleSetLoadError {
473    fn from(error: QueryParseError) -> Self {
474        Self::Query(error)
475    }
476}
477
478fn load_rule(
479    network: &LinkNetwork,
480    rule_link: &Link,
481) -> Result<TranslationRule, TranslationRuleSetLoadError> {
482    let name = rule_link.metadata().definition().ok_or_else(|| {
483        TranslationRuleSetLoadError::Structure("rule is missing a name".to_string())
484    })?;
485    let query_source = network
486        .links()
487        .find(|link| {
488            link.references().first().copied() == Some(rule_link.id())
489                && link.metadata().term() == Some(MATCH_TERM)
490        })
491        .and_then(|link| link.metadata().definition())
492        .ok_or_else(|| {
493            TranslationRuleSetLoadError::Structure("rule is missing a match query".to_string())
494        })?;
495    let mut rule = TranslationRule::new(name, query_from_rule_spec(query_source)?);
496    let mut children = network
497        .links()
498        .filter(|link| link.references().first().copied() == Some(rule_link.id()))
499        .collect::<Vec<_>>();
500    children.sort_by_key(|link| link.id());
501
502    for child in children {
503        let metadata = child.metadata();
504        if metadata.term() == Some(MATCH_TERM) {
505            continue;
506        }
507        if metadata.language() == Some(REFERENCE_CAPTURE_LANGUAGE) {
508            let capture = metadata.term().ok_or_else(|| {
509                TranslationRuleSetLoadError::Structure(
510                    "reference capture is missing a capture name".to_string(),
511                )
512            })?;
513            let index = metadata
514                .definition()
515                .ok_or_else(|| {
516                    TranslationRuleSetLoadError::Structure(
517                        "reference capture is missing an index".to_string(),
518                    )
519                })?
520                .parse::<usize>()
521                .map_err(|error| {
522                    TranslationRuleSetLoadError::Structure(format!(
523                        "invalid reference capture index: {error}"
524                    ))
525                })?;
526            rule = rule.with_reference_capture(capture, index);
527        } else if metadata.definition() == Some(TEMPLATE_DEFINITION) {
528            let target = metadata.language().ok_or_else(|| {
529                TranslationRuleSetLoadError::Structure("template is missing a target".to_string())
530            })?;
531            let template = metadata.term().ok_or_else(|| {
532                TranslationRuleSetLoadError::Structure(
533                    "template is missing source text".to_string(),
534                )
535            })?;
536            rule = rule.with_template(target, template);
537        }
538    }
539
540    Ok(rule)
541}
542
543fn query_to_rule_spec(query: &LinkQuery) -> String {
544    let mut object = serde_json::Map::new();
545    if let Some(link_type) = query.link_type_filter() {
546        object.insert("link_type".to_string(), link_type.to_string().into());
547    }
548    if let Some(term) = query.term_filter() {
549        object.insert("term".to_string(), term.into());
550    }
551    if let Some(language) = query.language_filter() {
552        object.insert("language".to_string(), language.into());
553    }
554    if let Some(named) = query.named_filter() {
555        object.insert("named".to_string(), named.into());
556    }
557    if let Some(pattern_source) = query.pattern_source() {
558        object.insert("sexpression".to_string(), pattern_source.into());
559    }
560
561    serde_json::Value::Object(object).to_string()
562}
563
564fn query_from_rule_spec(source: &str) -> Result<LinkQuery, QueryParseError> {
565    let value = serde_json::from_str::<serde_json::Value>(source)
566        .map_err(|error| QueryParseError::new(format!("invalid query spec: {error}")))?;
567    let object = value
568        .as_object()
569        .ok_or_else(|| QueryParseError::new("query spec must be a JSON object"))?;
570
571    let mut query =
572        if let Some(sexpression) = object.get("sexpression").and_then(|value| value.as_str()) {
573            LinkQuery::from_sexpression(sexpression)?
574        } else {
575            LinkQuery::new()
576        };
577
578    if let Some(link_type) = object.get("link_type").and_then(|value| value.as_str()) {
579        query = query.with_link_type(parse_query_link_type(link_type)?);
580    }
581    if let Some(term) = object.get("term").and_then(|value| value.as_str()) {
582        query = query.with_term(term);
583    }
584    if let Some(language) = object.get("language").and_then(|value| value.as_str()) {
585        query = query.with_language(language);
586    }
587    if let Some(named) = object.get("named").and_then(serde_json::Value::as_bool) {
588        query = query.with_named(named);
589    }
590
591    Ok(query)
592}
593
594fn parse_query_link_type(token: &str) -> Result<LinkType, QueryParseError> {
595    Ok(match token {
596        "link" => LinkType::Link,
597        "reference" => LinkType::Reference,
598        "relation" => LinkType::Relation,
599        "language" => LinkType::Language,
600        "grammar" => LinkType::Grammar,
601        "type" => LinkType::Type,
602        "concept" => LinkType::Concept,
603        "syntax" => LinkType::Syntax,
604        "field" => LinkType::Field,
605        "trivia" => LinkType::Trivia,
606        "token" => LinkType::Token,
607        "document" => LinkType::Document,
608        "semantic" => LinkType::Semantic,
609        "region" => LinkType::Region,
610        "object" => LinkType::Object,
611        other => {
612            return Err(QueryParseError::new(format!(
613                "unknown query link type `{other}`"
614            )))
615        }
616    })
617}
618
619fn render_placeholder(
620    network: &LinkNetwork,
621    rule: &TranslationRule,
622    query_match: &QueryMatch,
623    target_language: &str,
624    placeholder: &str,
625) -> String {
626    let (name, mode) = placeholder.split_once(':').map_or_else(
627        || (placeholder.trim(), "language"),
628        |(name, mode)| (name.trim(), mode.trim()),
629    );
630    let Some(link_id) = placeholder_link(network, rule, query_match, name) else {
631        return format!("{{{placeholder}}}");
632    };
633
634    render_link(network, link_id, target_language, mode)
635}
636
637fn placeholder_link(
638    network: &LinkNetwork,
639    rule: &TranslationRule,
640    query_match: &QueryMatch,
641    name: &str,
642) -> Option<LinkId> {
643    if let Some(link_id) = query_match.captures().first(name) {
644        return Some(link_id);
645    }
646
647    let reference_index = *rule.reference_captures.get(name)?;
648    network
649        .link(query_match.link_id())?
650        .references()
651        .get(reference_index)
652        .copied()
653}
654
655fn render_link(
656    network: &LinkNetwork,
657    link_id: LinkId,
658    target_language: &str,
659    mode: &str,
660) -> String {
661    let Some(link) = network.link(link_id) else {
662        return link_id.to_string();
663    };
664    let concept = concept_id_for_link(network, link);
665    match mode {
666        "concept" => concept
667            .or_else(|| link.metadata().term())
668            .map_or_else(|| link_id.to_string(), str::to_string),
669        "term" => link
670            .metadata()
671            .term()
672            .map_or_else(|| link_id.to_string(), str::to_string),
673        _ => concept
674            .and_then(|concept| reconstruct_concept_for_language(network, concept, target_language))
675            .or_else(|| link.metadata().term())
676            .map_or_else(|| link_id.to_string(), str::to_string),
677    }
678}
679
680fn concept_id_for_link<'a>(network: &'a LinkNetwork, link: &'a Link) -> Option<&'a str> {
681    if link.metadata().link_type() == Some(LinkType::Concept) {
682        return link.metadata().term();
683    }
684    let first_reference = link.references().first().copied()?;
685    let concept = network.link(first_reference)?;
686    (concept.metadata().link_type() == Some(LinkType::Concept))
687        .then(|| concept.metadata().term())
688        .flatten()
689}
690
691fn reconstruct_concept_for_language<'a>(
692    network: &'a LinkNetwork,
693    concept: &str,
694    language: &str,
695) -> Option<&'a str> {
696    network.reconstruct_concept(concept, language).or_else(|| {
697        canonical_reconstruction_language(language)
698            .and_then(|canonical| network.reconstruct_concept(concept, canonical))
699    })
700}
701
702fn statehood_demo_rule_set() -> TranslationRuleSet {
703    TranslationRuleSet::new("statehood-demo").with_rule(
704        TranslationRule::new(
705            "statehood proposition",
706            LinkQuery::by_type(LinkType::Semantic).with_term("proposition:statehood"),
707        )
708        .with_reference_capture("subject", 2)
709        .with_reference_capture("object", 3)
710        .with_template("English", "{subject} is a {object}.")
711        .with_template("en", "{subject} is a {object}.")
712        .with_template("Russian", "{subject} это {object}.")
713        .with_template("ru", "{subject} это {object}.")
714        .with_formal_template(
715            FormalizationLevel::Lexical,
716            "statehood({subject}, {object})",
717        )
718        .with_formal_template(
719            FormalizationLevel::Concept,
720            [
721                "statehood(",
722                "{subject:concept}",
723                ", ",
724                "{object:concept}",
725                ")",
726            ]
727            .concat(),
728        )
729        .with_formal_template(
730            FormalizationLevel::Logical,
731            [
732                "(proposition: statehood (subject: ",
733                "{subject:concept}",
734                ") (object: ",
735                "{object:concept}",
736                ") (truth: true))",
737            ]
738            .concat(),
739        ),
740    )
741}
742
743const fn effective_formalization_level(configuration: ParseConfiguration) -> FormalizationLevel {
744    match (
745        configuration.naturalization_direction(),
746        configuration.formalization_level(),
747    ) {
748        (NaturalizationDirection::Formalize, FormalizationLevel::Natural) => {
749            FormalizationLevel::Lexical
750        }
751        (_, level) => level,
752    }
753}
754
755const fn formal_template_target(level: FormalizationLevel) -> &'static str {
756    match level {
757        FormalizationLevel::Natural => "",
758        FormalizationLevel::Lexical => FORMAL_LEXICAL_TARGET,
759        FormalizationLevel::Concept => FORMAL_CONCEPT_TARGET,
760        FormalizationLevel::Logical => FORMAL_LOGICAL_TARGET,
761    }
762}
763
/// Maps a language name or short code to the canonical name used as a
/// template / reconstruction key.
///
/// Matching ignores ASCII case and is otherwise exact (no trimming).
/// Recognised inputs are `english`/`en` -> `English` and
/// `russian`/`ru` -> `Russian`; anything else yields `None`.
fn canonical_reconstruction_language(language: &str) -> Option<&'static str> {
    // (alias, canonical name). Aliases are lowercase ASCII.
    const ALIASES: [(&str, &str); 4] = [
        ("english", "English"),
        ("en", "English"),
        ("russian", "Russian"),
        ("ru", "Russian"),
    ];

    // `eq_ignore_ascii_case` compares in place, so no lowercase copy of
    // `language` is allocated for each lookup.
    ALIASES
        .iter()
        .find(|(alias, _)| language.eq_ignore_ascii_case(alias))
        .map(|&(_, canonical)| canonical)
}
771
/// Returns `body`, with one `\n` appended when `source` ends with a newline.
///
/// At most one newline is added, however many `source` ends with, and
/// `body` is not inspected.
fn with_source_trailing_newline(mut body: String, source: &str) -> String {
    let source_is_newline_terminated = source.strip_suffix('\n').is_some();
    if source_is_newline_terminated {
        body.push('\n');
    }
    body
}