// meta_language/reconstruction.rs

1use crate::{
2    FormalizationLevel, LinkNetwork, LinkType, NaturalizationDirection, ParseConfiguration,
3    TranslationRuleRegistry, TranslationRuleSet,
4};
5
6impl LinkNetwork {
7    /// Reconstructs text for a target language or formalization level.
8    ///
9    /// Natural same-language reconstruction returns the original byte-exact
10    /// token stream. When semantic proposition links are available, target
11    /// natural-language text and configured formal representations are rendered
12    /// through the shared concept mappings.
13    #[must_use]
14    pub fn reconstruct_text_as(
15        &self,
16        target_language: &str,
17        configuration: ParseConfiguration,
18    ) -> String {
19        let source = self.reconstruct_text();
20        if let Some(target_format) =
21            crate::document_formatting::canonical_document_format(target_language)
22        {
23            return self.reconstruct_as_document_format(target_format, source);
24        }
25
26        self.reconstruct_text_as_with_rules(
27            target_language,
28            configuration,
29            &TranslationRuleSet::statehood_demo(),
30        )
31    }
32
33    /// Reconstructs text using a caller-supplied translation rule set.
34    #[must_use]
35    pub fn reconstruct_text_as_with_rules(
36        &self,
37        target_language: &str,
38        configuration: ParseConfiguration,
39        rule_set: &TranslationRuleSet,
40    ) -> String {
41        let source = self.reconstruct_text();
42        if self.is_document_language(target_language)
43            && configuration.formalization_level() == FormalizationLevel::Natural
44            && configuration.naturalization_direction() == NaturalizationDirection::Naturalize
45        {
46            return source;
47        }
48
49        rule_set
50            .render(self, target_language, configuration)
51            .unwrap_or(source)
52    }
53
54    /// Reconstructs text through the active rule set in a registry.
55    #[must_use]
56    pub fn reconstruct_text_as_with_registry(
57        &self,
58        target_language: &str,
59        configuration: ParseConfiguration,
60        registry: &TranslationRuleRegistry,
61    ) -> String {
62        registry.active_rule_set().map_or_else(
63            || self.reconstruct_text(),
64            |rule_set| {
65                self.reconstruct_text_as_with_rules(target_language, configuration, rule_set)
66            },
67        )
68    }
69
70    /// Reconstructs text and records diagnostic links when no rule can render it.
71    pub fn reconstruct_text_as_with_rules_mut(
72        &mut self,
73        target_language: &str,
74        configuration: ParseConfiguration,
75        rule_set: &TranslationRuleSet,
76    ) -> String {
77        let source = self.reconstruct_text();
78        if self.is_document_language(target_language)
79            && configuration.formalization_level() == FormalizationLevel::Natural
80            && configuration.naturalization_direction() == NaturalizationDirection::Naturalize
81        {
82            return source;
83        }
84
85        if let Some(rendered) = rule_set.render(self, target_language, configuration) {
86            return rendered;
87        }
88
89        self.insert_missing_translation_diagnostics(target_language);
90        source
91    }
92
93    fn insert_missing_translation_diagnostics(&mut self, target_language: &str) {
94        let unmatched = self
95            .links()
96            .filter(|link| {
97                link.metadata().link_type() == Some(LinkType::Semantic)
98                    && !link
99                        .metadata()
100                        .term()
101                        .is_some_and(|term| term.starts_with("translation-rule:"))
102            })
103            .map(|link| {
104                (
105                    link.id(),
106                    link.metadata()
107                        .term()
108                        .unwrap_or("semantic link")
109                        .to_string(),
110                )
111            })
112            .collect::<Vec<_>>();
113
114        for (link_id, term) in unmatched {
115            if self.has_missing_translation_diagnostic(link_id, target_language) {
116                continue;
117            }
118            self.insert_link(
119                [link_id],
120                crate::LinkMetadata::new()
121                    .with_link_type(LinkType::Semantic)
122                    .with_named(true)
123                    .with_term("translation-rule:missing")
124                    .with_language(target_language)
125                    .with_definition(format!(
126                        "Missing translation rule for `{term}` targeting `{target_language}`."
127                    )),
128            );
129        }
130    }
131
132    fn has_missing_translation_diagnostic(
133        &self,
134        link_id: crate::LinkId,
135        target_language: &str,
136    ) -> bool {
137        self.links().any(|link| {
138            link.references() == [link_id]
139                && link.metadata().link_type() == Some(LinkType::Semantic)
140                && link.metadata().term() == Some("translation-rule:missing")
141                && link.metadata().language() == Some(target_language)
142        })
143    }
144
145    /// Renders the network's document as a structurally equivalent document in a
146    /// target format (`txt`, `Markdown`, `HTML`, `PDF`, or `DOCX`).
147    ///
148    /// The source document is recovered through the shared, language-free
149    /// formatting concept layer (issue #83): a same-format target re-renders the
150    /// byte-exact source, while a cross-format target is translated into an
151    /// equivalent document carrying the same heading/paragraph/list and
152    /// bold/italic/link structure. Concepts the target cannot represent degrade
153    /// through the documented per-format fallbacks
154    /// (see [`crate::document_format_profile`]). When no document structure is
155    /// recoverable the byte-exact `source` is returned unchanged.
156    fn reconstruct_as_document_format(&self, target_format: &str, source: String) -> String {
157        let Some(source_language) = self.document_source_language() else {
158            return source;
159        };
160        let Some(source_format) =
161            crate::document_formatting::canonical_document_format(&source_language)
162        else {
163            return source;
164        };
165        if source_format == target_format {
166            return source;
167        }
168
169        let Some(document) =
170            crate::document_formatting::parse_markup_document(source_format, &source)
171        else {
172            return source;
173        };
174        if document.blocks.is_empty() {
175            return source;
176        }
177        self.render_markup_document(target_format, &document)
178    }
179
180    /// The language recorded on the network's document root, if any.
181    fn document_source_language(&self) -> Option<String> {
182        self.links()
183            .find(|link| link.metadata().link_type() == Some(LinkType::Document))
184            .and_then(|link| link.metadata().language())
185            .map(ToString::to_string)
186    }
187
188    fn is_document_language(&self, target_language: &str) -> bool {
189        self.links().any(|link| {
190            link.metadata().link_type() == Some(LinkType::Document)
191                && languages_match(link.metadata().language(), target_language)
192        })
193    }
194}
195
/// Maps a language name or two-letter code to its canonical language name.
///
/// Recognises `"english"`/`"en"` and `"russian"`/`"ru"`, compared ASCII
/// case-insensitively and without trimming. Returns `None` for anything else.
fn canonical_reconstruction_language(language: &str) -> Option<&'static str> {
    // (canonical name, accepted spellings). Compared in place so no lowercase
    // copy of `language` has to be allocated.
    const KNOWN_LANGUAGES: [(&str, [&str; 2]); 2] = [
        ("English", ["english", "en"]),
        ("Russian", ["russian", "ru"]),
    ];

    KNOWN_LANGUAGES
        .iter()
        .find(|(_, aliases)| {
            aliases
                .iter()
                .any(|alias| language.eq_ignore_ascii_case(alias))
        })
        .map(|&(canonical, _)| canonical)
}
203
204fn languages_match(source_language: Option<&str>, target_language: &str) -> bool {
205    let Some(source_language) = source_language else {
206        return false;
207    };
208
209    source_language == target_language
210        || canonical_reconstruction_language(source_language)
211            .zip(canonical_reconstruction_language(target_language))
212            .is_some_and(|(source, target)| source == target)
213}