Skip to main content

meta_language/
lino_serialization.rs

1//! Lossless serialization of a [`LinkNetwork`] to and from links-notation text.
2//!
3//! [`LinkNetwork::to_lino`] projects every link in the network onto a single
4//! canonical links-notation statement, keyed by the link's numeric identifier
5//! (a doublets-style id discipline shared by text and future binary storage).
6//! [`LinkNetwork::from_lino`] reconstructs the exact same network from that
7//! text. The pair forms a round-trip: `from_lino(to_lino(n))` is isomorphic to
8//! `n` for any network, covering references, names, types, terms, definitions,
9//! languages, source spans, parse flags, and term registration.
10//!
11//! The emitted dialect is plain links-notation accepted by the
12//! [`links_notation`] 0.13 crate, so other ecosystem parsers can consume the
13//! output. Each statement has the shape:
14//!
15//! ```text
16//! (<id>: <ref> ... (meta: (t: <type>) (n: <0|1>) (term: <pct>) ...))
17//! ```
18//!
19//! where references are decimal link ids and the trailing `meta` sublink
20//! carries metadata. String payloads (`term`, `def`, `lang`) are
21//! percent-encoded so they always form a single escape-free reference token,
22//! sidestepping the crate's quote-escaping edge cases. The `meta` keys are
23//! non-numeric, so they never collide with numeric reference ids, and the
24//! references (`Ref` nodes) are structurally distinct from the `meta` sublink
25//! (a `Link` node) in the parsed AST.
26//!
27//! This is distinct from [`LinkNetwork::parse`] with the `"LiNo"` language,
28//! which interprets human-authored links-notation into a fresh semantic
29//! network. `to_lino`/`from_lino` are an exact serialization pair for an
30//! already-built network.
31
32use std::error::Error;
33use std::fmt;
34use std::fmt::Write as _;
35use std::sync::Arc;
36
37use links_notation::{parse_lino_to_links, LiNo};
38
39use crate::link_flags::LinkFlags;
40use crate::link_network::{Link, LinkId, LinkMetadata, LinkNetwork, LinkType};
41use crate::source::{ByteRange, Point, SourceSpan};
42
/// Failure modes of [`LinkNetwork::from_lino`].
///
/// Both variants carry a human-readable message describing what went wrong.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum LinoSerializationError {
    /// The input was rejected by the links-notation parser itself.
    Parse(String),
    /// The input is valid links-notation but does not follow the
    /// serialization schema.
    Structure(String),
}
51
52impl fmt::Display for LinoSerializationError {
53    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
54        match self {
55            Self::Parse(message) => write!(formatter, "links-notation parse error: {message}"),
56            Self::Structure(message) => {
57                write!(formatter, "serialization structure error: {message}")
58            }
59        }
60    }
61}
62
63impl Error for LinoSerializationError {}
64
65impl LinkNetwork {
66    /// Serializes the entire network to canonical links-notation text.
67    ///
68    /// Every link becomes one statement keyed by its numeric id. The output is
69    /// accepted by the [`links_notation`] crate parser and round-trips back
70    /// through [`LinkNetwork::from_lino`].
71    #[must_use]
72    pub fn to_lino(&self) -> String {
73        let registered: std::collections::BTreeSet<u64> =
74            self.terms.values().map(|id| id.0).collect();
75        let mut output = String::new();
76        for link in self.links.values() {
77            encode_link(link, registered.contains(&link.id.0), &mut output);
78            output.push('\n');
79        }
80        output
81    }
82
83    /// Reconstructs a network from text produced by [`LinkNetwork::to_lino`].
84    ///
85    /// # Errors
86    ///
87    /// Returns [`LinoSerializationError`] when the text is not valid
88    /// links-notation or does not match the serialization schema.
89    pub fn from_lino(text: &str) -> Result<Self, LinoSerializationError> {
90        let statements = parse_lino_to_links(text)
91            .map_err(|error| LinoSerializationError::Parse(error.to_string()))?;
92        let mut network = Self::new();
93        for statement in &statements {
94            let LiNo::Link {
95                id: Some(id),
96                values,
97            } = statement
98            else {
99                return Err(LinoSerializationError::Structure(
100                    "top-level statement must be an identified link".to_string(),
101                ));
102            };
103            let link_id = LinkId(parse_u64(id)?);
104            let mut references = Vec::new();
105            let mut meta_values: Option<&Vec<LiNo<String>>> = None;
106            for value in values {
107                match value {
108                    LiNo::Ref(reference) => references.push(LinkId(parse_u64(reference)?)),
109                    LiNo::Link {
110                        id: Some(key),
111                        values: fields,
112                    } if key == "meta" => meta_values = Some(fields),
113                    LiNo::Link { .. } => {
114                        return Err(LinoSerializationError::Structure(
115                            "statement values must be references or a meta sublink".to_string(),
116                        ))
117                    }
118                }
119            }
120            let meta_values = meta_values.ok_or_else(|| {
121                LinoSerializationError::Structure(
122                    "statement is missing its meta sublink".to_string(),
123                )
124            })?;
125            let (metadata, registered) = decode_meta(meta_values)?;
126            if registered {
127                if let Some(term) = metadata.term() {
128                    network.terms.insert(Arc::from(term), link_id);
129                }
130            }
131            network.next_id = network.next_id.max(link_id.0 + 1);
132            network.links.insert(
133                link_id,
134                Arc::new(Link {
135                    id: link_id,
136                    references: Arc::from(references),
137                    metadata,
138                }),
139            );
140        }
141        Ok(network)
142    }
143}
144
145/// Writes one `(<id>: <refs> (meta: ...))` statement into `output`.
146fn encode_link(link: &Link, registered: bool, output: &mut String) {
147    write!(output, "({}:", link.id.0).expect("writing to a String never fails");
148    for reference in link.references.iter() {
149        write!(output, " {}", reference.0).expect("writing to a String never fails");
150    }
151    output.push_str(" (meta:");
152    let metadata = &link.metadata;
153    if let Some(link_type) = metadata.link_type() {
154        write!(output, " (t: {link_type})").expect("writing to a String never fails");
155    }
156    write!(output, " (n: {})", u8::from(metadata.is_named()))
157        .expect("writing to a String never fails");
158    if let Some(term) = metadata.term() {
159        write!(output, " (term: {})", percent_encode(term))
160            .expect("writing to a String never fails");
161    }
162    if let Some(definition) = metadata.definition() {
163        write!(output, " (def: {})", percent_encode(definition))
164            .expect("writing to a String never fails");
165    }
166    if let Some(language) = metadata.language() {
167        write!(output, " (lang: {})", percent_encode(language))
168            .expect("writing to a String never fails");
169    }
170    if let Some(span) = metadata.span() {
171        let byte_range = span.byte_range();
172        let start = span.start_point();
173        let end = span.end_point();
174        write!(
175            output,
176            " (span: {} {} {} {} {} {})",
177            byte_range.start(),
178            byte_range.end(),
179            start.row(),
180            start.column(),
181            end.row(),
182            end.column(),
183        )
184        .expect("writing to a String never fails");
185    }
186    let bits = flag_bits(metadata.flags());
187    if bits != 0 {
188        write!(output, " (flags: {bits})").expect("writing to a String never fails");
189    }
190    if registered {
191        output.push_str(" (reg: 1)");
192    }
193    output.push_str("))");
194}
195
196/// Decodes a `meta` sublink's fields into metadata and a registration flag.
197fn decode_meta(fields: &[LiNo<String>]) -> Result<(LinkMetadata, bool), LinoSerializationError> {
198    let mut metadata = LinkMetadata::new();
199    let mut registered = false;
200    let mut flag_bits = 0u8;
201    for field in fields {
202        let LiNo::Link {
203            id: Some(key),
204            values,
205        } = field
206        else {
207            return Err(LinoSerializationError::Structure(
208                "meta field must be an identified link".to_string(),
209            ));
210        };
211        match key.as_str() {
212            "t" => metadata = metadata.with_link_type(parse_link_type(single_ref(values)?)?),
213            "n" => metadata = metadata.with_named(single_ref(values)? == "1"),
214            "term" => metadata = metadata.with_term(percent_decode(single_ref(values)?)?),
215            "def" => metadata = metadata.with_definition(percent_decode(single_ref(values)?)?),
216            "lang" => metadata = metadata.with_language(percent_decode(single_ref(values)?)?),
217            "span" => metadata = metadata.with_span(parse_span(values)?),
218            "flags" => flag_bits = parse_u8(single_ref(values)?)?,
219            "reg" => registered = true,
220            other => {
221                return Err(LinoSerializationError::Structure(format!(
222                    "unknown meta field `{other}`"
223                )))
224            }
225        }
226    }
227    if flag_bits != 0 {
228        let mut flags = LinkFlags::clean();
229        if flag_bits & 0b0001 != 0 {
230            flags = flags.with_error();
231        }
232        if flag_bits & 0b0010 != 0 {
233            flags = flags.with_containing_error();
234        }
235        if flag_bits & 0b0100 != 0 {
236            flags = flags.with_missing();
237        }
238        if flag_bits & 0b1000 != 0 {
239            flags = flags.with_extra();
240        }
241        metadata = metadata.with_flags(flags);
242    }
243    Ok((metadata, registered))
244}
245
246/// Packs the four parse-status bits into a single byte.
247fn flag_bits(flags: LinkFlags) -> u8 {
248    u8::from(flags.is_error())
249        | (u8::from(flags.has_error()) << 1)
250        | (u8::from(flags.is_missing()) << 2)
251        | (u8::from(flags.is_extra()) << 3)
252}
253
254/// Builds a [`SourceSpan`] from the six decimal values of a `span` field.
255fn parse_span(values: &[LiNo<String>]) -> Result<SourceSpan, LinoSerializationError> {
256    if values.len() != 6 {
257        return Err(LinoSerializationError::Structure(
258            "span field requires six numbers".to_string(),
259        ));
260    }
261    let mut numbers = [0usize; 6];
262    for (slot, value) in numbers.iter_mut().zip(values) {
263        let LiNo::Ref(reference) = value else {
264            return Err(LinoSerializationError::Structure(
265                "span field values must be numbers".to_string(),
266            ));
267        };
268        *slot = reference.parse().map_err(|_| {
269            LinoSerializationError::Structure(format!("invalid span number `{reference}`"))
270        })?;
271    }
272    Ok(SourceSpan::new(
273        ByteRange::new(numbers[0], numbers[1]),
274        Point::new(numbers[2], numbers[3]),
275        Point::new(numbers[4], numbers[5]),
276    ))
277}
278
279/// Returns the single reference held by a one-value meta field.
280fn single_ref(values: &[LiNo<String>]) -> Result<&str, LinoSerializationError> {
281    match values {
282        [LiNo::Ref(reference)] => Ok(reference),
283        _ => Err(LinoSerializationError::Structure(
284            "meta field must hold exactly one reference".to_string(),
285        )),
286    }
287}
288
289/// Maps a link-type token back to its [`LinkType`] variant.
290fn parse_link_type(token: &str) -> Result<LinkType, LinoSerializationError> {
291    Ok(match token {
292        "link" => LinkType::Link,
293        "reference" => LinkType::Reference,
294        "relation" => LinkType::Relation,
295        "language" => LinkType::Language,
296        "grammar" => LinkType::Grammar,
297        "type" => LinkType::Type,
298        "concept" => LinkType::Concept,
299        "syntax" => LinkType::Syntax,
300        "field" => LinkType::Field,
301        "trivia" => LinkType::Trivia,
302        "token" => LinkType::Token,
303        "document" => LinkType::Document,
304        "semantic" => LinkType::Semantic,
305        "region" => LinkType::Region,
306        "object" => LinkType::Object,
307        other => {
308            return Err(LinoSerializationError::Structure(format!(
309                "unknown link type `{other}`"
310            )))
311        }
312    })
313}
314
315fn parse_u64(value: &str) -> Result<u64, LinoSerializationError> {
316    value
317        .parse()
318        .map_err(|_| LinoSerializationError::Structure(format!("invalid link id `{value}`")))
319}
320
321fn parse_u8(value: &str) -> Result<u8, LinoSerializationError> {
322    value
323        .parse()
324        .map_err(|_| LinoSerializationError::Structure(format!("invalid flags value `{value}`")))
325}
326
/// Turns an arbitrary string into one links-notation token that needs no
/// quoting or escaping.
///
/// ASCII letters, ASCII digits, `-`, `_`, and `.` pass through unchanged;
/// every other byte of the UTF-8 encoding becomes `%` followed by two
/// uppercase hex digits. The empty string is written as the bare sentinel
/// `%`, which cannot be produced by a non-empty input because a literal `%`
/// byte is always written as `%25`.
fn percent_encode(value: &str) -> String {
    if value.is_empty() {
        return String::from("%");
    }
    let mut token = String::with_capacity(value.len());
    for byte in value.bytes() {
        match byte {
            b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'-' | b'_' | b'.' => {
                token.push(char::from(byte));
            }
            _ => write!(token, "%{byte:02X}").expect("writing to a String never fails"),
        }
    }
    token
}
345
346/// Reverses [`percent_encode`].
347fn percent_decode(value: &str) -> Result<String, LinoSerializationError> {
348    if value == "%" {
349        return Ok(String::new());
350    }
351    let bytes = value.as_bytes();
352    let mut decoded = Vec::with_capacity(bytes.len());
353    let mut index = 0;
354    while index < bytes.len() {
355        if bytes[index] == b'%' {
356            if index + 2 >= bytes.len() {
357                return Err(LinoSerializationError::Structure(
358                    "truncated percent escape".to_string(),
359                ));
360            }
361            let high = hex_value(bytes[index + 1])?;
362            let low = hex_value(bytes[index + 2])?;
363            decoded.push((high << 4) | low);
364            index += 3;
365        } else {
366            decoded.push(bytes[index]);
367            index += 1;
368        }
369    }
370    String::from_utf8(decoded).map_err(|_| {
371        LinoSerializationError::Structure("percent escape is not valid UTF-8".to_string())
372    })
373}
374
375fn hex_value(byte: u8) -> Result<u8, LinoSerializationError> {
376    match byte {
377        b'0'..=b'9' => Ok(byte - b'0'),
378        b'a'..=b'f' => Ok(byte - b'a' + 10),
379        b'A'..=b'F' => Ok(byte - b'A' + 10),
380        _ => Err(LinoSerializationError::Structure(
381            "invalid percent escape digit".to_string(),
382        )),
383    }
384}