1use std::error::Error;
33use std::fmt;
34use std::fmt::Write as _;
35use std::sync::Arc;
36
37use links_notation::{parse_lino_to_links, LiNo};
38
39use crate::link_flags::LinkFlags;
40use crate::link_network::{Link, LinkId, LinkMetadata, LinkNetwork, LinkType};
41use crate::source::{ByteRange, Point, SourceSpan};
42
/// Failure raised while converting between a link network and its
/// links-notation text form.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum LinoSerializationError {
    /// The text could not be parsed as links-notation; carries the parser's
    /// message.
    Parse(String),
    /// The text parsed, but its shape is not what the deserializer expects;
    /// carries a description of the mismatch.
    Structure(String),
}

impl fmt::Display for LinoSerializationError {
    /// Writes a fixed, variant-specific prefix followed by the carried message.
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            LinoSerializationError::Structure(message) => {
                write!(formatter, "serialization structure error: {message}")
            }
            LinoSerializationError::Parse(message) => {
                write!(formatter, "links-notation parse error: {message}")
            }
        }
    }
}

// No underlying cause is tracked, so the default `Error` methods suffice.
impl Error for LinoSerializationError {}
64
65impl LinkNetwork {
66 #[must_use]
72 pub fn to_lino(&self) -> String {
73 let registered: std::collections::BTreeSet<u64> =
74 self.terms.values().map(|id| id.0).collect();
75 let mut output = String::new();
76 for link in self.links.values() {
77 encode_link(link, registered.contains(&link.id.0), &mut output);
78 output.push('\n');
79 }
80 output
81 }
82
83 pub fn from_lino(text: &str) -> Result<Self, LinoSerializationError> {
90 let statements = parse_lino_to_links(text)
91 .map_err(|error| LinoSerializationError::Parse(error.to_string()))?;
92 let mut network = Self::new();
93 for statement in &statements {
94 let LiNo::Link {
95 id: Some(id),
96 values,
97 } = statement
98 else {
99 return Err(LinoSerializationError::Structure(
100 "top-level statement must be an identified link".to_string(),
101 ));
102 };
103 let link_id = LinkId(parse_u64(id)?);
104 let mut references = Vec::new();
105 let mut meta_values: Option<&Vec<LiNo<String>>> = None;
106 for value in values {
107 match value {
108 LiNo::Ref(reference) => references.push(LinkId(parse_u64(reference)?)),
109 LiNo::Link {
110 id: Some(key),
111 values: fields,
112 } if key == "meta" => meta_values = Some(fields),
113 LiNo::Link { .. } => {
114 return Err(LinoSerializationError::Structure(
115 "statement values must be references or a meta sublink".to_string(),
116 ))
117 }
118 }
119 }
120 let meta_values = meta_values.ok_or_else(|| {
121 LinoSerializationError::Structure(
122 "statement is missing its meta sublink".to_string(),
123 )
124 })?;
125 let (metadata, registered) = decode_meta(meta_values)?;
126 if registered {
127 if let Some(term) = metadata.term() {
128 network.terms.insert(Arc::from(term), link_id);
129 }
130 }
131 network.next_id = network.next_id.max(link_id.0 + 1);
132 network.links.insert(
133 link_id,
134 Arc::new(Link {
135 id: link_id,
136 references: Arc::from(references),
137 metadata,
138 }),
139 );
140 }
141 Ok(network)
142 }
143}
144
145fn encode_link(link: &Link, registered: bool, output: &mut String) {
147 write!(output, "({}:", link.id.0).expect("writing to a String never fails");
148 for reference in link.references.iter() {
149 write!(output, " {}", reference.0).expect("writing to a String never fails");
150 }
151 output.push_str(" (meta:");
152 let metadata = &link.metadata;
153 if let Some(link_type) = metadata.link_type() {
154 write!(output, " (t: {link_type})").expect("writing to a String never fails");
155 }
156 write!(output, " (n: {})", u8::from(metadata.is_named()))
157 .expect("writing to a String never fails");
158 if let Some(term) = metadata.term() {
159 write!(output, " (term: {})", percent_encode(term))
160 .expect("writing to a String never fails");
161 }
162 if let Some(definition) = metadata.definition() {
163 write!(output, " (def: {})", percent_encode(definition))
164 .expect("writing to a String never fails");
165 }
166 if let Some(language) = metadata.language() {
167 write!(output, " (lang: {})", percent_encode(language))
168 .expect("writing to a String never fails");
169 }
170 if let Some(span) = metadata.span() {
171 let byte_range = span.byte_range();
172 let start = span.start_point();
173 let end = span.end_point();
174 write!(
175 output,
176 " (span: {} {} {} {} {} {})",
177 byte_range.start(),
178 byte_range.end(),
179 start.row(),
180 start.column(),
181 end.row(),
182 end.column(),
183 )
184 .expect("writing to a String never fails");
185 }
186 let bits = flag_bits(metadata.flags());
187 if bits != 0 {
188 write!(output, " (flags: {bits})").expect("writing to a String never fails");
189 }
190 if registered {
191 output.push_str(" (reg: 1)");
192 }
193 output.push_str("))");
194}
195
196fn decode_meta(fields: &[LiNo<String>]) -> Result<(LinkMetadata, bool), LinoSerializationError> {
198 let mut metadata = LinkMetadata::new();
199 let mut registered = false;
200 let mut flag_bits = 0u8;
201 for field in fields {
202 let LiNo::Link {
203 id: Some(key),
204 values,
205 } = field
206 else {
207 return Err(LinoSerializationError::Structure(
208 "meta field must be an identified link".to_string(),
209 ));
210 };
211 match key.as_str() {
212 "t" => metadata = metadata.with_link_type(parse_link_type(single_ref(values)?)?),
213 "n" => metadata = metadata.with_named(single_ref(values)? == "1"),
214 "term" => metadata = metadata.with_term(percent_decode(single_ref(values)?)?),
215 "def" => metadata = metadata.with_definition(percent_decode(single_ref(values)?)?),
216 "lang" => metadata = metadata.with_language(percent_decode(single_ref(values)?)?),
217 "span" => metadata = metadata.with_span(parse_span(values)?),
218 "flags" => flag_bits = parse_u8(single_ref(values)?)?,
219 "reg" => registered = true,
220 other => {
221 return Err(LinoSerializationError::Structure(format!(
222 "unknown meta field `{other}`"
223 )))
224 }
225 }
226 }
227 if flag_bits != 0 {
228 let mut flags = LinkFlags::clean();
229 if flag_bits & 0b0001 != 0 {
230 flags = flags.with_error();
231 }
232 if flag_bits & 0b0010 != 0 {
233 flags = flags.with_containing_error();
234 }
235 if flag_bits & 0b0100 != 0 {
236 flags = flags.with_missing();
237 }
238 if flag_bits & 0b1000 != 0 {
239 flags = flags.with_extra();
240 }
241 metadata = metadata.with_flags(flags);
242 }
243 Ok((metadata, registered))
244}
245
246fn flag_bits(flags: LinkFlags) -> u8 {
248 u8::from(flags.is_error())
249 | (u8::from(flags.has_error()) << 1)
250 | (u8::from(flags.is_missing()) << 2)
251 | (u8::from(flags.is_extra()) << 3)
252}
253
254fn parse_span(values: &[LiNo<String>]) -> Result<SourceSpan, LinoSerializationError> {
256 if values.len() != 6 {
257 return Err(LinoSerializationError::Structure(
258 "span field requires six numbers".to_string(),
259 ));
260 }
261 let mut numbers = [0usize; 6];
262 for (slot, value) in numbers.iter_mut().zip(values) {
263 let LiNo::Ref(reference) = value else {
264 return Err(LinoSerializationError::Structure(
265 "span field values must be numbers".to_string(),
266 ));
267 };
268 *slot = reference.parse().map_err(|_| {
269 LinoSerializationError::Structure(format!("invalid span number `{reference}`"))
270 })?;
271 }
272 Ok(SourceSpan::new(
273 ByteRange::new(numbers[0], numbers[1]),
274 Point::new(numbers[2], numbers[3]),
275 Point::new(numbers[4], numbers[5]),
276 ))
277}
278
279fn single_ref(values: &[LiNo<String>]) -> Result<&str, LinoSerializationError> {
281 match values {
282 [LiNo::Ref(reference)] => Ok(reference),
283 _ => Err(LinoSerializationError::Structure(
284 "meta field must hold exactly one reference".to_string(),
285 )),
286 }
287}
288
289fn parse_link_type(token: &str) -> Result<LinkType, LinoSerializationError> {
291 Ok(match token {
292 "link" => LinkType::Link,
293 "reference" => LinkType::Reference,
294 "relation" => LinkType::Relation,
295 "language" => LinkType::Language,
296 "grammar" => LinkType::Grammar,
297 "type" => LinkType::Type,
298 "concept" => LinkType::Concept,
299 "syntax" => LinkType::Syntax,
300 "field" => LinkType::Field,
301 "trivia" => LinkType::Trivia,
302 "token" => LinkType::Token,
303 "document" => LinkType::Document,
304 "semantic" => LinkType::Semantic,
305 "region" => LinkType::Region,
306 "object" => LinkType::Object,
307 other => {
308 return Err(LinoSerializationError::Structure(format!(
309 "unknown link type `{other}`"
310 )))
311 }
312 })
313}
314
315fn parse_u64(value: &str) -> Result<u64, LinoSerializationError> {
316 value
317 .parse()
318 .map_err(|_| LinoSerializationError::Structure(format!("invalid link id `{value}`")))
319}
320
321fn parse_u8(value: &str) -> Result<u8, LinoSerializationError> {
322 value
323 .parse()
324 .map_err(|_| LinoSerializationError::Structure(format!("invalid flags value `{value}`")))
325}
326
/// Encodes `value` so it can be written as a single bare token.
///
/// ASCII letters, ASCII digits, `-`, `_` and `.` are kept as they are; every
/// other byte of the UTF-8 encoding becomes `%XX` with uppercase hex digits.
/// The empty string is written as a lone `%` so the token is never empty.
fn percent_encode(value: &str) -> String {
    if value.is_empty() {
        return String::from("%");
    }
    value
        .bytes()
        .fold(String::with_capacity(value.len()), |mut encoded, byte| {
            match byte {
                b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_' | b'.' => {
                    encoded.push(char::from(byte));
                }
                _ => write!(encoded, "%{byte:02X}").expect("writing to a String never fails"),
            }
            encoded
        })
}
345
346fn percent_decode(value: &str) -> Result<String, LinoSerializationError> {
348 if value == "%" {
349 return Ok(String::new());
350 }
351 let bytes = value.as_bytes();
352 let mut decoded = Vec::with_capacity(bytes.len());
353 let mut index = 0;
354 while index < bytes.len() {
355 if bytes[index] == b'%' {
356 if index + 2 >= bytes.len() {
357 return Err(LinoSerializationError::Structure(
358 "truncated percent escape".to_string(),
359 ));
360 }
361 let high = hex_value(bytes[index + 1])?;
362 let low = hex_value(bytes[index + 2])?;
363 decoded.push((high << 4) | low);
364 index += 3;
365 } else {
366 decoded.push(bytes[index]);
367 index += 1;
368 }
369 }
370 String::from_utf8(decoded).map_err(|_| {
371 LinoSerializationError::Structure("percent escape is not valid UTF-8".to_string())
372 })
373}
374
375fn hex_value(byte: u8) -> Result<u8, LinoSerializationError> {
376 match byte {
377 b'0'..=b'9' => Ok(byte - b'0'),
378 b'a'..=b'f' => Ok(byte - b'a' + 10),
379 b'A'..=b'F' => Ok(byte - b'A' + 10),
380 _ => Err(LinoSerializationError::Structure(
381 "invalid percent escape digit".to_string(),
382 )),
383 }
384}