1use std::fmt::Write;
9
10pub mod dialects {
12 pub const RUST: &str = "lino-cst.rust";
13 pub const JS: &str = "lino-cst.js";
14 pub const LEAN: &str = "lino-cst.lean";
15 pub const ROCQ: &str = "lino-cst.rocq";
16 pub const SHARED: &str = "lino-cst.shared";
17}
18
19#[derive(Debug, Clone, PartialEq, Eq)]
22pub enum CstNode {
23 List {
26 tag: Option<String>,
27 open: Option<String>,
28 close: Option<String>,
29 children: Vec<CstNode>,
30 },
31 Token { tag: Option<String>, text: String },
33 Trivia { tag: Option<String>, text: String },
35}
36
37impl CstNode {
38 pub fn list(tag: impl Into<String>, children: Vec<CstNode>) -> Self {
40 CstNode::List {
41 tag: Some(tag.into()),
42 open: None,
43 close: None,
44 children,
45 }
46 }
47
48 pub fn list_with_delims(
50 tag: Option<String>,
51 open: Option<String>,
52 close: Option<String>,
53 children: Vec<CstNode>,
54 ) -> Self {
55 CstNode::List {
56 tag,
57 open,
58 close,
59 children,
60 }
61 }
62
63 pub fn token(text: impl Into<String>, tag: Option<&str>) -> Self {
65 CstNode::Token {
66 tag: tag.map(String::from),
67 text: text.into(),
68 }
69 }
70
71 pub fn trivia(text: impl Into<String>, tag: Option<&str>) -> Self {
73 CstNode::Trivia {
74 tag: tag.map(String::from),
75 text: text.into(),
76 }
77 }
78
79 pub fn is_list(&self) -> bool {
81 matches!(self, CstNode::List { .. })
82 }
83
84 pub fn tag(&self) -> Option<&str> {
86 match self {
87 CstNode::List { tag, .. } => tag.as_deref(),
88 CstNode::Token { tag, .. } => tag.as_deref(),
89 CstNode::Trivia { tag, .. } => tag.as_deref(),
90 }
91 }
92
93 pub fn text(&self) -> &str {
95 match self {
96 CstNode::Token { text, .. } => text,
97 CstNode::Trivia { text, .. } => text,
98 CstNode::List { .. } => "",
99 }
100 }
101
102 pub fn leaves(&self) -> Vec<&CstNode> {
104 let mut out = Vec::new();
105 collect_leaves(self, &mut out);
106 out
107 }
108}
109
110fn collect_leaves<'a>(node: &'a CstNode, out: &mut Vec<&'a CstNode>) {
111 match node {
112 CstNode::List { children, .. } => {
113 for c in children {
114 collect_leaves(c, out);
115 }
116 }
117 _ => out.push(node),
118 }
119}
120
121pub fn print_cst(node: &CstNode) -> String {
123 let mut out = String::new();
124 emit(node, &mut out);
125 out
126}
127
128fn emit(node: &CstNode, out: &mut String) {
129 match node {
130 CstNode::Token { text, .. } | CstNode::Trivia { text, .. } => {
131 out.push_str(text);
132 }
133 CstNode::List {
134 children,
135 open,
136 close,
137 ..
138 } => {
139 if let Some(o) = open {
140 out.push_str(o);
141 }
142 for c in children {
143 emit(c, out);
144 }
145 if let Some(c) = close {
146 out.push_str(c);
147 }
148 }
149 }
150}
151
152pub fn cst_to_lino(node: &CstNode) -> String {
155 let mut out = String::new();
156 write_lino(node, &mut out);
157 out
158}
159
160fn write_lino(node: &CstNode, out: &mut String) {
161 match node {
162 CstNode::Token { text, .. } => {
163 let _ = write!(out, "(lino-cst.token {})", escape_text(text));
164 }
165 CstNode::Trivia { text, .. } => {
166 let _ = write!(out, "(lino-cst.trivia {})", escape_text(text));
167 }
168 CstNode::List {
169 tag,
170 open,
171 close,
172 children,
173 } => {
174 out.push('(');
175 out.push_str("lino-cst.list");
176 if let Some(t) = tag {
177 out.push(' ');
178 out.push_str(t);
179 }
180 if let Some(o) = open {
181 let _ = write!(out, " (open {})", escape_text(o));
182 }
183 if let Some(c) = close {
184 let _ = write!(out, " (close {})", escape_text(c));
185 }
186 for c in children {
187 out.push(' ');
188 write_lino(c, out);
189 }
190 out.push(')');
191 }
192 }
193}
194
195fn escape_text(text: &str) -> String {
196 let mut out = String::with_capacity(text.len() + 2);
197 out.push('"');
198 for ch in text.chars() {
199 match ch {
200 '"' => out.push_str("\\\""),
201 '\\' => out.push_str("\\\\"),
202 '\n' => out.push_str("\\n"),
203 '\r' => out.push_str("\\r"),
204 '\t' => out.push_str("\\t"),
205 '\x08' => out.push_str("\\b"),
206 '\x0C' => out.push_str("\\f"),
207 c if (c as u32) < 0x20 => {
208 let _ = write!(out, "\\u{:04x}", c as u32);
209 }
210 c => out.push(c),
211 }
212 }
213 out.push('"');
214 out
215}
216
217pub fn lino_to_cst(src: &str) -> Result<CstNode, String> {
220 let tokens = tokenise_lino_cst(src)?;
221 let mut idx = 0usize;
222 let node = parse_node(&tokens, &mut idx)?;
223 Ok(node)
224}
225
226fn parse_node(tokens: &[String], idx: &mut usize) -> Result<CstNode, String> {
227 expect(tokens, idx, "(")?;
228 let head = eat(tokens, idx)?;
229 match head.as_str() {
230 "lino-cst.token" => {
231 let lit = eat(tokens, idx)?;
232 expect(tokens, idx, ")")?;
233 Ok(CstNode::token(unescape_text(&lit)?, None))
234 }
235 "lino-cst.trivia" => {
236 let lit = eat(tokens, idx)?;
237 expect(tokens, idx, ")")?;
238 Ok(CstNode::trivia(unescape_text(&lit)?, None))
239 }
240 "lino-cst.list" => {
241 let mut tag: Option<String> = None;
242 let mut open: Option<String> = None;
243 let mut close: Option<String> = None;
244 let mut children: Vec<CstNode> = Vec::new();
245 while peek(tokens, *idx)? != ")" {
246 if peek(tokens, *idx)? == "(" {
247 let lookahead = peek(tokens, *idx + 1)?;
248 if lookahead == "open" {
249 *idx += 2;
250 let lit = eat(tokens, idx)?;
251 expect(tokens, idx, ")")?;
252 open = Some(unescape_text(&lit)?);
253 continue;
254 }
255 if lookahead == "close" {
256 *idx += 2;
257 let lit = eat(tokens, idx)?;
258 expect(tokens, idx, ")")?;
259 close = Some(unescape_text(&lit)?);
260 continue;
261 }
262 children.push(parse_node(tokens, idx)?);
263 } else {
264 if tag.is_some() {
265 return Err(format!("unexpected token {:?}", peek(tokens, *idx)?));
266 }
267 tag = Some(eat(tokens, idx)?);
268 }
269 }
270 expect(tokens, idx, ")")?;
271 Ok(CstNode::list_with_delims(tag, open, close, children))
272 }
273 other => Err(format!("unknown CST tag: {}", other)),
274 }
275}
276
277fn peek(tokens: &[String], idx: usize) -> Result<&str, String> {
278 tokens
279 .get(idx)
280 .map(String::as_str)
281 .ok_or_else(|| "unexpected end of input".to_string())
282}
283
284fn eat(tokens: &[String], idx: &mut usize) -> Result<String, String> {
285 let s = tokens
286 .get(*idx)
287 .cloned()
288 .ok_or_else(|| "unexpected end of input".to_string())?;
289 *idx += 1;
290 Ok(s)
291}
292
293fn expect(tokens: &[String], idx: &mut usize, t: &str) -> Result<(), String> {
294 let got = eat(tokens, idx)?;
295 if got != t {
296 return Err(format!("expected {:?}, got {:?}", t, got));
297 }
298 Ok(())
299}
300
301fn tokenise_lino_cst(src: &str) -> Result<Vec<String>, String> {
302 let bytes: Vec<char> = src.chars().collect();
303 let mut out = Vec::new();
304 let mut i = 0usize;
305 while i < bytes.len() {
306 let c = bytes[i];
307 if c == '(' || c == ')' {
308 out.push(c.to_string());
309 i += 1;
310 continue;
311 }
312 if c == ' ' || c == '\t' || c == '\n' || c == '\r' {
313 i += 1;
314 continue;
315 }
316 if c == '"' {
317 let mut j = i + 1;
318 while j < bytes.len() {
319 if bytes[j] == '\\' {
320 j += 2;
321 continue;
322 }
323 if bytes[j] == '"' {
324 break;
325 }
326 j += 1;
327 }
328 if j >= bytes.len() {
329 return Err("unterminated string literal".to_string());
330 }
331 out.push(bytes[i..=j].iter().collect());
332 i = j + 1;
333 continue;
334 }
335 let mut j = i;
336 while j < bytes.len() && !" \t\n\r()".contains(bytes[j]) {
337 j += 1;
338 }
339 out.push(bytes[i..j].iter().collect());
340 i = j;
341 }
342 Ok(out)
343}
344
345fn unescape_text(literal: &str) -> Result<String, String> {
346 let chars: Vec<char> = literal.chars().collect();
347 if chars.first() != Some(&'"') || chars.last() != Some(&'"') {
348 return Err(format!("not a string literal: {}", literal));
349 }
350 let mut out = String::new();
351 let mut i = 1;
352 while i < chars.len() - 1 {
353 let c = chars[i];
354 if c == '\\' {
355 i += 1;
356 if i >= chars.len() - 1 {
357 return Err("trailing backslash".to_string());
358 }
359 let esc = chars[i];
360 match esc {
361 '"' => out.push('"'),
362 '\\' => out.push('\\'),
363 '/' => out.push('/'),
364 'n' => out.push('\n'),
365 'r' => out.push('\r'),
366 't' => out.push('\t'),
367 'b' => out.push('\x08'),
368 'f' => out.push('\x0C'),
369 'u' => {
370 if i + 4 >= chars.len() {
371 return Err("bad \\u escape".to_string());
372 }
373 let hex: String = chars[i + 1..=i + 4].iter().collect();
374 let cp = u32::from_str_radix(&hex, 16)
375 .map_err(|e| format!("bad \\u escape: {}", e))?;
376 if let Some(ch) = char::from_u32(cp) {
377 out.push(ch);
378 }
379 i += 4;
380 }
381 other => return Err(format!("unknown escape: \\{}", other)),
382 }
383 i += 1;
384 } else {
385 out.push(c);
386 i += 1;
387 }
388 }
389 Ok(out)
390}
391
392pub fn clone_cst(node: &CstNode) -> CstNode {
395 node.clone()
396}
397
398#[cfg(test)]
399mod tests {
400 use super::*;
401
402 #[test]
403 fn prints_list_with_open_close() {
404 let n = CstNode::list_with_delims(
405 Some("demo".into()),
406 Some("(".into()),
407 Some(")".into()),
408 vec![CstNode::token("a", None), CstNode::token("b", None)],
409 );
410 assert_eq!(print_cst(&n), "(ab)");
411 }
412
413 #[test]
414 fn lino_round_trip_for_token_trivia_list() {
415 let n = CstNode::list(
416 "lino-cst.rust.fn",
417 vec![
418 CstNode::token("fn ", Some("kw")),
419 CstNode::token("foo", Some("ident")),
420 CstNode::trivia(" ", Some("ws")),
421 ],
422 );
423 let s = cst_to_lino(&n);
424 let back = lino_to_cst(&s).unwrap();
425 assert_eq!(print_cst(&n), print_cst(&back));
426 }
427}