Skip to main content

rml/
cst_rocq.rs

1//! Rocq ↔ `.lino` CST converter (issue #138).
2//!
3//! Token-level lossless converter for Rocq (formerly Coq) source. Produces a
4//! `lino-cst.rocq.*` flat CST whose round-trip is byte-faithful:
5//! `print_rocq(&parse_rocq(src)) == src`. Mirrors `js/src/cst-rocq.mjs`
6//! line for line.
7
8use crate::cst::{dialects::ROCQ, print_cst, CstNode};
9
10/// Parse Rocq source into a `lino-cst.rocq.*` CST.
11pub fn parse_rocq(src: &str) -> CstNode {
12    let children = tokenise(src);
13    CstNode::list(format!("{}.document", ROCQ), children)
14}
15
16/// Print a Rocq CST back to source.
17pub fn print_rocq(node: &CstNode) -> String {
18    print_cst(node)
19}
20
21fn tokenise(src: &str) -> Vec<CstNode> {
22    let chars: Vec<char> = src.chars().collect();
23    let mut out: Vec<CstNode> = Vec::new();
24    let mut i = 0usize;
25
26    while i < chars.len() {
27        let c = chars[i];
28
29        if c == ' ' || c == '\t' || c == '\r' || c == '\n' {
30            let mut j = i;
31            while j < chars.len()
32                && (chars[j] == ' ' || chars[j] == '\t' || chars[j] == '\r' || chars[j] == '\n')
33            {
34                j += 1;
35            }
36            out.push(CstNode::trivia(
37                chars[i..j].iter().collect::<String>(),
38                Some(&format!("{}.whitespace", ROCQ)),
39            ));
40            i = j;
41            continue;
42        }
43
44        if c == '(' && chars.get(i + 1) == Some(&'*') {
45            let j = scan_block_comment(&chars, i);
46            out.push(CstNode::trivia(
47                chars[i..j].iter().collect::<String>(),
48                Some(&format!("{}.comment", ROCQ)),
49            ));
50            i = j;
51            continue;
52        }
53
54        if c == '"' {
55            let j = scan_rocq_string(&chars, i + 1);
56            out.push(CstNode::token(
57                chars[i..j].iter().collect::<String>(),
58                Some(&format!("{}.string_literal", ROCQ)),
59            ));
60            i = j;
61            continue;
62        }
63
64        if c.is_ascii_digit() {
65            let j = scan_number(&chars, i);
66            out.push(CstNode::token(
67                chars[i..j].iter().collect::<String>(),
68                Some(&format!("{}.numeric_literal", ROCQ)),
69            ));
70            i = j;
71            continue;
72        }
73
74        if is_ident_start(c) {
75            let mut j = i + 1;
76            while j < chars.len() && is_ident_continue(chars[j]) {
77                j += 1;
78            }
79            out.push(CstNode::token(
80                chars[i..j].iter().collect::<String>(),
81                Some(&format!("{}.ident", ROCQ)),
82            ));
83            i = j;
84            continue;
85        }
86
87        out.push(CstNode::token(
88            c.to_string(),
89            Some(&format!("{}.punct", ROCQ)),
90        ));
91        i += 1;
92    }
93
94    out
95}
96
97fn scan_block_comment(chars: &[char], i: usize) -> usize {
98    let mut j = i + 2;
99    let mut depth = 1;
100    while j < chars.len() && depth > 0 {
101        if chars[j] == '(' && chars.get(j + 1) == Some(&'*') {
102            depth += 1;
103            j += 2;
104        } else if chars[j] == '*' && chars.get(j + 1) == Some(&')') {
105            depth -= 1;
106            j += 2;
107        } else {
108            j += 1;
109        }
110    }
111    j
112}
113
114fn scan_rocq_string(chars: &[char], mut j: usize) -> usize {
115    while j < chars.len() {
116        if chars[j] == '"' {
117            if chars.get(j + 1) == Some(&'"') {
118                j += 2;
119                continue;
120            }
121            return j + 1;
122        }
123        j += 1;
124    }
125    j
126}
127
128fn scan_number(chars: &[char], i: usize) -> usize {
129    let mut j = i;
130    if chars.get(j) == Some(&'0') && matches!(chars.get(j + 1), Some('x') | Some('X')) {
131        j += 2;
132        while j < chars.len() && chars[j].is_ascii_hexdigit() {
133            j += 1;
134        }
135        return j;
136    }
137    if chars.get(j) == Some(&'0') && matches!(chars.get(j + 1), Some('o') | Some('O')) {
138        j += 2;
139        while j < chars.len() && matches!(chars[j], '0'..='7') {
140            j += 1;
141        }
142        return j;
143    }
144    if chars.get(j) == Some(&'0') && matches!(chars.get(j + 1), Some('b') | Some('B')) {
145        j += 2;
146        while j < chars.len() && matches!(chars[j], '0' | '1') {
147            j += 1;
148        }
149        return j;
150    }
151    while j < chars.len() && chars[j].is_ascii_digit() {
152        j += 1;
153    }
154    j
155}
156
157fn is_ident_start(c: char) -> bool {
158    if c == '_' || c.is_ascii_alphabetic() {
159        return true;
160    }
161    if (c as u32) > 0x7F {
162        return !is_rocq_punct_char(c);
163    }
164    false
165}
166
167fn is_ident_continue(c: char) -> bool {
168    if c == '_' || c == '\'' || c.is_ascii_alphanumeric() {
169        return true;
170    }
171    if (c as u32) > 0x7F {
172        return !is_rocq_punct_char(c);
173    }
174    false
175}
176
177fn is_rocq_punct_char(c: char) -> bool {
178    matches!(c, '→' | '←' | '⟨' | '⟩' | '∀' | '∃' | '∧' | '∨' | '¬')
179}
180
181#[cfg(test)]
182mod tests {
183    use super::*;
184
185    fn rt(src: &str) {
186        let node = parse_rocq(src);
187        let back = print_rocq(&node);
188        assert_eq!(back, src, "round-trip mismatch");
189    }
190
191    #[test]
192    fn empty_string() {
193        rt("");
194    }
195
196    #[test]
197    fn simple_def() {
198        rt("Definition x := 1.\n");
199    }
200
201    #[test]
202    fn nested_block_comment() {
203        rt("(* a (* b *) c *)\nDefinition x := 1.\n");
204    }
205
206    #[test]
207    fn doubled_quote_string() {
208        rt("Definition s := \"hello, \"\"world\"\"\".\n");
209    }
210
211    #[test]
212    fn proof_block() {
213        rt("Theorem t : 1 + 1 = 2. Proof. reflexivity. Qed.\n");
214    }
215
216    #[test]
217    fn hex_number() {
218        rt("Definition n := 0xff.\n");
219    }
220
221    #[test]
222    fn require_import() {
223        rt("Require Import Coq.Lists.List.\n");
224    }
225}