1use crate::cst::{dialects::ROCQ, print_cst, CstNode};
9
10pub fn parse_rocq(src: &str) -> CstNode {
12 let children = tokenise(src);
13 CstNode::list(format!("{}.document", ROCQ), children)
14}
15
16pub fn print_rocq(node: &CstNode) -> String {
18 print_cst(node)
19}
20
21fn tokenise(src: &str) -> Vec<CstNode> {
22 let chars: Vec<char> = src.chars().collect();
23 let mut out: Vec<CstNode> = Vec::new();
24 let mut i = 0usize;
25
26 while i < chars.len() {
27 let c = chars[i];
28
29 if c == ' ' || c == '\t' || c == '\r' || c == '\n' {
30 let mut j = i;
31 while j < chars.len()
32 && (chars[j] == ' ' || chars[j] == '\t' || chars[j] == '\r' || chars[j] == '\n')
33 {
34 j += 1;
35 }
36 out.push(CstNode::trivia(
37 chars[i..j].iter().collect::<String>(),
38 Some(&format!("{}.whitespace", ROCQ)),
39 ));
40 i = j;
41 continue;
42 }
43
44 if c == '(' && chars.get(i + 1) == Some(&'*') {
45 let j = scan_block_comment(&chars, i);
46 out.push(CstNode::trivia(
47 chars[i..j].iter().collect::<String>(),
48 Some(&format!("{}.comment", ROCQ)),
49 ));
50 i = j;
51 continue;
52 }
53
54 if c == '"' {
55 let j = scan_rocq_string(&chars, i + 1);
56 out.push(CstNode::token(
57 chars[i..j].iter().collect::<String>(),
58 Some(&format!("{}.string_literal", ROCQ)),
59 ));
60 i = j;
61 continue;
62 }
63
64 if c.is_ascii_digit() {
65 let j = scan_number(&chars, i);
66 out.push(CstNode::token(
67 chars[i..j].iter().collect::<String>(),
68 Some(&format!("{}.numeric_literal", ROCQ)),
69 ));
70 i = j;
71 continue;
72 }
73
74 if is_ident_start(c) {
75 let mut j = i + 1;
76 while j < chars.len() && is_ident_continue(chars[j]) {
77 j += 1;
78 }
79 out.push(CstNode::token(
80 chars[i..j].iter().collect::<String>(),
81 Some(&format!("{}.ident", ROCQ)),
82 ));
83 i = j;
84 continue;
85 }
86
87 out.push(CstNode::token(
88 c.to_string(),
89 Some(&format!("{}.punct", ROCQ)),
90 ));
91 i += 1;
92 }
93
94 out
95}
96
97fn scan_block_comment(chars: &[char], i: usize) -> usize {
98 let mut j = i + 2;
99 let mut depth = 1;
100 while j < chars.len() && depth > 0 {
101 if chars[j] == '(' && chars.get(j + 1) == Some(&'*') {
102 depth += 1;
103 j += 2;
104 } else if chars[j] == '*' && chars.get(j + 1) == Some(&')') {
105 depth -= 1;
106 j += 2;
107 } else {
108 j += 1;
109 }
110 }
111 j
112}
113
114fn scan_rocq_string(chars: &[char], mut j: usize) -> usize {
115 while j < chars.len() {
116 if chars[j] == '"' {
117 if chars.get(j + 1) == Some(&'"') {
118 j += 2;
119 continue;
120 }
121 return j + 1;
122 }
123 j += 1;
124 }
125 j
126}
127
128fn scan_number(chars: &[char], i: usize) -> usize {
129 let mut j = i;
130 if chars.get(j) == Some(&'0') && matches!(chars.get(j + 1), Some('x') | Some('X')) {
131 j += 2;
132 while j < chars.len() && chars[j].is_ascii_hexdigit() {
133 j += 1;
134 }
135 return j;
136 }
137 if chars.get(j) == Some(&'0') && matches!(chars.get(j + 1), Some('o') | Some('O')) {
138 j += 2;
139 while j < chars.len() && matches!(chars[j], '0'..='7') {
140 j += 1;
141 }
142 return j;
143 }
144 if chars.get(j) == Some(&'0') && matches!(chars.get(j + 1), Some('b') | Some('B')) {
145 j += 2;
146 while j < chars.len() && matches!(chars[j], '0' | '1') {
147 j += 1;
148 }
149 return j;
150 }
151 while j < chars.len() && chars[j].is_ascii_digit() {
152 j += 1;
153 }
154 j
155}
156
157fn is_ident_start(c: char) -> bool {
158 if c == '_' || c.is_ascii_alphabetic() {
159 return true;
160 }
161 if (c as u32) > 0x7F {
162 return !is_rocq_punct_char(c);
163 }
164 false
165}
166
167fn is_ident_continue(c: char) -> bool {
168 if c == '_' || c == '\'' || c.is_ascii_alphanumeric() {
169 return true;
170 }
171 if (c as u32) > 0x7F {
172 return !is_rocq_punct_char(c);
173 }
174 false
175}
176
177fn is_rocq_punct_char(c: char) -> bool {
178 matches!(c, '→' | '←' | '⟨' | '⟩' | '∀' | '∃' | '∧' | '∨' | '¬')
179}
180
181#[cfg(test)]
182mod tests {
183 use super::*;
184
185 fn rt(src: &str) {
186 let node = parse_rocq(src);
187 let back = print_rocq(&node);
188 assert_eq!(back, src, "round-trip mismatch");
189 }
190
191 #[test]
192 fn empty_string() {
193 rt("");
194 }
195
196 #[test]
197 fn simple_def() {
198 rt("Definition x := 1.\n");
199 }
200
201 #[test]
202 fn nested_block_comment() {
203 rt("(* a (* b *) c *)\nDefinition x := 1.\n");
204 }
205
206 #[test]
207 fn doubled_quote_string() {
208 rt("Definition s := \"hello, \"\"world\"\"\".\n");
209 }
210
211 #[test]
212 fn proof_block() {
213 rt("Theorem t : 1 + 1 = 2. Proof. reflexivity. Qed.\n");
214 }
215
216 #[test]
217 fn hex_number() {
218 rt("Definition n := 0xff.\n");
219 }
220
221 #[test]
222 fn require_import() {
223 rt("Require Import Coq.Lists.List.\n");
224 }
225}