1use std::collections::BTreeMap;
10
11use super::DocumentFormatInstance;
12use crate::link_network::LinkNetwork;
13
/// Inline-level content that lives inside a block: either literal text or a
/// span tagged with a formatting concept.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum InlineNode {
    /// A run of literal text.
    Text(String),
    /// A span wrapped in a named formatting concept.
    Wrapped {
        /// Concept identifier. The parsers in this file emit `"strong"`,
        /// `"emphasis"` and `"hyperlink"`.
        concept: String,
        /// Key/value data attached to the span. Hyperlinks built by this file
        /// store their target under the `"href"` key; other spans get an empty map.
        attributes: BTreeMap<String, String>,
        /// Nested inline content enclosed by the span.
        children: Vec<Self>,
    },
}
30
/// Block-level element of a [`FormattingDocument`].
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum BlockNode {
    /// A heading line.
    Heading {
        /// Heading depth. The Markdown and HTML parsers in this file only
        /// produce values in `1..=6`.
        level: u8,
        /// Inline content of the heading.
        children: Vec<InlineNode>,
    },
    /// A paragraph of inline content.
    Paragraph {
        /// Inline content of the paragraph.
        children: Vec<InlineNode>,
    },
    /// A list whose flavour is named by `concept`.
    List {
        /// List concept identifier. The parsers in this file emit
        /// `"bullet-list"` and `"ordered-list"`.
        concept: String,
        /// One entry per list item; each item is a sequence of inline nodes.
        items: Vec<Vec<InlineNode>>,
    },
}
54
55impl BlockNode {
56 #[must_use]
58 pub fn concept_id(&self) -> &str {
59 match self {
60 Self::Heading { .. } => "heading",
61 Self::Paragraph { .. } => "paragraph",
62 Self::List { concept, .. } => concept,
63 }
64 }
65}
66
/// Format-independent document: an ordered sequence of blocks.
///
/// The default value is a document with no blocks.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct FormattingDocument {
    /// Blocks in document order.
    pub blocks: Vec<BlockNode>,
}
73
74impl LinkNetwork {
75 #[must_use]
78 pub fn render_markup_document(&self, language: &str, document: &FormattingDocument) -> String {
79 if language == "PDF" {
80 return super::render_pdf_document(document);
81 }
82 if language == "DOCX" {
83 return super::render_docx_document(document);
84 }
85 if language == "txt" {
86 return render_txt_document(document);
87 }
88 let block_separator = if language == "Markdown" { "\n\n" } else { "\n" };
89 document
90 .blocks
91 .iter()
92 .map(|block| self.render_block(language, block))
93 .collect::<Vec<_>>()
94 .join(block_separator)
95 }
96
97 fn render_block(&self, language: &str, block: &BlockNode) -> String {
98 match block {
99 BlockNode::Heading { level, children } => {
100 let mut instance =
101 DocumentFormatInstance::from_content(self.render_inline(language, children));
102 instance.level = Some(*level);
103 self.render_document_format("heading", language, &instance)
104 .unwrap_or_default()
105 }
106 BlockNode::Paragraph { children } => {
107 let instance =
108 DocumentFormatInstance::from_content(self.render_inline(language, children));
109 self.render_document_format("paragraph", language, &instance)
110 .unwrap_or_default()
111 }
112 BlockNode::List { concept, items } => {
113 let item_separator = if language == "Markdown" { "\n" } else { "" };
114 let rendered_items = items
115 .iter()
116 .map(|item| {
117 let instance = DocumentFormatInstance::from_content(
118 self.render_inline(language, item),
119 );
120 self.render_document_format("list-item", language, &instance)
121 .unwrap_or_default()
122 })
123 .collect::<Vec<_>>()
124 .join(item_separator);
125 let instance = DocumentFormatInstance::from_content(rendered_items);
126 self.render_document_format(concept, language, &instance)
127 .unwrap_or_default()
128 }
129 }
130 }
131
132 fn render_inline(&self, language: &str, nodes: &[InlineNode]) -> String {
133 let mut output = String::new();
134 for node in nodes {
135 match node {
136 InlineNode::Text(text) => output.push_str(&escape_text(language, text)),
137 InlineNode::Wrapped {
138 concept,
139 attributes,
140 children,
141 } => {
142 let instance = DocumentFormatInstance {
143 content: self.render_inline(language, children),
144 level: None,
145 attributes: attributes.clone(),
146 };
147 if let Some(rendered) =
148 self.render_document_format(concept, language, &instance)
149 {
150 output.push_str(&rendered);
151 }
152 }
153 }
154 }
155 output
156 }
157
158 #[must_use]
165 pub fn translate_markup_document(
166 &self,
167 source_language: &str,
168 target_language: &str,
169 text: &str,
170 ) -> Option<String> {
171 let document = parse_markup_document(source_language, text)?;
172 Some(self.render_markup_document(target_language, &document))
173 }
174}
175
176#[must_use]
181pub fn parse_markup_document(language: &str, text: &str) -> Option<FormattingDocument> {
182 match language {
183 "Markdown" => Some(parse_markdown_document(text)),
184 "HTML" => Some(parse_html_document(text)),
185 "PDF" => Some(super::parse_pdf_document(text)),
186 "DOCX" => Some(super::parse_docx_document(text)),
187 "txt" => Some(parse_txt_document(text)),
188 _ => None,
189 }
190}
191
192fn parse_txt_document(text: &str) -> FormattingDocument {
200 let mut blocks = Vec::new();
201 let mut group: Vec<&str> = Vec::new();
202
203 for line in text.lines() {
204 if line.trim().is_empty() {
205 flush_txt_block(&mut blocks, &group);
206 group.clear();
207 } else {
208 group.push(line);
209 }
210 }
211 flush_txt_block(&mut blocks, &group);
212
213 FormattingDocument { blocks }
214}
215
216fn flush_txt_block(blocks: &mut Vec<BlockNode>, lines: &[&str]) {
217 if lines.is_empty() {
218 return;
219 }
220 blocks.push(BlockNode::Paragraph {
221 children: vec![InlineNode::Text(lines.join(" "))],
222 });
223}
224
225fn render_txt_document(document: &FormattingDocument) -> String {
228 document
229 .blocks
230 .iter()
231 .map(render_txt_block)
232 .collect::<Vec<_>>()
233 .join("\n\n")
234}
235
236fn render_txt_block(block: &BlockNode) -> String {
237 match block {
238 BlockNode::Heading { children, .. } | BlockNode::Paragraph { children } => {
239 flatten_inline_text(children)
240 }
241 BlockNode::List { concept, items } => {
242 let ordered = concept == "ordered-list";
243 items
244 .iter()
245 .enumerate()
246 .map(|(index, item)| {
247 let marker = if ordered {
248 format!("{}. ", index + 1)
249 } else {
250 "- ".to_string()
251 };
252 format!("{marker}{}", flatten_inline_text(item))
253 })
254 .collect::<Vec<_>>()
255 .join("\n")
256 }
257 }
258}
259
260fn flatten_inline_text(nodes: &[InlineNode]) -> String {
262 let mut output = String::new();
263 for node in nodes {
264 match node {
265 InlineNode::Text(text) => output.push_str(text),
266 InlineNode::Wrapped { children, .. } => {
267 output.push_str(&flatten_inline_text(children));
268 }
269 }
270 }
271 output
272}
273
274fn parse_markdown_document(text: &str) -> FormattingDocument {
275 let mut blocks = Vec::new();
276 let mut group: Vec<&str> = Vec::new();
277
278 for line in text.lines() {
279 if line.trim().is_empty() {
280 flush_markdown_block(&mut blocks, &group);
281 group.clear();
282 } else {
283 group.push(line);
284 }
285 }
286 flush_markdown_block(&mut blocks, &group);
287
288 FormattingDocument { blocks }
289}
290
291fn flush_markdown_block(blocks: &mut Vec<BlockNode>, lines: &[&str]) {
292 if lines.is_empty() {
293 return;
294 }
295
296 if lines.iter().all(|line| line.starts_with("- ")) {
297 let items = lines
298 .iter()
299 .map(|line| parse_inline_markdown(&line[2..]))
300 .collect();
301 blocks.push(BlockNode::List {
302 concept: "bullet-list".to_string(),
303 items,
304 });
305 return;
306 }
307
308 if let [line] = lines {
309 let hashes = line
310 .chars()
311 .take_while(|character| *character == '#')
312 .count();
313 if (1..=6).contains(&hashes) && line[hashes..].starts_with(' ') {
314 let level = u8::try_from(hashes).expect("heading level within 1..=6");
315 blocks.push(BlockNode::Heading {
316 level,
317 children: parse_inline_markdown(&line[hashes + 1..]),
318 });
319 return;
320 }
321 }
322
323 blocks.push(BlockNode::Paragraph {
324 children: parse_inline_markdown(&lines.join(" ")),
325 });
326}
327
328fn parse_inline_markdown(input: &str) -> Vec<InlineNode> {
329 let mut nodes = Vec::new();
330 let mut text = String::new();
331 let mut cursor = 0usize;
332
333 while cursor < input.len() {
334 let rest = &input[cursor..];
335 if let Some(inner_len) = wrapped_span(rest, "**", "**") {
336 flush_text(&mut nodes, &mut text);
337 let inner = &rest[2..2 + inner_len];
338 nodes.push(wrapped("strong", parse_inline_markdown(inner)));
339 cursor += 4 + inner_len;
340 } else if let Some(inner_len) = wrapped_span(rest, "*", "*") {
341 flush_text(&mut nodes, &mut text);
342 let inner = &rest[1..=inner_len];
343 nodes.push(wrapped("emphasis", parse_inline_markdown(inner)));
344 cursor += 2 + inner_len;
345 } else if let Some((text_inner, href, consumed)) = markdown_link(rest) {
346 flush_text(&mut nodes, &mut text);
347 nodes.push(hyperlink(href, parse_inline_markdown(text_inner)));
348 cursor += consumed;
349 } else {
350 let character = rest.chars().next().expect("non-empty remainder");
351 text.push(character);
352 cursor += character.len_utf8();
353 }
354 }
355
356 flush_text(&mut nodes, &mut text);
357 nodes
358}
359
/// Returns the byte length of the text between `open` at the very start of
/// `rest` and the first following `close`.
///
/// Yields `None` when `rest` does not start with `open` or no `close` follows.
/// A single-`*` opener immediately followed by another `*` is also rejected,
/// so a lone `*` never matches the start of a `**` marker.
fn wrapped_span(rest: &str, open: &str, close: &str) -> Option<usize> {
    rest.strip_prefix(open)
        .filter(|body| !(open == "*" && body.starts_with('*')))
        .and_then(|body| body.find(close))
}
369
/// Recognises a `[text](href)` link at the very start of `rest`.
///
/// Returns the link text, the href and the total number of bytes the link
/// occupies, or `None` when `rest` does not start with a complete link.
fn markdown_link(rest: &str) -> Option<(&str, &str, usize)> {
    let (label, tail) = rest.strip_prefix('[')?.split_once("](")?;
    let (href, _) = tail.split_once(')')?;
    let consumed = "[".len() + label.len() + "](".len() + href.len() + ")".len();
    Some((label, href, consumed))
}
380
381fn parse_html_document(text: &str) -> FormattingDocument {
382 let mut blocks = Vec::new();
383
384 for raw_line in text.lines() {
385 let line = raw_line.trim();
386 if line.is_empty() {
387 continue;
388 }
389
390 if let Some(block) = parse_html_heading(line)
391 .or_else(|| parse_html_list(line))
392 .or_else(|| parse_html_paragraph(line))
393 {
394 blocks.push(block);
395 }
396 }
397
398 FormattingDocument { blocks }
399}
400
401fn parse_html_heading(line: &str) -> Option<BlockNode> {
402 let after_marker = line.strip_prefix("<h")?;
403 let digit = after_marker.chars().next()?;
404 let level = u8::try_from(digit.to_digit(10)?)
405 .ok()
406 .filter(|value| (1..=6).contains(value))?;
407 let open = format!("<h{level}>");
408 let close = format!("</h{level}>");
409 let inner = line.strip_prefix(&open)?.strip_suffix(&close)?;
410 Some(BlockNode::Heading {
411 level,
412 children: parse_inline_html(inner),
413 })
414}
415
416fn parse_html_list(line: &str) -> Option<BlockNode> {
417 let (concept, inner) = if let Some(inner) = wrapped_inner(line, "<ul>", "</ul>") {
418 ("bullet-list", inner)
419 } else if let Some(inner) = wrapped_inner(line, "<ol>", "</ol>") {
420 ("ordered-list", inner)
421 } else {
422 return None;
423 };
424
425 let mut items = Vec::new();
426 let mut rest = inner;
427 while let Some(start) = rest.find("<li>") {
428 let after = &rest[start + 4..];
429 let end = after.find("</li>")?;
430 items.push(parse_inline_html(&after[..end]));
431 rest = &after[end + 5..];
432 }
433
434 Some(BlockNode::List {
435 concept: concept.to_string(),
436 items,
437 })
438}
439
440fn parse_html_paragraph(line: &str) -> Option<BlockNode> {
441 let inner = wrapped_inner(line, "<p>", "</p>")?;
442 Some(BlockNode::Paragraph {
443 children: parse_inline_html(inner),
444 })
445}
446
447fn parse_inline_html(input: &str) -> Vec<InlineNode> {
448 let mut nodes = Vec::new();
449 let mut text = String::new();
450 let mut cursor = 0usize;
451
452 while cursor < input.len() {
453 let rest = &input[cursor..];
454 if let Some((inner, consumed)) = html_tag_span(rest, "strong") {
455 flush_html_text(&mut nodes, &mut text);
456 nodes.push(wrapped("strong", parse_inline_html(inner)));
457 cursor += consumed;
458 } else if let Some((inner, consumed)) = html_tag_span(rest, "em") {
459 flush_html_text(&mut nodes, &mut text);
460 nodes.push(wrapped("emphasis", parse_inline_html(inner)));
461 cursor += consumed;
462 } else if let Some((href, inner, consumed)) = html_anchor(rest) {
463 flush_html_text(&mut nodes, &mut text);
464 nodes.push(hyperlink(href, parse_inline_html(inner)));
465 cursor += consumed;
466 } else {
467 let character = rest.chars().next().expect("non-empty remainder");
468 text.push(character);
469 cursor += character.len_utf8();
470 }
471 }
472
473 flush_html_text(&mut nodes, &mut text);
474 nodes
475}
476
/// Recognises `<tag>…</tag>` at the very start of `rest`.
///
/// Returns the text up to the first closing tag and the total number of bytes
/// the element occupies. Opening tags with attributes do not match.
fn html_tag_span<'a>(rest: &'a str, tag: &str) -> Option<(&'a str, usize)> {
    let opening = format!("<{tag}>");
    let closing = format!("</{tag}>");
    let (inner, _) = rest
        .strip_prefix(opening.as_str())?
        .split_once(closing.as_str())?;
    Some((inner, opening.len() + inner.len() + closing.len()))
}
488
/// Recognises `<a href="…">…</a>` at the very start of `rest`.
///
/// Returns the href, the inner markup up to the first `</a>`, and the total
/// number of bytes the element occupies. Only the exact attribute layout
/// `<a href="…">` is accepted; any extra attribute makes the match fail.
fn html_anchor(rest: &str) -> Option<(&str, &str, usize)> {
    let (href, after_quote) = rest.strip_prefix("<a href=\"")?.split_once('"')?;
    // The closing quote must be followed directly by the end of the opening tag.
    let (inner, _) = after_quote.strip_prefix('>')?.split_once("</a>")?;
    let consumed =
        "<a href=\"".len() + href.len() + "\">".len() + inner.len() + "</a>".len();
    Some((href, inner, consumed))
}
500
/// Returns the text between `open` and `close` when `input` starts with `open`
/// and what remains after it ends with `close`; otherwise `None`.
fn wrapped_inner<'a>(input: &'a str, open: &str, close: &str) -> Option<&'a str> {
    input
        .strip_prefix(open)
        .and_then(|tail| tail.strip_suffix(close))
}
504
505fn wrapped(concept: &str, children: Vec<InlineNode>) -> InlineNode {
506 InlineNode::Wrapped {
507 concept: concept.to_string(),
508 attributes: BTreeMap::new(),
509 children,
510 }
511}
512
513fn hyperlink(href: &str, children: Vec<InlineNode>) -> InlineNode {
514 let mut attributes = BTreeMap::new();
515 attributes.insert("href".to_string(), href.to_string());
516 InlineNode::Wrapped {
517 concept: "hyperlink".to_string(),
518 attributes,
519 children,
520 }
521}
522
523fn flush_text(nodes: &mut Vec<InlineNode>, text: &mut String) {
524 if !text.is_empty() {
525 nodes.push(InlineNode::Text(std::mem::take(text)));
526 }
527}
528
529fn flush_html_text(nodes: &mut Vec<InlineNode>, text: &mut String) {
530 if !text.is_empty() {
531 nodes.push(InlineNode::Text(unescape_text(&std::mem::take(text))));
532 }
533}
534
/// Escapes literal text for output in `language`.
///
/// For `"HTML"` the characters `&`, `<` and `>` are replaced by the entities
/// `&amp;`, `&lt;` and `&gt;`. Text for every other language is returned
/// unchanged.
fn escape_text(language: &str, text: &str) -> String {
    if language == "HTML" {
        // `&` must be escaped first, otherwise the ampersands introduced by the
        // other two entities would be escaped a second time.
        text.replace('&', "&amp;")
            .replace('<', "&lt;")
            .replace('>', "&gt;")
    } else {
        text.to_string()
    }
}

/// Decodes the HTML entities `&lt;`, `&gt;` and `&amp;` back into `<`, `>`
/// and `&`. This is the inverse of the `"HTML"` branch of `escape_text`.
fn unescape_text(text: &str) -> String {
    // `&amp;` is decoded last so that input such as `&amp;lt;` yields the
    // literal text `&lt;` rather than `<`.
    text.replace("&lt;", "<")
        .replace("&gt;", ">")
        .replace("&amp;", "&")
}