meta_language/document_formatting/
profile.rs1use crate::language_profile::LanguageProfile;
14use crate::link_network::LinkType;
15
/// Canonical names of the document formats this module can build a profile for.
///
/// These are exactly the values [`canonical_document_format`] can return and
/// the names [`document_format_profile`] dispatches on.
pub const DOCUMENT_FORMATS: &[&str] = &["txt", "Markdown", "HTML", "PDF", "DOCX"];
19
/// Names of the formatting concepts shared across the document formats.
///
/// Each per-format profile in this module registers every one of these names,
/// either through `with_concept` or through `with_concept_fallback` with a
/// note describing the substitute rendering. The HTML profile registers the
/// whole list through `with_concept`.
pub const CROSS_FORMAT_CONCEPTS: &[&str] = &[
    "heading",
    "paragraph",
    "bullet-list",
    "ordered-list",
    "list-item",
    "strong",
    "emphasis",
    "hyperlink",
];
33
34#[must_use]
42pub fn document_format_profile(format: &str) -> Option<LanguageProfile> {
43 let canonical = canonical_document_format(format)?;
44 let profile = base_profile(canonical);
45 Some(match canonical {
46 "txt" => txt_profile(profile),
47 "Markdown" => markdown_profile(profile),
48 "HTML" => html_profile(profile),
49 "PDF" => pdf_profile(profile),
50 "DOCX" => docx_profile(profile),
51 _ => unreachable!("canonical_document_format only yields known formats"),
52 })
53}
54
/// Resolves a format name or alias to its canonical spelling.
///
/// Matching ignores ASCII letter case only; the input is not trimmed or
/// otherwise normalized. Accepted spellings:
///
/// | canonical  | accepted (any ASCII casing)               |
/// |------------|-------------------------------------------|
/// | `txt`      | `txt`, `text`, `plain-text`, `plaintext`  |
/// | `Markdown` | `markdown`, `md`                          |
/// | `HTML`     | `html`, `htm`                             |
/// | `PDF`      | `pdf`                                     |
/// | `DOCX`     | `docx`                                    |
///
/// Returns `None` for anything else, including the empty string.
#[must_use]
pub fn canonical_document_format(format: &str) -> Option<&'static str> {
    // (canonical name, lowercase spellings that resolve to it)
    const ALIASES: &[(&str, &[&str])] = &[
        ("txt", &["txt", "text", "plain-text", "plaintext"]),
        ("Markdown", &["markdown", "md"]),
        ("HTML", &["html", "htm"]),
        ("PDF", &["pdf"]),
        ("DOCX", &["docx"]),
    ];

    ALIASES
        .iter()
        .find(|(_, spellings)| {
            spellings
                .iter()
                .any(|spelling| format.eq_ignore_ascii_case(spelling))
        })
        .map(|&(canonical, _)| canonical)
}
70
71fn base_profile(canonical: &str) -> LanguageProfile {
72 LanguageProfile::new(canonical, canonical)
73 .with_link_type(LinkType::Document)
74 .with_link_type(LinkType::Concept)
75 .with_link_type(LinkType::Token)
76}
77
78fn with_supported<'a>(
79 mut profile: LanguageProfile,
80 concepts: impl IntoIterator<Item = &'a str>,
81) -> LanguageProfile {
82 for concept in concepts {
83 profile = profile.with_concept(concept);
84 }
85 profile
86}
87
88fn txt_profile(profile: LanguageProfile) -> LanguageProfile {
89 with_supported(profile, ["paragraph"])
90 .with_concept_fallback(
91 "heading",
92 "flattened to a plain paragraph (heading level dropped)",
93 )
94 .with_concept_fallback(
95 "bullet-list",
96 "flattened to plain lines with a `- ` marker per item",
97 )
98 .with_concept_fallback(
99 "ordered-list",
100 "flattened to plain lines with a `N. ` marker per item",
101 )
102 .with_concept_fallback("list-item", "rendered as a single plain line")
103 .with_concept_fallback("strong", "rendered as unstyled plain text")
104 .with_concept_fallback("emphasis", "rendered as unstyled plain text")
105 .with_concept_fallback("hyperlink", "rendered as its visible text (URL dropped)")
106}
107
108fn markdown_profile(profile: LanguageProfile) -> LanguageProfile {
109 with_supported(
110 profile,
111 [
112 "heading",
113 "paragraph",
114 "bullet-list",
115 "list-item",
116 "strong",
117 "emphasis",
118 "hyperlink",
119 ],
120 )
121 .with_concept_fallback(
122 "ordered-list",
123 "rendered with bullet `- ` markers (ordering not preserved by the Markdown profile)",
124 )
125}
126
127fn html_profile(profile: LanguageProfile) -> LanguageProfile {
128 with_supported(profile, CROSS_FORMAT_CONCEPTS.iter().copied())
130}
131
132fn pdf_profile(profile: LanguageProfile) -> LanguageProfile {
133 with_supported(
134 profile,
135 [
136 "heading",
137 "paragraph",
138 "bullet-list",
139 "ordered-list",
140 "list-item",
141 "strong",
142 "emphasis",
143 ],
144 )
145 .with_concept_fallback(
146 "hyperlink",
147 "rendered as its visible text, unstyled (URL dropped)",
148 )
149}
150
151fn docx_profile(profile: LanguageProfile) -> LanguageProfile {
152 with_supported(
153 profile,
154 [
155 "heading",
156 "paragraph",
157 "bullet-list",
158 "ordered-list",
159 "list-item",
160 "strong",
161 "emphasis",
162 ],
163 )
164 .with_concept_fallback(
165 "hyperlink",
166 "rendered as its visible text, unstyled (URL dropped)",
167 )
168}