Skip to main content

meta_language/
concept_ontology.rs

1use std::collections::{BTreeMap, BTreeSet};
2use std::sync::OnceLock;
3
4use crate::link_network::{Link, LinkId, LinkMetadata, LinkNetwork, LinkType};
5use crate::lino_serialization::LinoSerializationError;
6use serde_json::Value;
7
/// Prefix placed in front of a vocabulary name to form the term of its
/// external-id vocabulary link; `external_vocabulary_term` adds it and
/// `external_vocabulary_from_term` strips it again.
const EXTERNAL_ID_VOCABULARY_PREFIX: &str = "external-id:";
9
/// Counters describing what one concept-ontology import read from its source.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub struct ConceptOntologyImportReport {
    concepts: usize,
    alias_links: usize,
    syntax_mappings: usize,
}

impl ConceptOntologyImportReport {
    /// Assembles a report from its three counters.
    const fn new(concepts: usize, alias_links: usize, syntax_mappings: usize) -> Self {
        Self {
            concepts,
            alias_links,
            syntax_mappings,
        }
    }

    /// How many language-free concepts the import took from the source.
    #[must_use]
    pub const fn concepts(self) -> usize {
        let Self { concepts, .. } = self;
        concepts
    }

    /// How many external-id alias links the import took from the source.
    #[must_use]
    pub const fn alias_links(self) -> usize {
        let Self { alias_links, .. } = self;
        alias_links
    }

    /// How many language-bound expression mappings the import took from the source.
    #[must_use]
    pub const fn syntax_mappings(self) -> usize {
        let Self {
            syntax_mappings, ..
        } = self;
        syntax_mappings
    }
}
45
/// Counters describing what seeding the shared concept ontology contributed.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub struct ConceptOntologySeedReport {
    lexicon_concepts: usize,
    structural_concepts: usize,
    formatting_concepts: usize,
    alias_links: usize,
    syntax_mappings: usize,
}

impl ConceptOntologySeedReport {
    /// Assembles a report from its five counters.
    const fn new(
        lexicon_concepts: usize,
        structural_concepts: usize,
        formatting_concepts: usize,
        alias_links: usize,
        syntax_mappings: usize,
    ) -> Self {
        Self {
            lexicon_concepts,
            structural_concepts,
            formatting_concepts,
            alias_links,
            syntax_mappings,
        }
    }

    /// How many concepts came from meta-expression's semantic lexicon JSON.
    #[must_use]
    pub const fn lexicon_concepts(self) -> usize {
        let Self {
            lexicon_concepts, ..
        } = self;
        lexicon_concepts
    }

    /// How many built-in structural programming-language concepts were seeded.
    #[must_use]
    pub const fn structural_concepts(self) -> usize {
        let Self {
            structural_concepts,
            ..
        } = self;
        structural_concepts
    }

    /// How many shared document-formatting concepts were seeded.
    #[must_use]
    pub const fn formatting_concepts(self) -> usize {
        let Self {
            formatting_concepts,
            ..
        } = self;
        formatting_concepts
    }

    /// How many external-id alias links were attached to seeded concepts.
    #[must_use]
    pub const fn alias_links(self) -> usize {
        let Self { alias_links, .. } = self;
        alias_links
    }

    /// How many semantic concrete-syntax mapping links the seed surfaced.
    #[must_use]
    pub const fn syntax_mappings(self) -> usize {
        let Self {
            syntax_mappings, ..
        } = self;
        syntax_mappings
    }
}
103
/// In-memory form of the embedded semantic lexicon JSON document.
struct SemanticLexicon {
    /// Concept count reported for the lexicon; `parse_semantic_lexicon`
    /// asserts that it equals `concepts.len()`.
    concept_count: usize,
    /// Concepts in the order they appear in the JSON `concepts` array.
    concepts: Vec<SemanticLexiconConcept>,
}
108
/// One concept entry parsed from the semantic lexicon JSON.
struct SemanticLexiconConcept {
    /// Required `id` field; used as the exact concept identifier.
    id: String,
    /// Optional `entityId` field.
    entity_id: Option<String>,
    /// Optional `url` field.
    url: Option<String>,
    /// Optional `description` field.
    description: Option<String>,
    /// `labels` field: language key mapped to its list of string labels.
    labels: BTreeMap<String, Vec<String>>,
    /// `primary` field: language key mapped to a single string.
    primary: BTreeMap<String, String>,
}
117
118impl SemanticLexiconConcept {
119    fn id(&self) -> &str {
120        &self.id
121    }
122
123    fn definition(&self) -> String {
124        let mut details = Vec::new();
125        if let Some(entity_id) = &self.entity_id {
126            if is_wikidata_qid(entity_id) {
127                details.push(format!("Wikidata {entity_id}"));
128            } else {
129                details.push(format!("entity {entity_id}"));
130            }
131        } else {
132            details.push(format!("concept {}", self.id));
133        }
134
135        if let Some(description) = &self.description {
136            details.push(description.clone());
137        }
138        if let Some(url) = &self.url {
139            details.push(url.clone());
140        }
141
142        details.join("; ")
143    }
144
145    fn syntax_entries(&self) -> Vec<ConceptSyntaxEntry<'_>> {
146        let primary_languages = self
147            .primary
148            .keys()
149            .map(String::as_str)
150            .collect::<BTreeSet<_>>();
151        let mut seen = BTreeSet::new();
152        let mut entries = Vec::new();
153
154        for (language, syntax) in &self.primary {
155            push_syntax_entry(&mut entries, &mut seen, language, syntax, true);
156        }
157
158        for (language, labels) in &self.labels {
159            for (index, label) in labels.iter().enumerate() {
160                let canonical = !primary_languages.contains(language.as_str()) && index == 0;
161                push_syntax_entry(&mut entries, &mut seen, language, label, canonical);
162            }
163        }
164
165        entries
166    }
167}
168
/// Borrowed `(language, syntax)` pair produced by
/// `SemanticLexiconConcept::syntax_entries`.
struct ConceptSyntaxEntry<'a> {
    /// Language key the syntax belongs to.
    language: &'a str,
    /// Concrete syntax or label text for the concept in that language.
    syntax: &'a str,
    /// Flag passed as `update_reconstruction` when the entry is seeded.
    canonical: bool,
}
174
/// Static description of one built-in structural programming-language concept.
struct StructuralConcept {
    /// Exact concept identifier.
    id: &'static str,
    /// Definition text stored on the concept link.
    definition: &'static str,
    /// `(language, syntax)` pairs mapped onto the concept when seeding.
    syntax: &'static [(&'static str, &'static str)],
}
180
/// Link ids of the three concepts created by the statehood worked example.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct StatehoodConceptIds {
    /// Link of the `statehood` proposition concept.
    pub proposition: LinkId,
    /// Link of the `Q782` (Hawaii) concept.
    pub subject: LinkId,
    /// Link of the `Q35657` (U.S. state) concept.
    pub object: LinkId,
}
187
/// `(language, syntax)` pairs mapped onto the `statehood` proposition concept.
/// Each sentence is listed under both a language name and a short code.
const STATEHOOD_PROPOSITION_SYNTAX: &[(&str, &str)] = &[
    ("English", "Hawaii is a state."),
    ("en", "Hawaii is a state."),
    ("Russian", "Гавайи это штат."),
    ("ru", "Гавайи это штат."),
];

/// `(language, syntax)` pairs mapped onto the `Q782` (Hawaii) concept.
const HAWAII_ENTITY_SYNTAX: &[(&str, &str)] = &[
    ("English", "Hawaii"),
    ("en", "Hawaii"),
    ("Russian", "Гавайи"),
    ("ru", "Гавайи"),
];

/// `(language, syntax)` pairs mapped onto the `Q35657` (U.S. state) concept.
const UNITED_STATES_STATE_SYNTAX: &[(&str, &str)] = &[
    ("English", "state"),
    ("en", "state"),
    ("Russian", "штат"),
    ("ru", "штат"),
];
208
/// Built-in structural programming-language concepts with their per-language
/// syntax labels. `seed_common_concept_ontology` interns every entry and maps
/// each `(language, syntax)` pair onto it.
const STRUCTURAL_CONCEPTS: &[StructuralConcept] = &[
    StructuralConcept {
        id: "function",
        definition: "Reusable computation with parameters and a result boundary.",
        syntax: &[
            ("Rust", "fn"),
            ("Python", "def"),
            ("JavaScript", "function"),
            ("C", "function"),
            ("C++", "function"),
            ("C#", "method"),
            ("Java", "method"),
            ("Visual Basic", "Function"),
            ("R", "function"),
            ("sql-ansi", "CREATE FUNCTION"),
            ("Delphi/Object Pascal", "function"),
        ],
    },
    StructuralConcept {
        id: "binding",
        definition: "Association between a name and a value or computation.",
        syntax: &[
            ("Rust", "let"),
            ("Python", "="),
            ("JavaScript", "let"),
            ("C", "="),
            ("C++", "="),
            ("C#", "="),
            ("Java", "="),
            ("Visual Basic", "Dim"),
            ("R", "<-"),
            ("sql-ansi", "AS"),
            ("Delphi/Object Pascal", ":="),
        ],
    },
    StructuralConcept {
        id: "application",
        definition: "Application of a callable expression to arguments.",
        syntax: &[
            ("Rust", "call(...)"),
            ("Python", "call(...)"),
            ("JavaScript", "call(...)"),
            ("C", "call(...)"),
            ("C++", "call(...)"),
            ("C#", "call(...)"),
            ("Java", "call(...)"),
            ("Visual Basic", "Call"),
            ("R", "call(...)"),
            ("sql-ansi", "CALL"),
            ("Delphi/Object Pascal", "call(...)"),
        ],
    },
    StructuralConcept {
        id: "sequence",
        definition: "Ordered execution or evaluation of multiple operations.",
        syntax: &[
            ("Rust", ";"),
            ("Python", "newline"),
            ("JavaScript", ";"),
            ("C", ";"),
            ("C++", ";"),
            ("C#", ";"),
            ("Java", ";"),
            ("Visual Basic", "newline"),
            ("R", ";"),
            ("sql-ansi", ";"),
            ("Delphi/Object Pascal", "begin ... end"),
        ],
    },
    StructuralConcept {
        id: "branch",
        definition: "Conditional selection among alternative operations.",
        syntax: &[
            ("Rust", "if"),
            ("Python", "if"),
            ("JavaScript", "if"),
            ("C", "if"),
            ("C++", "if"),
            ("C#", "if"),
            ("Java", "if"),
            ("Visual Basic", "If"),
            ("R", "if"),
            ("sql-ansi", "CASE"),
            ("Delphi/Object Pascal", "if"),
        ],
    },
    StructuralConcept {
        id: "loop",
        definition: "Repeated execution or evaluation over a condition or iterable.",
        syntax: &[
            ("Rust", "loop"),
            ("Python", "for"),
            ("JavaScript", "for"),
            ("C", "for"),
            ("C++", "for"),
            ("C#", "for"),
            ("Java", "for"),
            ("Visual Basic", "For"),
            ("R", "for"),
            ("sql-ansi", "WHILE"),
            ("Delphi/Object Pascal", "for"),
        ],
    },
    StructuralConcept {
        id: "parameter",
        definition: "Named input accepted by a function abstraction.",
        syntax: &[
            ("Rust", "parameter"),
            ("Python", "parameter"),
            ("JavaScript", "parameter"),
            ("C", "parameter"),
            ("C++", "parameter"),
            ("C#", "parameter"),
            ("Java", "parameter"),
            ("Visual Basic", "parameter"),
            ("R", "parameter"),
            ("sql-ansi", "parameter"),
            ("Delphi/Object Pascal", "parameter"),
        ],
    },
    StructuralConcept {
        id: "argument",
        definition: "Concrete input supplied to a function application.",
        syntax: &[
            ("Rust", "argument"),
            ("Python", "argument"),
            ("JavaScript", "argument"),
            ("C", "argument"),
            ("C++", "argument"),
            ("C#", "argument"),
            ("Java", "argument"),
            ("Visual Basic", "argument"),
            ("R", "argument"),
            ("sql-ansi", "argument"),
            ("Delphi/Object Pascal", "argument"),
        ],
    },
    StructuralConcept {
        id: "return",
        definition: "Transfer of a function result to its caller.",
        syntax: &[
            ("Rust", "return"),
            ("Python", "return"),
            ("JavaScript", "return"),
            ("C", "return"),
            ("C++", "return"),
            ("C#", "return"),
            ("Java", "return"),
            ("Visual Basic", "Return"),
            ("R", "return"),
            ("sql-ansi", "RETURN"),
            ("Delphi/Object Pascal", "Result"),
        ],
    },
    StructuralConcept {
        id: "assignment",
        definition: "Update that stores a value into a named location.",
        syntax: &[
            ("Rust", "="),
            ("Python", "="),
            ("JavaScript", "="),
            ("C", "="),
            ("C++", "="),
            ("C#", "="),
            ("Java", "="),
            ("Visual Basic", "="),
            ("R", "<-"),
            ("sql-ansi", "="),
            ("Delphi/Object Pascal", ":="),
        ],
    },
];
381
382impl LinkNetwork {
383    pub(crate) fn seed_statehood_worked_example(&mut self) -> StatehoodConceptIds {
384        let proposition = self.insert_typed_point(
385            "statehood",
386            LinkType::Concept,
387            Some("Statehood proposition connecting Hawaii (Q782) to U.S. state (Q35657)."),
388        );
389        let subject = self.insert_typed_point(
390            "Q782",
391            LinkType::Concept,
392            Some("Wikidata Q782; Hawaii; state of the United States."),
393        );
394        let object = self.insert_typed_point(
395            "Q35657",
396            LinkType::Concept,
397            Some("Wikidata Q35657; state of the United States."),
398        );
399
400        for (language, syntax) in STATEHOOD_PROPOSITION_SYNTAX {
401            self.insert_concept_syntax_mapping(proposition, "statehood", language, syntax, true);
402        }
403        for (language, syntax) in HAWAII_ENTITY_SYNTAX {
404            self.insert_concept_syntax_mapping(subject, "Q782", language, syntax, true);
405        }
406        for (language, syntax) in UNITED_STATES_STATE_SYNTAX {
407            self.insert_concept_syntax_mapping(object, "Q35657", language, syntax, true);
408        }
409
410        StatehoodConceptIds {
411            proposition,
412            subject,
413            object,
414        }
415    }
416
417    /// Seeds the network with the shared common concept ontology.
418    ///
419    /// The seed combines meta-expression's semantic lexicon with structural
420    /// programming-language concepts that are shared across the current
421    /// language targets.
422    #[must_use]
423    pub fn seed_common_concept_ontology(&mut self) -> ConceptOntologySeedReport {
424        let lexicon = semantic_lexicon();
425        let mut alias_links = 0;
426        let mut syntax_mappings = 0;
427
428        for concept in &lexicon.concepts {
429            let definition = concept.definition();
430            let concept_link = self.intern_concept(concept.id(), Some(&definition));
431            alias_links += self.insert_external_aliases(concept_link, concept);
432
433            for entry in concept.syntax_entries() {
434                self.insert_concept_syntax_mapping(
435                    concept_link,
436                    concept.id(),
437                    entry.language,
438                    entry.syntax,
439                    entry.canonical,
440                );
441                syntax_mappings += 1;
442            }
443        }
444
445        let mut structural_concepts = BTreeSet::new();
446        for concept in STRUCTURAL_CONCEPTS {
447            structural_concepts.insert(concept.id);
448            let concept_link = self.intern_concept(concept.id, Some(concept.definition));
449
450            for (language, syntax) in concept.syntax {
451                self.insert_concept_syntax_mapping(
452                    concept_link,
453                    concept.id,
454                    language,
455                    syntax,
456                    true,
457                );
458                syntax_mappings += 1;
459            }
460        }
461
462        let formatting = self.seed_document_formatting_concepts();
463        syntax_mappings += formatting.syntax_mappings();
464
465        let statehood = self.seed_statehood_worked_example();
466        for (concept_link, external_id) in
467            [(statehood.subject, "Q782"), (statehood.object, "Q35657")]
468        {
469            let (_alias, inserted) =
470                self.insert_concept_alias_link(concept_link, "Wikidata", external_id);
471            if inserted {
472                alias_links += 1;
473            }
474        }
475        syntax_mappings += STATEHOOD_PROPOSITION_SYNTAX.len()
476            + HAWAII_ENTITY_SYNTAX.len()
477            + UNITED_STATES_STATE_SYNTAX.len();
478
479        ConceptOntologySeedReport::new(
480            lexicon.concept_count,
481            structural_concepts.len(),
482            formatting.concepts(),
483            alias_links,
484            syntax_mappings,
485        )
486    }
487
    /// Interns a language-free concept by exact identifier.
    ///
    /// The identifier is matched exactly: case changes, diacritic changes, or
    /// sense suffixes are distinct concept ids and therefore produce distinct
    /// concept links.
    ///
    /// Delegates to `insert_typed_point` with [`LinkType::Concept`], passing
    /// `definition` through unchanged, and returns the resulting link id.
    pub fn intern_concept(&mut self, exact_id: &str, definition: Option<&str>) -> LinkId {
        self.insert_typed_point(exact_id, LinkType::Concept, definition)
    }
496
497    /// Inserts a language-bound expression linked to a language-free concept.
498    ///
499    /// The concept is reused only when `concept` exactly matches an existing
500    /// concept id; otherwise a new concept link is minted.
501    pub fn insert_concept_expression(
502        &mut self,
503        concept: &str,
504        language: &str,
505        expression: &str,
506    ) -> LinkId {
507        let concept_link = self.find_term(concept).unwrap_or_else(|| {
508            self.intern_concept(
509                concept,
510                Some("A language-free concept shared by exact interlingual id."),
511            )
512        });
513        self.insert_concept_syntax_mapping(concept_link, concept, language, expression, true)
514    }
515
    /// Inserts a concept-to-language syntax mapping and returns the semantic link id.
    ///
    /// This forwards directly to [`LinkNetwork::insert_concept_expression`],
    /// with `syntax` passed as the expression.
    pub fn insert_concept_mapping(
        &mut self,
        concept: &str,
        language: &str,
        syntax: &str,
    ) -> LinkId {
        self.insert_concept_expression(concept, language, syntax)
    }
525
    /// Attaches an external vocabulary id to a concept without changing its exact concept id.
    ///
    /// Returns the id of the alias link. If an identical alias already exists
    /// its id is returned; whether the link was newly inserted is discarded.
    pub fn insert_concept_alias(
        &mut self,
        concept_link: LinkId,
        vocabulary: &str,
        external_id: &str,
    ) -> LinkId {
        self.insert_concept_alias_link(concept_link, vocabulary, external_id)
            .0
    }
536
    /// Imports concept, expression, and alias links from canonical `LiNo` text.
    ///
    /// The input is the links-notation text produced by [`LinkNetwork::to_lino`].
    /// Importing the same text repeatedly is idempotent because concepts,
    /// expressions, and aliases are all deduplicated by exact link shape.
    ///
    /// # Errors
    ///
    /// Propagates the error returned by `from_lino` when `text` cannot be
    /// turned into a source network; nothing is imported in that case.
    pub fn import_concept_ontology_lino(
        &mut self,
        text: &str,
    ) -> Result<ConceptOntologyImportReport, LinoSerializationError> {
        // Parse into a temporary network first, then copy the relevant links over.
        let source = Self::from_lino(text)?;
        Ok(self.import_concept_ontology_network(&source))
    }
549
550    fn import_concept_ontology_network(&mut self, source: &Self) -> ConceptOntologyImportReport {
551        let mut concept_links: BTreeMap<LinkId, (LinkId, String)> = BTreeMap::new();
552        let mut concepts = 0;
553        let mut alias_links = 0;
554        let mut syntax_mappings = 0;
555
556        for link in source.links() {
557            if link.metadata().link_type() != Some(LinkType::Concept) {
558                continue;
559            }
560            let Some(term) = link.metadata().term() else {
561                continue;
562            };
563            let concept_link = self.intern_concept(term, link.metadata().definition());
564            concept_links.insert(link.id(), (concept_link, term.to_string()));
565            concepts += 1;
566        }
567
568        for link in source.links() {
569            if link.metadata().link_type() != Some(LinkType::Semantic) {
570                continue;
571            }
572            let [source_concept, source_context] = link.references() else {
573                continue;
574            };
575            let Some((target_concept, concept_id)) = concept_links.get(source_concept) else {
576                continue;
577            };
578            let Some(context) = source.link(*source_context) else {
579                continue;
580            };
581            let Some(term) = link.metadata().term() else {
582                continue;
583            };
584
585            match context.metadata().link_type() {
586                Some(LinkType::Language) => {
587                    if let Some(language) = link
588                        .metadata()
589                        .language()
590                        .or_else(|| context.metadata().term())
591                    {
592                        self.insert_concept_syntax_mapping(
593                            *target_concept,
594                            concept_id,
595                            language,
596                            term,
597                            false,
598                        );
599                        syntax_mappings += 1;
600                    }
601                }
602                Some(LinkType::Type) => {
603                    let vocabulary = link.metadata().language().or_else(|| {
604                        context
605                            .metadata()
606                            .term()
607                            .and_then(external_vocabulary_from_term)
608                    });
609                    if let Some(vocabulary) = vocabulary {
610                        self.insert_concept_alias(*target_concept, vocabulary, term);
611                        alias_links += 1;
612                    }
613                }
614                _ => {}
615            }
616        }
617
618        ConceptOntologyImportReport::new(concepts, alias_links, syntax_mappings)
619    }
620
621    fn insert_external_aliases(
622        &mut self,
623        concept_link: LinkId,
624        concept: &SemanticLexiconConcept,
625    ) -> usize {
626        let mut aliases = BTreeSet::new();
627        if let Some(vocabulary) = external_vocabulary_for_id(concept.id()) {
628            aliases.insert((vocabulary, concept.id()));
629        }
630        if let Some(entity_id) = concept.entity_id.as_deref() {
631            if let Some(vocabulary) = external_vocabulary_for_id(entity_id) {
632                aliases.insert((vocabulary, entity_id));
633            }
634        }
635
636        aliases
637            .into_iter()
638            .filter(|(vocabulary, external_id)| {
639                let (_alias, inserted) =
640                    self.insert_concept_alias_link(concept_link, vocabulary, external_id);
641                inserted
642            })
643            .count()
644    }
645
646    fn insert_concept_alias_link(
647        &mut self,
648        concept_link: LinkId,
649        vocabulary: &str,
650        external_id: &str,
651    ) -> (LinkId, bool) {
652        let vocabulary_term = external_vocabulary_term(vocabulary);
653        let vocabulary_link = self.insert_typed_point(
654            &vocabulary_term,
655            LinkType::Type,
656            Some("External concept identifier vocabulary."),
657        );
658
659        if let Some(existing) =
660            self.find_concept_alias(concept_link, vocabulary_link, vocabulary, external_id)
661        {
662            return (existing, false);
663        }
664
665        (
666            self.insert_link(
667                [concept_link, vocabulary_link],
668                LinkMetadata::new()
669                    .with_link_type(LinkType::Semantic)
670                    .with_named(true)
671                    .with_term(external_id)
672                    .with_language(vocabulary),
673            ),
674            true,
675        )
676    }
677
678    pub(crate) fn insert_concept_syntax_mapping(
679        &mut self,
680        concept_link: LinkId,
681        concept: &str,
682        language: &str,
683        syntax: &str,
684        update_reconstruction: bool,
685    ) -> LinkId {
686        let language_link = self.insert_typed_point(language, LinkType::Language, None);
687        self.cache_concept_syntax(concept, language, syntax, update_reconstruction);
688
689        if let Some(existing) =
690            self.find_concept_syntax_mapping(concept_link, language_link, syntax, language)
691        {
692            return existing;
693        }
694
695        self.insert_link(
696            [concept_link, language_link],
697            LinkMetadata::new()
698                .with_link_type(LinkType::Semantic)
699                .with_named(true)
700                .with_term(syntax)
701                .with_language(language),
702        )
703    }
704
705    fn find_concept_syntax_mapping(
706        &self,
707        concept_link: LinkId,
708        language_link: LinkId,
709        syntax: &str,
710        language: &str,
711    ) -> Option<LinkId> {
712        self.links()
713            .find(|link| {
714                let references = link.references();
715                link.metadata().link_type() == Some(LinkType::Semantic)
716                    && references.len() == 2
717                    && references[0] == concept_link
718                    && references[1] == language_link
719                    && link.metadata().term() == Some(syntax)
720                    && link.metadata().language() == Some(language)
721            })
722            .map(Link::id)
723    }
724
725    fn find_concept_alias(
726        &self,
727        concept_link: LinkId,
728        vocabulary_link: LinkId,
729        vocabulary: &str,
730        external_id: &str,
731    ) -> Option<LinkId> {
732        self.links()
733            .find(|link| {
734                let references = link.references();
735                link.metadata().link_type() == Some(LinkType::Semantic)
736                    && references.len() == 2
737                    && references[0] == concept_link
738                    && references[1] == vocabulary_link
739                    && link.metadata().term() == Some(external_id)
740                    && link.metadata().language() == Some(vocabulary)
741            })
742            .map(Link::id)
743    }
744}
745
/// Semantic lexicon JSON text, embedded at compile time from
/// `data/semantic-lexicon.json` (path relative to this source file).
const SEMANTIC_LEXICON_JSON: &str = include_str!("data/semantic-lexicon.json");
747
/// Returns the process-wide parsed semantic lexicon.
///
/// The lexicon is parsed by `parse_semantic_lexicon` on first use and the
/// same instance is handed out on every later call.
fn semantic_lexicon() -> &'static SemanticLexicon {
    static LEXICON: OnceLock<SemanticLexicon> = OnceLock::new();
    LEXICON.get_or_init(parse_semantic_lexicon)
}
752
753fn parse_semantic_lexicon() -> SemanticLexicon {
754    let root: Value =
755        serde_json::from_str(SEMANTIC_LEXICON_JSON).expect("semantic lexicon JSON must parse");
756    let root = root
757        .as_object()
758        .expect("semantic lexicon root must be an object");
759    let concepts = root
760        .get("concepts")
761        .and_then(Value::as_array)
762        .expect("semantic lexicon concepts must be an array")
763        .iter()
764        .map(parse_concept)
765        .collect::<Vec<_>>();
766    let concept_count = root
767        .get("conceptCount")
768        .and_then(Value::as_u64)
769        .map_or(concepts.len(), |count| {
770            usize::try_from(count).expect("semantic lexicon concept count must fit usize")
771        });
772
773    assert_eq!(
774        concept_count,
775        concepts.len(),
776        "semantic lexicon conceptCount must match concepts array length"
777    );
778
779    SemanticLexicon {
780        concept_count,
781        concepts,
782    }
783}
784
785fn parse_concept(value: &Value) -> SemanticLexiconConcept {
786    let concept = value
787        .as_object()
788        .expect("semantic lexicon concept must be an object");
789    SemanticLexiconConcept {
790        id: required_string_field(concept, "id"),
791        entity_id: optional_string_field(concept, "entityId"),
792        url: optional_string_field(concept, "url"),
793        description: optional_string_field(concept, "description"),
794        labels: string_list_map_field(concept, "labels"),
795        primary: string_map_field(concept, "primary"),
796    }
797}
798
799fn required_string_field(object: &serde_json::Map<String, Value>, field: &str) -> String {
800    object
801        .get(field)
802        .and_then(Value::as_str)
803        .unwrap_or_else(|| panic!("semantic lexicon field {field} must be a string"))
804        .to_string()
805}
806
807fn optional_string_field(object: &serde_json::Map<String, Value>, field: &str) -> Option<String> {
808    object
809        .get(field)
810        .and_then(Value::as_str)
811        .map(str::to_string)
812}
813
814fn string_map_field(
815    object: &serde_json::Map<String, Value>,
816    field: &str,
817) -> BTreeMap<String, String> {
818    object
819        .get(field)
820        .and_then(Value::as_object)
821        .map(|entries| {
822            entries
823                .iter()
824                .filter_map(|(language, value)| {
825                    Some((language.clone(), value.as_str()?.to_string()))
826                })
827                .collect()
828        })
829        .unwrap_or_default()
830}
831
832fn string_list_map_field(
833    object: &serde_json::Map<String, Value>,
834    field: &str,
835) -> BTreeMap<String, Vec<String>> {
836    object
837        .get(field)
838        .and_then(Value::as_object)
839        .map(|entries| {
840            entries
841                .iter()
842                .map(|(language, values)| {
843                    (
844                        language.clone(),
845                        values
846                            .as_array()
847                            .into_iter()
848                            .flatten()
849                            .filter_map(Value::as_str)
850                            .map(str::to_string)
851                            .collect(),
852                    )
853                })
854                .collect()
855        })
856        .unwrap_or_default()
857}
858
859fn push_syntax_entry<'a>(
860    entries: &mut Vec<ConceptSyntaxEntry<'a>>,
861    seen: &mut BTreeSet<(&'a str, &'a str)>,
862    language: &'a str,
863    syntax: &'a str,
864    canonical: bool,
865) {
866    if seen.insert((language, syntax)) {
867        entries.push(ConceptSyntaxEntry {
868            language,
869            syntax,
870            canonical,
871        });
872    }
873}
874
/// Reports whether `value` has the shape of a Wikidata QID: an uppercase
/// `Q` followed by one or more ASCII digits and nothing else.
fn is_wikidata_qid(value: &str) -> bool {
    match value.strip_prefix('Q') {
        Some(digits) => !digits.is_empty() && digits.bytes().all(|byte| byte.is_ascii_digit()),
        None => false,
    }
}
880
/// Reports whether `value` begins with one of the `ili:` / `ili-` prefixes
/// that this module treats as WordNet CILI identifiers.
fn is_wordnet_cili_id(value: &str) -> bool {
    ["ili:", "ili-"]
        .iter()
        .any(|&prefix| value.starts_with(prefix))
}
884
885fn external_vocabulary_for_id(value: &str) -> Option<&'static str> {
886    if is_wikidata_qid(value) {
887        Some("Wikidata")
888    } else if is_wordnet_cili_id(value) {
889        Some("WordNet CILI")
890    } else {
891        None
892    }
893}
894
895fn external_vocabulary_term(vocabulary: &str) -> String {
896    format!("{EXTERNAL_ID_VOCABULARY_PREFIX}{vocabulary}")
897}
898
/// Recovers the vocabulary name from a vocabulary link term.
///
/// Returns the text after `EXTERNAL_ID_VOCABULARY_PREFIX`, or `None` when
/// `term` does not start with that prefix.
fn external_vocabulary_from_term(term: &str) -> Option<&str> {
    term.strip_prefix(EXTERNAL_ID_VOCABULARY_PREFIX)
}