Skip to main content

meta_language/
query.rs

1use std::fmt;
2
3use crate::link_network::{Link, LinkId, LinkNetwork, LinkType};
4
5/// Structural query over links.
6#[derive(Clone, Debug, Default, PartialEq, Eq)]
7pub struct LinkQuery {
8    link_type: Option<LinkType>,
9    term: Option<String>,
10    language: Option<String>,
11    named: Option<bool>,
12    pattern: Option<QueryPattern>,
13    pattern_source: Option<String>,
14    predicates: Vec<QueryPredicate>,
15}
16
17impl LinkQuery {
18    /// Creates an empty query that matches every link.
19    #[must_use]
20    pub fn new() -> Self {
21        Self::default()
22    }
23
24    /// Creates a query restricted to a link type.
25    #[must_use]
26    pub fn by_type(link_type: LinkType) -> Self {
27        Self::new().with_link_type(link_type)
28    }
29
30    /// Parses a tree-sitter-query-like S-expression query.
31    ///
32    /// The structural engine binds captures and leaves predicate evaluation to
33    /// a caller-provided [`QueryPredicateHost`].
34    pub fn from_sexpression(source: &str) -> Result<Self, QueryParseError> {
35        let tokens = tokenize(source)?;
36        let mut parser = QueryParser::new(tokens);
37        let (pattern, predicates) = parser.parse()?;
38        Ok(Self {
39            pattern: Some(pattern),
40            pattern_source: Some(source.to_string()),
41            predicates,
42            ..Self::default()
43        })
44    }
45
46    /// Restricts matches to a link type.
47    #[must_use]
48    pub const fn with_link_type(mut self, link_type: LinkType) -> Self {
49        self.link_type = Some(link_type);
50        self
51    }
52
53    /// Restricts matches to a term.
54    #[must_use]
55    pub fn with_term(mut self, term: impl Into<String>) -> Self {
56        self.term = Some(term.into());
57        self
58    }
59
60    /// Restricts matches to a language label.
61    #[must_use]
62    pub fn with_language(mut self, language: impl Into<String>) -> Self {
63        self.language = Some(language.into());
64        self
65    }
66
67    /// Restricts matches by the named flag.
68    #[must_use]
69    pub const fn with_named(mut self, named: bool) -> Self {
70        self.named = Some(named);
71        self
72    }
73
74    pub(crate) fn matches_in_network(
75        &self,
76        network: &LinkNetwork,
77        link: &Link,
78        predicate_host: &impl QueryPredicateHost,
79    ) -> Vec<QueryMatch> {
80        if !self.matches_metadata(link) {
81            return Vec::new();
82        }
83
84        let captures = self.pattern.as_ref().map_or_else(
85            || vec![QueryCaptures::default()],
86            |pattern| pattern.match_root(network, link.id()),
87        );
88
89        captures
90            .into_iter()
91            .filter(|captures| {
92                self.predicates
93                    .iter()
94                    .all(|predicate| predicate_host.evaluate(predicate, captures, network))
95            })
96            .map(|captures| QueryMatch::new(link.id(), captures))
97            .collect()
98    }
99
100    fn matches_metadata(&self, link: &Link) -> bool {
101        let metadata = link.metadata();
102        self.link_type
103            .map_or(true, |link_type| metadata.link_type() == Some(link_type))
104            && self
105                .term
106                .as_deref()
107                .map_or(true, |term| metadata.term() == Some(term))
108            && self
109                .language
110                .as_deref()
111                .map_or(true, |language| metadata.language() == Some(language))
112            && self
113                .named
114                .map_or(true, |named| metadata.is_named() == named)
115    }
116
117    pub(crate) const fn link_type_filter(&self) -> Option<LinkType> {
118        self.link_type
119    }
120
121    pub(crate) fn term_filter(&self) -> Option<&str> {
122        self.term.as_deref()
123    }
124
125    pub(crate) fn language_filter(&self) -> Option<&str> {
126        self.language.as_deref()
127    }
128
129    pub(crate) const fn named_filter(&self) -> Option<bool> {
130        self.named
131    }
132
133    pub(crate) fn pattern_source(&self) -> Option<&str> {
134        self.pattern_source.as_deref()
135    }
136}
137
138/// A structural query match and its capture bindings.
139#[derive(Clone, Debug, PartialEq, Eq)]
140pub struct QueryMatch {
141    link_id: LinkId,
142    captures: QueryCaptures,
143}
144
145impl QueryMatch {
146    const fn new(link_id: LinkId, captures: QueryCaptures) -> Self {
147        Self { link_id, captures }
148    }
149
150    /// Link selected by the query root pattern.
151    #[must_use]
152    pub const fn link_id(&self) -> LinkId {
153        self.link_id
154    }
155
156    /// Captures bound while matching this result.
157    #[must_use]
158    pub const fn captures(&self) -> &QueryCaptures {
159        &self.captures
160    }
161}
162
163/// Ordered capture bindings for a query match.
164#[derive(Clone, Debug, Default, PartialEq, Eq)]
165pub struct QueryCaptures {
166    values: Vec<QueryCapture>,
167}
168
169impl QueryCaptures {
170    fn with_capture(mut self, name: &str, link_id: LinkId) -> Self {
171        self.values.push(QueryCapture {
172            name: name.to_string(),
173            link_id,
174        });
175        self
176    }
177
178    /// Returns the first link bound to a capture name.
179    #[must_use]
180    pub fn first(&self, name: &str) -> Option<LinkId> {
181        self.values
182            .iter()
183            .find(|capture| capture.name == name)
184            .map(|capture| capture.link_id)
185    }
186
187    /// Iterates capture bindings in match order.
188    pub fn iter(&self) -> impl Iterator<Item = &QueryCapture> {
189        self.values.iter()
190    }
191}
192
193/// One capture binding.
194#[derive(Clone, Debug, PartialEq, Eq)]
195pub struct QueryCapture {
196    name: String,
197    link_id: LinkId,
198}
199
200impl QueryCapture {
201    /// Capture name without the leading `@`.
202    #[must_use]
203    pub fn name(&self) -> &str {
204        &self.name
205    }
206
207    /// Link bound to this capture.
208    #[must_use]
209    pub const fn link_id(&self) -> LinkId {
210        self.link_id
211    }
212}
213
214/// Host hook for evaluating text, regex, semantic, or other predicates.
215pub trait QueryPredicateHost {
216    /// Returns whether a predicate accepts the current capture set.
217    fn evaluate(
218        &self,
219        predicate: &QueryPredicate,
220        captures: &QueryCaptures,
221        network: &LinkNetwork,
222    ) -> bool;
223}
224
225pub(crate) struct RejectPredicateHost;
226
227impl QueryPredicateHost for RejectPredicateHost {
228    fn evaluate(
229        &self,
230        _predicate: &QueryPredicate,
231        _captures: &QueryCaptures,
232        _network: &LinkNetwork,
233    ) -> bool {
234        false
235    }
236}
237
238/// Predicate expression parsed from an S-expression query.
239#[derive(Clone, Debug, PartialEq, Eq)]
240pub struct QueryPredicate {
241    name: String,
242    arguments: Vec<QueryPredicateArgument>,
243}
244
245impl QueryPredicate {
246    /// Predicate name without the leading `#`.
247    #[must_use]
248    pub fn name(&self) -> &str {
249        &self.name
250    }
251
252    /// Predicate arguments in source order.
253    #[must_use]
254    pub fn arguments(&self) -> &[QueryPredicateArgument] {
255        &self.arguments
256    }
257}
258
259/// Predicate argument parsed from an S-expression query.
260#[derive(Clone, Debug, PartialEq, Eq)]
261pub enum QueryPredicateArgument {
262    /// Capture reference such as `@name`.
263    Capture(String),
264    /// Host literal such as `"main"` or an unquoted atom.
265    Literal(String),
266}
267
268impl QueryPredicateArgument {
269    /// Returns the referenced capture name, if this argument is a capture.
270    #[must_use]
271    pub fn capture_name(&self) -> Option<&str> {
272        match self {
273            Self::Capture(name) => Some(name),
274            Self::Literal(_) => None,
275        }
276    }
277
278    /// Returns the literal value, if this argument is a literal.
279    #[must_use]
280    pub fn literal(&self) -> Option<&str> {
281        match self {
282            Self::Capture(_) => None,
283            Self::Literal(value) => Some(value),
284        }
285    }
286}
287
288/// Error returned while parsing an S-expression query.
289#[derive(Clone, Debug, PartialEq, Eq)]
290pub struct QueryParseError {
291    message: String,
292}
293
294impl QueryParseError {
295    pub(crate) fn new(message: impl Into<String>) -> Self {
296        Self {
297            message: message.into(),
298        }
299    }
300}
301
302impl fmt::Display for QueryParseError {
303    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
304        formatter.write_str(&self.message)
305    }
306}
307
308impl std::error::Error for QueryParseError {}
309
310#[derive(Clone, Debug, PartialEq, Eq)]
311struct QueryPattern {
312    root: QueryNodePattern,
313    capture: Option<String>,
314}
315
316impl QueryPattern {
317    fn match_root(&self, network: &LinkNetwork, link_id: LinkId) -> Vec<QueryCaptures> {
318        let captures = self
319            .root
320            .matches(network, link_id, QueryCaptures::default());
321        captures
322            .into_iter()
323            .map(|captures| {
324                if let Some(capture) = &self.capture {
325                    captures.with_capture(capture, link_id)
326                } else {
327                    captures
328                }
329            })
330            .collect()
331    }
332}
333
334#[derive(Clone, Debug, PartialEq, Eq)]
335struct QueryNodePattern {
336    kind: QueryNodeKind,
337    children: Vec<QueryChildPattern>,
338}
339
340impl QueryNodePattern {
341    fn matches(
342        &self,
343        network: &LinkNetwork,
344        link_id: LinkId,
345        captures: QueryCaptures,
346    ) -> Vec<QueryCaptures> {
347        let Some(link) = network.link(link_id) else {
348            return Vec::new();
349        };
350        if !self.kind.matches(link) || self.has_negated_field(network, link_id) {
351            return Vec::new();
352        }
353
354        let structural_children = structural_children(network, link_id);
355        let context = MatchContext {
356            network,
357            parent: link_id,
358            structural_children: &structural_children,
359        };
360        match_child_patterns(&context, &self.children, 0, false, captures)
361            .into_iter()
362            .map(|(_child_index, captures)| captures)
363            .collect()
364    }
365
366    fn has_negated_field(&self, network: &LinkNetwork, link_id: LinkId) -> bool {
367        self.children.iter().any(|child| {
368            if let QueryChildPattern::NegatedField(label) = child {
369                has_field(network, link_id, label)
370            } else {
371                false
372            }
373        })
374    }
375}
376
377#[derive(Clone, Debug, PartialEq, Eq)]
378enum QueryNodeKind {
379    Exact(String),
380    Wildcard,
381}
382
383impl QueryNodeKind {
384    fn matches(&self, link: &Link) -> bool {
385        match self {
386            Self::Exact(kind) => link.metadata().term() == Some(kind.as_str()),
387            Self::Wildcard => true,
388        }
389    }
390}
391
392#[derive(Clone, Debug, PartialEq, Eq)]
393enum QueryChildPattern {
394    Anchor,
395    NegatedField(String),
396    Pattern(QueryChildExpression),
397}
398
399#[derive(Clone, Debug, PartialEq, Eq)]
400struct QueryChildExpression {
401    field: Option<String>,
402    expression: QueryExpression,
403    capture: Option<String>,
404    quantifier: QueryQuantifier,
405}
406
407#[derive(Clone, Debug, PartialEq, Eq)]
408enum QueryExpression {
409    Node(QueryNodePattern),
410    Alternation(Vec<QueryNodePattern>),
411}
412
413#[derive(Clone, Copy, Debug, PartialEq, Eq)]
414enum QueryQuantifier {
415    One,
416    ZeroOrOne,
417    ZeroOrMore,
418    OneOrMore,
419}
420
421struct MatchContext<'a> {
422    network: &'a LinkNetwork,
423    parent: LinkId,
424    structural_children: &'a [LinkId],
425}
426
427fn match_child_patterns(
428    context: &MatchContext<'_>,
429    patterns: &[QueryChildPattern],
430    child_index: usize,
431    anchored: bool,
432    captures: QueryCaptures,
433) -> Vec<(usize, QueryCaptures)> {
434    let Some((pattern, remaining)) = patterns.split_first() else {
435        return vec![(child_index, captures)];
436    };
437
438    match pattern {
439        QueryChildPattern::Anchor => {
440            if remaining.is_empty() || remaining.iter().all(is_negated_field_pattern) {
441                if child_index == context.structural_children.len() {
442                    match_child_patterns(context, remaining, child_index, true, captures)
443                } else {
444                    Vec::new()
445                }
446            } else {
447                match_child_patterns(context, remaining, child_index, true, captures)
448            }
449        }
450        QueryChildPattern::NegatedField(_) => {
451            match_child_patterns(context, remaining, child_index, anchored, captures)
452        }
453        QueryChildPattern::Pattern(expression) => match_quantified_expression(
454            context,
455            expression,
456            remaining,
457            child_index,
458            anchored,
459            &captures,
460        ),
461    }
462}
463
464const fn is_negated_field_pattern(pattern: &QueryChildPattern) -> bool {
465    matches!(pattern, QueryChildPattern::NegatedField(_))
466}
467
468fn match_quantified_expression(
469    context: &MatchContext<'_>,
470    expression: &QueryChildExpression,
471    remaining: &[QueryChildPattern],
472    child_index: usize,
473    anchored: bool,
474    captures: &QueryCaptures,
475) -> Vec<(usize, QueryCaptures)> {
476    match expression.quantifier {
477        QueryQuantifier::One => match_one_then_continue(
478            context,
479            expression,
480            remaining,
481            child_index,
482            anchored,
483            captures,
484        ),
485        QueryQuantifier::ZeroOrOne => {
486            let mut results =
487                match_child_patterns(context, remaining, child_index, anchored, captures.clone());
488            results.extend(match_one_then_continue(
489                context,
490                expression,
491                remaining,
492                child_index,
493                anchored,
494                captures,
495            ));
496            results
497        }
498        QueryQuantifier::ZeroOrMore => {
499            let mut results =
500                match_child_patterns(context, remaining, child_index, anchored, captures.clone());
501            results.extend(match_repeated_expression(
502                context,
503                expression,
504                remaining,
505                child_index,
506                anchored,
507                captures,
508            ));
509            results
510        }
511        QueryQuantifier::OneOrMore => match_repeated_expression(
512            context,
513            expression,
514            remaining,
515            child_index,
516            anchored,
517            captures,
518        ),
519    }
520}
521
522fn match_repeated_expression(
523    context: &MatchContext<'_>,
524    expression: &QueryChildExpression,
525    remaining: &[QueryChildPattern],
526    child_index: usize,
527    anchored: bool,
528    captures: &QueryCaptures,
529) -> Vec<(usize, QueryCaptures)> {
530    let mut results = Vec::new();
531    for (next_index, next_captures) in
532        match_expression_at_positions(context, expression, child_index, anchored, captures)
533    {
534        results.extend(match_child_patterns(
535            context,
536            remaining,
537            next_index,
538            false,
539            next_captures.clone(),
540        ));
541        results.extend(match_repeated_expression(
542            context,
543            expression,
544            remaining,
545            next_index,
546            true,
547            &next_captures,
548        ));
549    }
550    results
551}
552
553fn match_one_then_continue(
554    context: &MatchContext<'_>,
555    expression: &QueryChildExpression,
556    remaining: &[QueryChildPattern],
557    child_index: usize,
558    anchored: bool,
559    captures: &QueryCaptures,
560) -> Vec<(usize, QueryCaptures)> {
561    match_expression_at_positions(context, expression, child_index, anchored, captures)
562        .into_iter()
563        .flat_map(|(next_index, captures)| {
564            match_child_patterns(context, remaining, next_index, false, captures)
565        })
566        .collect()
567}
568
569fn match_expression_at_positions(
570    context: &MatchContext<'_>,
571    expression: &QueryChildExpression,
572    child_index: usize,
573    anchored: bool,
574    captures: &QueryCaptures,
575) -> Vec<(usize, QueryCaptures)> {
576    let positions: Box<dyn Iterator<Item = usize>> = if anchored {
577        Box::new(std::iter::once(child_index))
578    } else {
579        Box::new(child_index..context.structural_children.len())
580    };
581
582    positions
583        .filter(|position| *position < context.structural_children.len())
584        .flat_map(|position| {
585            let child = context.structural_children[position];
586            match_expression(
587                context.network,
588                context.parent,
589                child,
590                expression,
591                captures.clone(),
592            )
593            .into_iter()
594            .map(move |captures| (position + 1, captures))
595        })
596        .collect()
597}
598
599fn match_expression(
600    network: &LinkNetwork,
601    parent: LinkId,
602    child: LinkId,
603    expression: &QueryChildExpression,
604    captures: QueryCaptures,
605) -> Vec<QueryCaptures> {
606    if let Some(field) = &expression.field {
607        let field_targets = field_targets(network, parent, field);
608        if !field_targets.contains(&child) {
609            return Vec::new();
610        }
611    }
612
613    let matches = match &expression.expression {
614        QueryExpression::Node(node) => node.matches(network, child, captures),
615        QueryExpression::Alternation(alternatives) => alternatives
616            .iter()
617            .flat_map(|alternative| alternative.matches(network, child, captures.clone()))
618            .collect(),
619    };
620
621    matches
622        .into_iter()
623        .map(|captures| {
624            if let Some(capture) = &expression.capture {
625                captures.with_capture(capture, child)
626            } else {
627                captures
628            }
629        })
630        .collect()
631}
632
633fn structural_children(network: &LinkNetwork, parent: LinkId) -> Vec<LinkId> {
634    network
635        .links()
636        .filter(|link| link.references().first().copied() == Some(parent))
637        .filter(|link| {
638            !matches!(
639                link.metadata().link_type(),
640                Some(LinkType::Field | LinkType::Trivia)
641            )
642        })
643        .map(Link::id)
644        .collect()
645}
646
647fn has_field(network: &LinkNetwork, parent: LinkId, label: &str) -> bool {
648    !field_targets(network, parent, label).is_empty()
649}
650
651fn field_targets(network: &LinkNetwork, parent: LinkId, label: &str) -> Vec<LinkId> {
652    network
653        .links()
654        .filter(|link| link.metadata().link_type() == Some(LinkType::Field))
655        .filter_map(|link| {
656            let references = link.references();
657            let [field_parent, field_label, field_child] = references else {
658                return None;
659            };
660            if *field_parent != parent {
661                return None;
662            }
663            let label_link = network.link(*field_label)?;
664            (label_link.metadata().term() == Some(label)).then_some(*field_child)
665        })
666        .collect()
667}
668
669#[derive(Clone, Debug, PartialEq, Eq)]
670enum QueryToken {
671    LParen,
672    RParen,
673    LBracket,
674    RBracket,
675    Colon,
676    Dot,
677    Bang,
678    Question,
679    Star,
680    Plus,
681    Ident(String),
682    Capture(String),
683    Literal(String),
684}
685
686struct QueryParser {
687    tokens: Vec<QueryToken>,
688    position: usize,
689}
690
691impl QueryParser {
692    const fn new(tokens: Vec<QueryToken>) -> Self {
693        Self {
694            tokens,
695            position: 0,
696        }
697    }
698
699    fn parse(&mut self) -> Result<(QueryPattern, Vec<QueryPredicate>), QueryParseError> {
700        let mut pattern = None;
701        let mut predicates = Vec::new();
702
703        while !self.is_at_end() {
704            if self.next_is_predicate() {
705                predicates.push(self.parse_predicate()?);
706            } else if pattern.is_none() {
707                let root = self.parse_node_pattern()?;
708                let capture = self.parse_optional_capture();
709                pattern = Some(QueryPattern { root, capture });
710            } else {
711                return Err(QueryParseError::new(
712                    "query may contain one root pattern followed by predicates",
713                ));
714            }
715        }
716
717        Ok((
718            pattern.ok_or_else(|| QueryParseError::new("query is missing a root pattern"))?,
719            predicates,
720        ))
721    }
722
723    fn next_is_predicate(&self) -> bool {
724        matches!(
725            (self.peek(), self.peek_next()),
726            (
727                Some(QueryToken::LParen),
728                Some(QueryToken::Ident(identifier))
729            ) if identifier.starts_with('#')
730        )
731    }
732
733    fn parse_predicate(&mut self) -> Result<QueryPredicate, QueryParseError> {
734        self.expect(&QueryToken::LParen)?;
735        let name = match self.advance() {
736            Some(QueryToken::Ident(name)) if name.starts_with('#') => {
737                name.trim_start_matches('#').to_string()
738            }
739            _ => return Err(QueryParseError::new("predicate must start with #name")),
740        };
741
742        let mut arguments = Vec::new();
743        while !matches!(self.peek(), Some(QueryToken::RParen)) {
744            match self.advance() {
745                Some(QueryToken::Capture(name)) => {
746                    arguments.push(QueryPredicateArgument::Capture(name));
747                }
748                Some(QueryToken::Literal(value) | QueryToken::Ident(value)) => {
749                    arguments.push(QueryPredicateArgument::Literal(value));
750                }
751                Some(_) => return Err(QueryParseError::new("invalid predicate argument")),
752                None => return Err(QueryParseError::new("unterminated predicate")),
753            }
754        }
755        self.expect(&QueryToken::RParen)?;
756
757        Ok(QueryPredicate { name, arguments })
758    }
759
760    fn parse_node_pattern(&mut self) -> Result<QueryNodePattern, QueryParseError> {
761        self.expect(&QueryToken::LParen)?;
762        let kind = match self.advance() {
763            Some(QueryToken::Ident(identifier)) if identifier == "_" => QueryNodeKind::Wildcard,
764            Some(QueryToken::Ident(identifier)) => QueryNodeKind::Exact(identifier),
765            _ => return Err(QueryParseError::new("node pattern is missing a kind")),
766        };
767
768        let mut children = Vec::new();
769        while !matches!(self.peek(), Some(QueryToken::RParen)) {
770            if self.is_at_end() {
771                return Err(QueryParseError::new("unterminated node pattern"));
772            }
773            children.push(self.parse_child_pattern()?);
774        }
775        self.expect(&QueryToken::RParen)?;
776
777        Ok(QueryNodePattern { kind, children })
778    }
779
780    fn parse_child_pattern(&mut self) -> Result<QueryChildPattern, QueryParseError> {
781        match self.peek() {
782            Some(QueryToken::Dot) => {
783                self.advance();
784                Ok(QueryChildPattern::Anchor)
785            }
786            Some(QueryToken::Bang) => {
787                self.advance();
788                let Some(QueryToken::Ident(label)) = self.advance() else {
789                    return Err(QueryParseError::new("negated field is missing a label"));
790                };
791                Ok(QueryChildPattern::NegatedField(label))
792            }
793            _ => {
794                let field = self.parse_optional_field()?;
795                let expression = if matches!(self.peek(), Some(QueryToken::LBracket)) {
796                    self.parse_alternation()?
797                } else {
798                    QueryExpression::Node(self.parse_node_pattern()?)
799                };
800                let (capture, quantifier) = self.parse_capture_and_quantifier();
801                Ok(QueryChildPattern::Pattern(QueryChildExpression {
802                    field,
803                    expression,
804                    capture,
805                    quantifier,
806                }))
807            }
808        }
809    }
810
811    fn parse_alternation(&mut self) -> Result<QueryExpression, QueryParseError> {
812        self.expect(&QueryToken::LBracket)?;
813        let mut alternatives = Vec::new();
814        while !matches!(self.peek(), Some(QueryToken::RBracket)) {
815            if self.is_at_end() {
816                return Err(QueryParseError::new("unterminated alternation"));
817            }
818            alternatives.push(self.parse_node_pattern()?);
819        }
820        self.expect(&QueryToken::RBracket)?;
821        if alternatives.is_empty() {
822            return Err(QueryParseError::new("alternation must contain patterns"));
823        }
824        Ok(QueryExpression::Alternation(alternatives))
825    }
826
827    fn parse_optional_field(&mut self) -> Result<Option<String>, QueryParseError> {
828        if !matches!(
829            (self.peek(), self.peek_next()),
830            (Some(QueryToken::Ident(_)), Some(QueryToken::Colon))
831        ) {
832            return Ok(None);
833        }
834
835        let Some(QueryToken::Ident(label)) = self.advance() else {
836            return Err(QueryParseError::new("field is missing a label"));
837        };
838        self.expect(&QueryToken::Colon)?;
839        Ok(Some(label))
840    }
841
842    fn parse_capture_and_quantifier(&mut self) -> (Option<String>, QueryQuantifier) {
843        let mut capture = self.parse_optional_capture();
844        let mut quantifier = self.parse_optional_quantifier();
845        if capture.is_none() {
846            capture = self.parse_optional_capture();
847        }
848        if quantifier == QueryQuantifier::One {
849            quantifier = self.parse_optional_quantifier();
850        }
851        (capture, quantifier)
852    }
853
854    fn parse_optional_capture(&mut self) -> Option<String> {
855        if let Some(QueryToken::Capture(name)) = self.peek().cloned() {
856            self.advance();
857            Some(name)
858        } else {
859            None
860        }
861    }
862
863    fn parse_optional_quantifier(&mut self) -> QueryQuantifier {
864        match self.peek() {
865            Some(QueryToken::Question) => {
866                self.advance();
867                QueryQuantifier::ZeroOrOne
868            }
869            Some(QueryToken::Star) => {
870                self.advance();
871                QueryQuantifier::ZeroOrMore
872            }
873            Some(QueryToken::Plus) => {
874                self.advance();
875                QueryQuantifier::OneOrMore
876            }
877            _ => QueryQuantifier::One,
878        }
879    }
880
881    fn expect(&mut self, expected: &QueryToken) -> Result<(), QueryParseError> {
882        let Some(actual) = self.advance() else {
883            return Err(QueryParseError::new("unexpected end of query"));
884        };
885        if std::mem::discriminant(&actual) == std::mem::discriminant(expected) {
886            Ok(())
887        } else {
888            Err(QueryParseError::new("unexpected token in query"))
889        }
890    }
891
892    fn advance(&mut self) -> Option<QueryToken> {
893        let token = self.tokens.get(self.position).cloned()?;
894        self.position += 1;
895        Some(token)
896    }
897
898    fn peek(&self) -> Option<&QueryToken> {
899        self.tokens.get(self.position)
900    }
901
902    fn peek_next(&self) -> Option<&QueryToken> {
903        self.tokens.get(self.position + 1)
904    }
905
906    fn is_at_end(&self) -> bool {
907        self.position >= self.tokens.len()
908    }
909}
910
911fn tokenize(source: &str) -> Result<Vec<QueryToken>, QueryParseError> {
912    let mut tokens = Vec::new();
913    let mut characters = source.chars().peekable();
914
915    while let Some(character) = characters.peek().copied() {
916        match character {
917            whitespace if whitespace.is_whitespace() => {
918                characters.next();
919            }
920            '(' => push_single(&mut tokens, &mut characters, QueryToken::LParen),
921            ')' => push_single(&mut tokens, &mut characters, QueryToken::RParen),
922            '[' => push_single(&mut tokens, &mut characters, QueryToken::LBracket),
923            ']' => push_single(&mut tokens, &mut characters, QueryToken::RBracket),
924            ':' => push_single(&mut tokens, &mut characters, QueryToken::Colon),
925            '.' => push_single(&mut tokens, &mut characters, QueryToken::Dot),
926            '!' => push_single(&mut tokens, &mut characters, QueryToken::Bang),
927            '?' => push_single(&mut tokens, &mut characters, QueryToken::Question),
928            '*' => push_single(&mut tokens, &mut characters, QueryToken::Star),
929            '+' => push_single(&mut tokens, &mut characters, QueryToken::Plus),
930            '@' => {
931                characters.next();
932                tokens.push(QueryToken::Capture(read_atom(&mut characters)));
933            }
934            '"' => tokens.push(QueryToken::Literal(read_string(&mut characters)?)),
935            _ => tokens.push(QueryToken::Ident(read_atom(&mut characters))),
936        }
937    }
938
939    Ok(tokens)
940}
941
942fn push_single(
943    tokens: &mut Vec<QueryToken>,
944    characters: &mut std::iter::Peekable<std::str::Chars<'_>>,
945    token: QueryToken,
946) {
947    characters.next();
948    tokens.push(token);
949}
950
951fn read_atom(characters: &mut std::iter::Peekable<std::str::Chars<'_>>) -> String {
952    let mut atom = String::new();
953    while let Some(character) = characters.peek().copied() {
954        if character.is_whitespace()
955            || matches!(character, '(' | ')' | '[' | ']' | ':' | '!' | '@' | '"')
956        {
957            break;
958        }
959        atom.push(character);
960        characters.next();
961    }
962    atom
963}
964
965fn read_string(
966    characters: &mut std::iter::Peekable<std::str::Chars<'_>>,
967) -> Result<String, QueryParseError> {
968    let mut literal = String::new();
969    characters.next();
970
971    while let Some(character) = characters.next() {
972        match character {
973            '"' => return Ok(literal),
974            '\\' => {
975                let Some(escaped) = characters.next() else {
976                    return Err(QueryParseError::new("unterminated string escape"));
977                };
978                literal.push(match escaped {
979                    'n' => '\n',
980                    'r' => '\r',
981                    't' => '\t',
982                    other => other,
983                });
984            }
985            other => literal.push(other),
986        }
987    }
988
989    Err(QueryParseError::new("unterminated string literal"))
990}