1use std::collections::{BTreeMap, BTreeSet};
2use std::fmt;
3
4use crate::{Link, LinkId, LinkNetwork, LinkQuery, LinkType, SourceTextPredicateHost};
5
6mod snapshot;
7mod syntax;
8mod text_pattern;
9
10pub use snapshot::{
11 LinkRuleSnapshotCase, LinkRuleSnapshotExpectation, LinkRuleSnapshotReport,
12 LinkRuleSnapshotResult, LinkRuleSnapshotSuite,
13};
14
15use text_pattern::TextPattern;
16
17#[derive(Clone, Debug, PartialEq, Eq)]
19pub struct LinkRule {
20 kind: LinkRuleKind,
21}
22
23impl LinkRule {
24 #[must_use]
26 pub const fn query(query: LinkQuery) -> Self {
27 Self {
28 kind: LinkRuleKind::Query(query),
29 }
30 }
31
32 #[must_use]
34 pub fn kind(kind: impl Into<String>) -> Self {
35 Self {
36 kind: LinkRuleKind::Kind(kind.into()),
37 }
38 }
39
40 #[must_use]
42 pub const fn link_type(link_type: LinkType) -> Self {
43 Self {
44 kind: LinkRuleKind::LinkType(link_type),
45 }
46 }
47
48 #[must_use]
50 pub fn capture(name: impl Into<String>, rule: Self) -> Self {
51 Self {
52 kind: LinkRuleKind::Capture {
53 name: normalize_capture_name(name.into()),
54 rule: Box::new(rule),
55 },
56 }
57 }
58
59 #[must_use]
61 pub fn typed_metavariable(name: impl Into<String>, kind: impl Into<String>) -> Self {
62 Self {
63 kind: LinkRuleKind::TypedMetavariable {
64 name: normalize_capture_name(name.into()),
65 kind: kind.into(),
66 },
67 }
68 }
69
70 #[must_use]
72 pub fn inside(rule: Self, ancestor: Self) -> Self {
73 Self {
74 kind: LinkRuleKind::Inside {
75 rule: Box::new(rule),
76 ancestor: Box::new(ancestor),
77 },
78 }
79 }
80
81 #[must_use]
83 pub fn has(rule: Self, descendant: Self) -> Self {
84 Self {
85 kind: LinkRuleKind::Has {
86 rule: Box::new(rule),
87 descendant: Box::new(descendant),
88 },
89 }
90 }
91
92 #[must_use]
94 pub fn precedes(rule: Self, following: Self) -> Self {
95 Self {
96 kind: LinkRuleKind::Precedes {
97 rule: Box::new(rule),
98 following: Box::new(following),
99 },
100 }
101 }
102
103 #[must_use]
105 pub fn follows(rule: Self, preceding: Self) -> Self {
106 Self {
107 kind: LinkRuleKind::Follows {
108 rule: Box::new(rule),
109 preceding: Box::new(preceding),
110 },
111 }
112 }
113
114 #[must_use]
116 pub fn all(rules: impl Into<Vec<Self>>) -> Self {
117 Self {
118 kind: LinkRuleKind::All(rules.into()),
119 }
120 }
121
122 #[must_use]
124 pub fn any(rules: impl Into<Vec<Self>>) -> Self {
125 Self {
126 kind: LinkRuleKind::Any(rules.into()),
127 }
128 }
129
130 #[must_use]
132 pub fn negate(rule: Self) -> Self {
133 Self {
134 kind: LinkRuleKind::Not(Box::new(rule)),
135 }
136 }
137
138 #[must_use]
140 pub fn named(name: impl Into<String>) -> Self {
141 Self {
142 kind: LinkRuleKind::Ref(name.into()),
143 }
144 }
145
146 #[must_use]
148 pub fn ellipsis_gap(before: Self, after: Self) -> Self {
149 Self {
150 kind: LinkRuleKind::Ellipsis {
151 before: Box::new(before),
152 after: Box::new(after),
153 },
154 }
155 }
156
157 pub fn text(pattern: impl Into<String>) -> Result<Self, LinkRuleParseError> {
159 Ok(Self {
160 kind: LinkRuleKind::Text(TextPattern::parse(pattern.into())?),
161 })
162 }
163
164 pub fn from_sexpression(source: &str) -> Result<Self, LinkRuleParseError> {
166 syntax::parse_rule(source)
167 }
168
169 #[must_use]
171 pub fn matches(
172 &self,
173 network: &LinkNetwork,
174 registry: &LinkRuleRegistry,
175 ) -> Vec<LinkRuleMatch> {
176 let context = RuleContext { network, registry };
177 dedupe_matches(self.evaluate(&context, &mut Vec::new()))
178 }
179
180 fn evaluate(&self, context: &RuleContext<'_>, stack: &mut Vec<String>) -> Vec<LinkRuleMatch> {
181 match &self.kind {
182 LinkRuleKind::Query(query) => context
183 .network
184 .query_matches_with(query, &SourceTextPredicateHost)
185 .into_iter()
186 .map(|query_match| LinkRuleMatch::from_query_match(&query_match))
187 .collect(),
188 LinkRuleKind::Kind(kind) => context
189 .network
190 .links()
191 .filter(|link| link.metadata().term() == Some(kind.as_str()))
192 .map(|link| LinkRuleMatch::new(link.id()))
193 .collect(),
194 LinkRuleKind::LinkType(link_type) => context
195 .network
196 .links()
197 .filter(|link| link.metadata().link_type() == Some(*link_type))
198 .map(|link| LinkRuleMatch::new(link.id()))
199 .collect(),
200 LinkRuleKind::Language(language) => context
201 .network
202 .links()
203 .filter(|link| link.metadata().language() == Some(language.as_str()))
204 .map(|link| LinkRuleMatch::new(link.id()))
205 .collect(),
206 LinkRuleKind::Named(named) => context
207 .network
208 .links()
209 .filter(|link| link.metadata().is_named() == *named)
210 .map(|link| LinkRuleMatch::new(link.id()))
211 .collect(),
212 LinkRuleKind::Capture { name, rule } => rule
213 .evaluate(context, stack)
214 .into_iter()
215 .map(|rule_match| {
216 let link_id = rule_match.link_id;
217 rule_match.with_link_capture(name, link_id)
218 })
219 .collect(),
220 LinkRuleKind::TypedMetavariable { name, kind } => context
221 .network
222 .links()
223 .filter(|link| link.metadata().term() == Some(kind.as_str()))
224 .map(|link| LinkRuleMatch::new(link.id()).with_link_capture(name, link.id()))
225 .collect(),
226 LinkRuleKind::Inside { rule, ancestor } => {
227 let ancestor_ids = ancestor
228 .evaluate(context, stack)
229 .into_iter()
230 .map(|rule_match| rule_match.link_id)
231 .collect::<BTreeSet<_>>();
232 rule.evaluate(context, stack)
233 .into_iter()
234 .filter(|rule_match| {
235 ancestors(context.network, rule_match.link_id)
236 .iter()
237 .any(|ancestor| ancestor_ids.contains(ancestor))
238 })
239 .collect()
240 }
241 LinkRuleKind::Has { rule, descendant } => {
242 let descendants = descendant.evaluate(context, stack);
243 rule.evaluate(context, stack)
244 .into_iter()
245 .flat_map(|outer| {
246 descendants
247 .iter()
248 .filter(move |inner| {
249 is_descendant(context.network, inner.link_id, outer.link_id)
250 })
251 .map(move |inner| outer.merge_as(outer.link_id, inner))
252 })
253 .collect()
254 }
255 LinkRuleKind::Precedes { rule, following } => {
256 let following = following.evaluate(context, stack);
257 rule.evaluate(context, stack)
258 .into_iter()
259 .flat_map(|left| {
260 following
261 .iter()
262 .filter(move |right| {
263 order_key(context.network, left.link_id)
264 < order_key(context.network, right.link_id)
265 })
266 .map(move |right| left.merge_as(left.link_id, right))
267 })
268 .collect()
269 }
270 LinkRuleKind::Follows { rule, preceding } => {
271 let preceding = preceding.evaluate(context, stack);
272 rule.evaluate(context, stack)
273 .into_iter()
274 .flat_map(|right| {
275 preceding
276 .iter()
277 .filter(move |left| {
278 order_key(context.network, left.link_id)
279 < order_key(context.network, right.link_id)
280 })
281 .map(move |left| right.merge_as(right.link_id, left))
282 })
283 .collect()
284 }
285 LinkRuleKind::All(rules) => match rules.split_first() {
286 Some((first, rest)) => {
287 rest.iter()
288 .fold(first.evaluate(context, stack), |acc, rule| {
289 let matches = rule.evaluate(context, stack);
290 intersect_matches(acc, &matches)
291 })
292 }
293 None => Vec::new(),
294 },
295 LinkRuleKind::Any(rules) => {
296 let mut matches = Vec::new();
297 for rule in rules {
298 matches.extend(rule.evaluate(context, stack));
299 }
300 dedupe_matches(matches)
301 }
302 LinkRuleKind::Not(rule) => {
303 let rejected = rule
304 .evaluate(context, stack)
305 .into_iter()
306 .map(|rule_match| rule_match.link_id)
307 .collect::<BTreeSet<_>>();
308 context
309 .network
310 .links()
311 .filter(|link| !rejected.contains(&link.id()))
312 .map(|link| LinkRuleMatch::new(link.id()))
313 .collect()
314 }
315 LinkRuleKind::Ref(name) => {
316 if stack.iter().any(|entry| entry == name) {
317 return Vec::new();
318 }
319 let Some(rule) = context.registry.get(name) else {
320 return Vec::new();
321 };
322 stack.push(name.clone());
323 let matches = rule.evaluate(context, stack);
324 stack.pop();
325 matches
326 }
327 LinkRuleKind::Ellipsis { before, after } => {
328 ellipsis_matches(context, before, after, stack)
329 }
330 LinkRuleKind::Text(pattern) => pattern.matches(context.network),
331 }
332 }
333}
334
335#[derive(Clone, Debug, PartialEq, Eq)]
336enum LinkRuleKind {
337 Query(LinkQuery),
338 Kind(String),
339 LinkType(LinkType),
340 Language(String),
341 Named(bool),
342 Capture {
343 name: String,
344 rule: Box<LinkRule>,
345 },
346 TypedMetavariable {
347 name: String,
348 kind: String,
349 },
350 Inside {
351 rule: Box<LinkRule>,
352 ancestor: Box<LinkRule>,
353 },
354 Has {
355 rule: Box<LinkRule>,
356 descendant: Box<LinkRule>,
357 },
358 Precedes {
359 rule: Box<LinkRule>,
360 following: Box<LinkRule>,
361 },
362 Follows {
363 rule: Box<LinkRule>,
364 preceding: Box<LinkRule>,
365 },
366 All(Vec<LinkRule>),
367 Any(Vec<LinkRule>),
368 Not(Box<LinkRule>),
369 Ref(String),
370 Ellipsis {
371 before: Box<LinkRule>,
372 after: Box<LinkRule>,
373 },
374 Text(TextPattern),
375}
376
377#[derive(Clone, Debug, Default, PartialEq, Eq)]
379pub struct LinkRuleRegistry {
380 rules: BTreeMap<String, LinkRule>,
381}
382
383impl LinkRuleRegistry {
384 #[must_use]
386 pub fn new() -> Self {
387 Self::default()
388 }
389
390 #[must_use]
392 pub fn with_rule(mut self, name: impl Into<String>, rule: LinkRule) -> Self {
393 self.insert(name, rule);
394 self
395 }
396
397 pub fn insert(&mut self, name: impl Into<String>, rule: LinkRule) {
399 self.rules.insert(name.into(), rule);
400 }
401
402 #[must_use]
404 pub fn get(&self, name: &str) -> Option<&LinkRule> {
405 self.rules.get(name)
406 }
407}
408
409impl std::ops::Not for LinkRule {
410 type Output = Self;
411
412 fn not(self) -> Self::Output {
413 Self {
414 kind: LinkRuleKind::Not(Box::new(self)),
415 }
416 }
417}
418
419#[derive(Clone, Debug, PartialEq, Eq)]
421pub struct LinkRuleMatch {
422 link_id: LinkId,
423 captures: LinkRuleCaptures,
424}
425
426impl LinkRuleMatch {
427 const fn new(link_id: LinkId) -> Self {
428 Self {
429 link_id,
430 captures: LinkRuleCaptures { values: Vec::new() },
431 }
432 }
433
434 fn from_query_match(query_match: &crate::QueryMatch) -> Self {
435 let mut captures = LinkRuleCaptures::default();
436 for capture in query_match.captures().iter() {
437 captures = captures.with_link(capture.name(), capture.link_id());
438 }
439 Self {
440 link_id: query_match.link_id(),
441 captures,
442 }
443 }
444
445 fn with_link_capture(mut self, name: &str, link_id: LinkId) -> Self {
446 self.captures = self.captures.with_link(name, link_id);
447 self
448 }
449
450 fn merge(&self, other: &Self) -> Option<Self> {
451 (self.link_id == other.link_id).then(|| Self {
452 link_id: self.link_id,
453 captures: self.captures.merged(&other.captures),
454 })
455 }
456
457 fn merge_as(&self, link_id: LinkId, other: &Self) -> Self {
458 Self {
459 link_id,
460 captures: self.captures.merged(&other.captures),
461 }
462 }
463
464 #[must_use]
466 pub const fn link_id(&self) -> LinkId {
467 self.link_id
468 }
469
470 #[must_use]
472 pub const fn captures(&self) -> &LinkRuleCaptures {
473 &self.captures
474 }
475}
476
477#[derive(Clone, Debug, Default, PartialEq, Eq)]
479pub struct LinkRuleCaptures {
480 values: Vec<LinkRuleCapture>,
481}
482
483impl LinkRuleCaptures {
484 fn with_link(mut self, name: &str, link_id: LinkId) -> Self {
485 self.values.push(LinkRuleCapture {
486 name: normalize_capture_name(name),
487 link_ids: vec![link_id],
488 text: None,
489 });
490 self
491 }
492
493 fn with_text(mut self, name: &str, text: String, link_ids: Vec<LinkId>) -> Self {
494 self.values.push(LinkRuleCapture {
495 name: normalize_capture_name(name),
496 link_ids,
497 text: Some(text),
498 });
499 self
500 }
501
502 fn merged(&self, other: &Self) -> Self {
503 let mut values = self.values.clone();
504 values.extend(other.values.clone());
505 Self { values }
506 }
507
508 #[must_use]
510 pub fn first(&self, name: &str) -> Option<LinkId> {
511 let name = normalize_capture_name(name);
512 self.values
513 .iter()
514 .find(|capture| capture.name == name)
515 .and_then(|capture| capture.link_ids.first().copied())
516 }
517
518 #[must_use]
520 pub fn text(&self, name: &str) -> Option<&str> {
521 let name = normalize_capture_name(name);
522 self.values
523 .iter()
524 .find(|capture| capture.name == name)
525 .and_then(|capture| capture.text.as_deref())
526 }
527
528 pub fn iter(&self) -> impl Iterator<Item = &LinkRuleCapture> {
530 self.values.iter()
531 }
532}
533
534#[derive(Clone, Debug, PartialEq, Eq)]
536pub struct LinkRuleCapture {
537 name: String,
538 link_ids: Vec<LinkId>,
539 text: Option<String>,
540}
541
542impl LinkRuleCapture {
543 #[must_use]
545 pub fn name(&self) -> &str {
546 &self.name
547 }
548
549 #[must_use]
551 pub fn link_ids(&self) -> &[LinkId] {
552 &self.link_ids
553 }
554
555 #[must_use]
557 pub fn text(&self) -> Option<&str> {
558 self.text.as_deref()
559 }
560}
561
562#[derive(Clone, Copy, Debug, PartialEq, Eq)]
564pub enum TraversalStrategy {
565 TopDown,
567 BottomUp,
569 Innermost,
571 Fixpoint { max_iterations: usize },
573}
574
575impl TraversalStrategy {
576 #[must_use]
578 pub fn matches(
579 self,
580 network: &LinkNetwork,
581 rule: &LinkRule,
582 registry: &LinkRuleRegistry,
583 ) -> Vec<LinkRuleMatch> {
584 let mut matches = rule.matches(network, registry);
585 match self {
586 Self::TopDown | Self::Fixpoint { .. } => {
587 matches.sort_by_key(|rule_match| {
588 (
589 depth(network, rule_match.link_id),
590 order_key(network, rule_match.link_id),
591 )
592 });
593 }
594 Self::BottomUp => {
595 matches.sort_by_key(|rule_match| {
596 (
597 std::cmp::Reverse(depth(network, rule_match.link_id)),
598 order_key(network, rule_match.link_id),
599 )
600 });
601 }
602 Self::Innermost => {
603 let all_matches = matches.clone();
604 matches.retain(|candidate| {
605 !all_matches.iter().any(|other| {
606 other.link_id != candidate.link_id
607 && is_descendant(network, other.link_id, candidate.link_id)
608 })
609 });
610 matches.sort_by_key(|rule_match| {
611 (
612 std::cmp::Reverse(depth(network, rule_match.link_id)),
613 order_key(network, rule_match.link_id),
614 )
615 });
616 }
617 }
618 matches
619 }
620
621 pub fn apply_mut<F>(
624 self,
625 network: &mut LinkNetwork,
626 rule: &LinkRule,
627 registry: &LinkRuleRegistry,
628 mut visitor: F,
629 ) -> TraversalReport
630 where
631 F: FnMut(&mut LinkNetwork, &LinkRuleMatch) -> bool,
632 {
633 match self {
634 Self::Fixpoint { max_iterations } => {
635 let mut report = TraversalReport::default();
636 for _ in 0..max_iterations {
637 let matches = Self::TopDown.matches(network, rule, registry);
638 if matches.is_empty() {
639 break;
640 }
641 report.iterations += 1;
642 let mut changed_this_iteration = 0;
643 for rule_match in matches {
644 report.visited += 1;
645 if visitor(network, &rule_match) {
646 report.changed += 1;
647 changed_this_iteration += 1;
648 }
649 }
650 if changed_this_iteration == 0 {
651 break;
652 }
653 }
654 report
655 }
656 strategy => {
657 let matches = strategy.matches(network, rule, registry);
658 let mut report = TraversalReport {
659 iterations: usize::from(!matches.is_empty()),
660 visited: 0,
661 changed: 0,
662 };
663 for rule_match in matches {
664 report.visited += 1;
665 if visitor(network, &rule_match) {
666 report.changed += 1;
667 }
668 }
669 report
670 }
671 }
672 }
673}
674
675#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
677pub struct TraversalReport {
678 iterations: usize,
679 visited: usize,
680 changed: usize,
681}
682
683impl TraversalReport {
684 #[must_use]
686 pub const fn iterations(&self) -> usize {
687 self.iterations
688 }
689
690 #[must_use]
692 pub const fn visited(&self) -> usize {
693 self.visited
694 }
695
696 #[must_use]
698 pub const fn changed(&self) -> usize {
699 self.changed
700 }
701}
702
703#[derive(Clone, Debug, PartialEq, Eq)]
705pub struct LinkRuleParseError {
706 message: String,
707}
708
709impl LinkRuleParseError {
710 fn new(message: impl Into<String>) -> Self {
711 Self {
712 message: message.into(),
713 }
714 }
715}
716
717impl fmt::Display for LinkRuleParseError {
718 fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
719 formatter.write_str(&self.message)
720 }
721}
722
723impl std::error::Error for LinkRuleParseError {}
724
725struct RuleContext<'a> {
726 network: &'a LinkNetwork,
727 registry: &'a LinkRuleRegistry,
728}
729
730fn ellipsis_matches(
731 context: &RuleContext<'_>,
732 before: &LinkRule,
733 after: &LinkRule,
734 stack: &mut Vec<String>,
735) -> Vec<LinkRuleMatch> {
736 let before_matches = before.evaluate(context, stack);
737 let after_matches = after.evaluate(context, stack);
738 let before_by_id = matches_by_id(&before_matches);
739 let after_by_id = matches_by_id(&after_matches);
740 let mut matches = Vec::new();
741
742 for parent in context.network.links() {
743 let children = structural_children(context.network, parent.id());
744 for (left_index, left) in children.iter().enumerate() {
745 let Some(left_matches) = before_by_id.get(left) else {
746 continue;
747 };
748 for right in children.iter().skip(left_index + 1) {
749 let Some(right_matches) = after_by_id.get(right) else {
750 continue;
751 };
752 for left_match in left_matches {
753 for right_match in right_matches {
754 matches.push(left_match.merge_as(parent.id(), right_match));
755 }
756 }
757 }
758 }
759 }
760
761 matches
762}
763
764fn intersect_matches(left: Vec<LinkRuleMatch>, right: &[LinkRuleMatch]) -> Vec<LinkRuleMatch> {
765 let right_by_id = matches_by_id(right);
766 left.into_iter()
767 .flat_map(|left_match| {
768 right_by_id
769 .get(&left_match.link_id)
770 .into_iter()
771 .flatten()
772 .filter_map(move |right_match| left_match.merge(right_match))
773 })
774 .collect()
775}
776
777fn matches_by_id(matches: &[LinkRuleMatch]) -> BTreeMap<LinkId, Vec<LinkRuleMatch>> {
778 let mut by_id = BTreeMap::<LinkId, Vec<LinkRuleMatch>>::new();
779 for rule_match in matches {
780 by_id
781 .entry(rule_match.link_id)
782 .or_default()
783 .push(rule_match.clone());
784 }
785 by_id
786}
787
788fn dedupe_matches(matches: Vec<LinkRuleMatch>) -> Vec<LinkRuleMatch> {
789 let mut seen = BTreeSet::new();
790 let mut deduped = Vec::new();
791 for rule_match in matches {
792 if seen.insert(rule_match.link_id) {
793 deduped.push(rule_match);
794 }
795 }
796 deduped
797}
798
799fn structural_children(network: &LinkNetwork, parent: LinkId) -> Vec<LinkId> {
800 let mut children = network
801 .links()
802 .filter(|link| link.references().first().copied() == Some(parent))
803 .filter(|link| {
804 !matches!(
805 link.metadata().link_type(),
806 Some(LinkType::Field | LinkType::Trivia)
807 )
808 })
809 .map(Link::id)
810 .collect::<Vec<_>>();
811 children.sort_by_key(|child| order_key(network, *child));
812 children
813}
814
815fn ancestors(network: &LinkNetwork, link_id: LinkId) -> Vec<LinkId> {
816 let mut ancestors = Vec::new();
817 let mut visited = BTreeSet::new();
818 let mut current = link_id;
819 while visited.insert(current) {
820 let Some(parent) = network
821 .link(current)
822 .and_then(|link| link.references().first().copied())
823 else {
824 break;
825 };
826 if parent == current {
827 break;
828 }
829 ancestors.push(parent);
830 current = parent;
831 }
832 ancestors
833}
834
835fn depth(network: &LinkNetwork, link_id: LinkId) -> usize {
836 ancestors(network, link_id).len()
837}
838
839fn is_descendant(network: &LinkNetwork, descendant: LinkId, ancestor: LinkId) -> bool {
840 ancestors(network, descendant).contains(&ancestor)
841}
842
843fn order_key(network: &LinkNetwork, link_id: LinkId) -> (usize, u64) {
844 let start = network
845 .link(link_id)
846 .and_then(|link| link.metadata().span())
847 .map_or(usize::MAX, |span| span.byte_range().start());
848 (start, link_id.as_u64())
849}
850
851fn normalize_capture_name(name: impl AsRef<str>) -> String {
852 name.as_ref().trim_start_matches('@').to_string()
853}