wit_parser/ast/
toposort.rs

1use crate::ast::{Id, Span};
2use anyhow::Result;
3use indexmap::IndexMap;
4use std::collections::BinaryHeap;
5use std::fmt;
6use std::mem;
7
8#[derive(Default, Clone)]
9struct State {
10    /// Number of outbound edges from this node which have still not been
11    /// processed into the topological ordering.
12    outbound_remaining: usize,
13
14    /// Indices of nodes that depend on this one, used when this node is added
15    /// to the binary heap to decrement `outbound_remaining`.
16    reverse_deps: Vec<usize>,
17}
18
19/// Performs a topological sort of the `deps` provided, returning the order in
20/// which to visit the nodes in reverse-dep order.
21///
22/// This sort goes one level further as well to produce a stable ordering
23/// regardless of the input edges so long as the structure of the graph has
24/// changed. Notably the nodes are sorted, by name, in the output in addition to
25/// being sorted in dependency order. This is done to assist with round-tripping
26/// documents where new edges are discovered during world elaboration that
27/// doesn't change the dependency graph but can change the dependency listings
28/// between serializations.
29///
30/// The algorithm chosen here to do this is:
31///
32/// * Build some metadata about all nodes including their count of outbound
33///   edges remaining to be added to the order and a reverse dependency list.
34/// * Collect all nodes with 0 outbound edges into a binary heap.
35/// * Pop from the binary heap and decrement outbound edges that depend on
36///   this node.
37/// * Iterate until the dependency ordering is the same size as the dependency
38///   array.
39///
40/// This sort will also detect when dependencies are missing or when cycles are
41/// present and return an error.
42pub fn toposort<'a>(
43    kind: &str,
44    deps: &IndexMap<&'a str, Vec<Id<'a>>>,
45) -> Result<Vec<&'a str>, Error> {
46    // Initialize a `State` per-node with the number of outbound edges and
47    // additionally filling out the `reverse_deps` array.
48    let mut states = vec![State::default(); deps.len()];
49    for (i, (_, edges)) in deps.iter().enumerate() {
50        states[i].outbound_remaining = edges.len();
51        for edge in edges {
52            let (j, _, _) = deps
53                .get_full(edge.name)
54                .ok_or_else(|| Error::NonexistentDep {
55                    span: edge.span,
56                    name: edge.name.to_string(),
57                    kind: kind.to_string(),
58                    highlighted: None,
59                })?;
60            states[j].reverse_deps.push(i);
61        }
62    }
63
64    let mut order = Vec::new();
65    let mut heap = BinaryHeap::new();
66
67    // Seed the `heap` with edges that have no outbound edges
68    //
69    // The heap here is keyed by `(usize, &str, usize)` where the first `usize`
70    // is unique which is what determines the order of the heap. The other two
71    // fields are metadata used when pulling from the heap. The first `usize` is
72    // the index of the item within the original dependency map which should
73    // reflect the original source order of the item. Note that this is stored
74    // in reverse order to ensure that when there are multiple items in the heap
75    // the first item in the original order is popped first.
76    for (i, dep) in deps.keys().enumerate() {
77        if states[i].outbound_remaining == 0 {
78            heap.push((deps.len() - i, *dep, i));
79        }
80    }
81
82    // Drain the binary heap which represents all nodes that have had all their
83    // dependencies processed. Iteratively add to the heap as well as nodes are
84    // removed.
85    while let Some((_order, node, i)) = heap.pop() {
86        order.push(node);
87        for i in mem::take(&mut states[i].reverse_deps) {
88            states[i].outbound_remaining -= 1;
89            if states[i].outbound_remaining == 0 {
90                let (dep, _) = deps.get_index(i).unwrap();
91                heap.push((deps.len() - i, *dep, i));
92            }
93        }
94    }
95
96    // If all nodes are present in order then a topological ordering was
97    // achieved and it can be returned.
98    if order.len() == deps.len() {
99        return Ok(order);
100    }
101
102    // ... otherwise there are still dependencies with remaining edges which
103    // means that a cycle must be present, so find the cycle and report the
104    // error.
105    for (i, state) in states.iter().enumerate() {
106        if state.outbound_remaining == 0 {
107            continue;
108        }
109        let (_, edges) = deps.get_index(i).unwrap();
110        for dep in edges {
111            let (j, _, _) = deps.get_full(dep.name).unwrap();
112            if states[j].outbound_remaining == 0 {
113                continue;
114            }
115            return Err(Error::Cycle {
116                span: dep.span,
117                name: dep.name.to_string(),
118                kind: kind.to_string(),
119                highlighted: None,
120            });
121        }
122    }
123
124    unreachable!()
125}
126
127#[derive(Debug)]
128pub enum Error {
129    NonexistentDep {
130        span: Span,
131        name: String,
132        kind: String,
133        highlighted: Option<String>,
134    },
135    Cycle {
136        span: Span,
137        name: String,
138        kind: String,
139        highlighted: Option<String>,
140    },
141}
142
143impl Error {
144    pub(crate) fn highlighted(&self) -> Option<&str> {
145        match self {
146            Error::NonexistentDep { highlighted, .. } | Error::Cycle { highlighted, .. } => {
147                highlighted.as_deref()
148            }
149        }
150    }
151    pub(crate) fn set_highlighted(&mut self, string: String) {
152        match self {
153            Error::NonexistentDep { highlighted, .. } | Error::Cycle { highlighted, .. } => {
154                *highlighted = Some(string);
155            }
156        }
157    }
158}
159
160impl fmt::Display for Error {
161    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
162        if let Some(s) = self.highlighted() {
163            return f.write_str(s);
164        }
165        match self {
166            Error::NonexistentDep { kind, name, .. } => {
167                write!(f, "{kind} `{name}` does not exist")
168            }
169            Error::Cycle { kind, name, .. } => {
170                write!(f, "{kind} `{name}` depends on itself")
171            }
172        }
173    }
174}
175
176impl std::error::Error for Error {}
177
178#[cfg(test)]
179mod tests {
180    use super::*;
181
182    fn id(name: &str) -> Id<'_> {
183        Id {
184            name,
185            span: Span { start: 0, end: 0 },
186        }
187    }
188
189    #[test]
190    fn smoke() {
191        let empty: Vec<&str> = Vec::new();
192        assert_eq!(toposort("", &IndexMap::new()).unwrap(), empty);
193
194        let mut nonexistent = IndexMap::new();
195        nonexistent.insert("a", vec![id("b")]);
196        assert!(matches!(
197            toposort("", &nonexistent),
198            Err(Error::NonexistentDep { .. })
199        ));
200
201        let mut one = IndexMap::new();
202        one.insert("a", vec![]);
203        assert_eq!(toposort("", &one).unwrap(), ["a"]);
204
205        let mut two = IndexMap::new();
206        two.insert("a", vec![]);
207        two.insert("b", vec![id("a")]);
208        assert_eq!(toposort("", &two).unwrap(), ["a", "b"]);
209
210        let mut two = IndexMap::new();
211        two.insert("a", vec![id("b")]);
212        two.insert("b", vec![]);
213        assert_eq!(toposort("", &two).unwrap(), ["b", "a"]);
214    }
215
216    #[test]
217    fn cycles() {
218        let mut cycle = IndexMap::new();
219        cycle.insert("a", vec![id("a")]);
220        assert!(matches!(toposort("", &cycle), Err(Error::Cycle { .. })));
221
222        let mut cycle = IndexMap::new();
223        cycle.insert("a", vec![id("b")]);
224        cycle.insert("b", vec![id("c")]);
225        cycle.insert("c", vec![id("a")]);
226        assert!(matches!(toposort("", &cycle), Err(Error::Cycle { .. })));
227    }
228
229    #[test]
230    fn depend_twice() {
231        let mut two = IndexMap::new();
232        two.insert("b", vec![id("a"), id("a")]);
233        two.insert("a", vec![]);
234        assert_eq!(toposort("", &two).unwrap(), ["a", "b"]);
235    }
236
237    #[test]
238    fn preserve_order() {
239        let mut order = IndexMap::new();
240        order.insert("a", vec![]);
241        order.insert("b", vec![]);
242        assert_eq!(toposort("", &order).unwrap(), ["a", "b"]);
243
244        let mut order = IndexMap::new();
245        order.insert("b", vec![]);
246        order.insert("a", vec![]);
247        assert_eq!(toposort("", &order).unwrap(), ["b", "a"]);
248    }
249}