class topsort2(Iterator[set[T]]):  # noqa: N801
    """Topological sort using Kahn's algorithm.

    Iterating yields batches (sets) of vertices. The first batch holds every
    vertex that has no dependencies; each later batch holds the vertices
    whose dependencies have all been yielded in earlier batches.

    Intended as a drop-in replacement for topsort(). Rather than rebuilding
    the full dict and its set objects for every batch, it keeps in-degree
    counters and a reverse adjacency list, so the total work is O(V + E)
    rather than O(depth * V).

    Implemented as a class rather than a generator for better mypyc
    compilation.

    Args:
        data: A map from each vertex to the set of vertices it depends on.
            NOTE: the dependency sets are modified in place -- for
            normalization purposes, self-dependencies are discarded.
            Vertices that occur only as dependencies ("orphans") are
            treated as having no dependencies of their own; no entries
            are added to `data` for them.

    Raises:
        AssertionError: From __next__(), once no further vertex can become
            ready while some still have unmet dependencies, i.e. the graph
            contains a cycle.
    """

    def __init__(self, data: dict[T, set[T]]) -> None:
        # Single pass: remove self-deps, build reverse adjacency list,
        # compute in-degree counts, detect orphans, and find initial ready set.
        in_degree: dict[T, int] = {}
        rev: dict[T, list[T]] = {}
        ready: set[T] = set()
        for item, deps in data.items():
            deps.discard(item)  # Ignore self dependencies.
            deg = len(deps)
            in_degree[item] = deg
            if deg == 0:
                ready.add(item)
            if item not in rev:
                rev[item] = []
            for dep in deps:
                if dep in rev:
                    rev[dep].append(item)
                else:
                    # First time this vertex is seen at all.
                    rev[dep] = [item]
                    if dep not in data:
                        # Orphan: appears as dependency but has no entry in data.
                        in_degree[dep] = 0
                        ready.add(dep)

        # Number of still-unmet dependencies for every known vertex.
        self.in_degree = in_degree
        # Reverse edges: vertex -> the vertices that depend on it.
        self.rev = rev
        # The batch that the next call to __next__() will return.
        self.ready = ready
        # Count of vertices that have not been placed in any batch yet.
        self.remaining = len(in_degree) - len(ready)

    def __iter__(self) -> Iterator[set[T]]:
        return self

    def __next__(self) -> set[T]:
        batch = self.ready
        if not batch:
            if self.remaining != 0:
                # Raised explicitly instead of through an `assert` statement
                # so that cycles are still reported when Python runs with -O
                # (which strips assert statements).
                stuck = [k for k, deg in self.in_degree.items() if deg > 0]
                raise AssertionError(f"A cyclic dependency exists amongst {stuck!r}")
            raise StopIteration
        in_degree = self.in_degree
        rev = self.rev
        # Releasing the current batch satisfies one dependency of each of its
        # dependents; those that reach zero form the following batch.
        new_ready: set[T] = set()
        for item in batch:
            for dependent in rev[item]:
                new_deg = in_degree[dependent] - 1
                in_degree[dependent] = new_deg
                if new_deg == 0:
                    new_ready.add(dependent)
        self.remaining -= len(new_ready)
        self.ready = new_ready
        return batch
mypy.graph_utils import strongly_connected_components, topsort +from mypy.graph_utils import strongly_connected_components, topsort, topsort2 from mypy.modulefinder import SearchPaths from mypy.options import Options from mypy.plugin import Plugin @@ -18,14 +18,77 @@ class GraphSuite(Suite): + def test_topsort_empty(self) -> None: + data: dict[AbstractSet[str], set[AbstractSet[str]]] = {} + assert_equal(list(topsort2(data)), []) + def test_topsort(self) -> None: - a = frozenset({"A"}) - b = frozenset({"B"}) - c = frozenset({"C"}) - d = frozenset({"D"}) - data: dict[AbstractSet[str], set[AbstractSet[str]]] = {a: {b, c}, b: {d}, c: {d}} - res = list(topsort(data)) - assert_equal(res, [{d}, {b, c}, {a}]) + for topsort_func in [topsort, topsort2]: + a = frozenset({"A"}) + b = frozenset({"B"}) + c = frozenset({"C"}) + d = frozenset({"D"}) + data: dict[AbstractSet[str], set[AbstractSet[str]]] = {a: {b, c}, b: {d}, c: {d}} + res = list(topsort_func(data)) + assert_equal(res, [{d}, {b, c}, {a}]) + + def test_topsort_orphan(self) -> None: + for topsort_func in [topsort, topsort2]: + a = frozenset({"A"}) + b = frozenset({"B"}) + data: dict[AbstractSet[str], set[AbstractSet[str]]] = {a: {b}} + res = list(topsort_func(data)) + assert_equal(res, [{b}, {a}]) + + def test_topsort_independent(self) -> None: + for topsort_func in [topsort, topsort2]: + a = frozenset({"A"}) + b = frozenset({"B"}) + c = frozenset({"C"}) + data: dict[AbstractSet[str], set[AbstractSet[str]]] = {a: set(), b: set(), c: set()} + res = list(topsort_func(data)) + assert_equal(res, [{a, b, c}]) + + def test_topsort_linear_chain(self) -> None: + for topsort_func in [topsort, topsort2]: + a = frozenset({"A"}) + b = frozenset({"B"}) + c = frozenset({"C"}) + d = frozenset({"D"}) + data: dict[AbstractSet[str], set[AbstractSet[str]]] = { + a: {b}, + b: {c}, + c: {d}, + d: set(), + } + res = list(topsort_func(data)) + assert_equal(res, [{d}, {c}, {b}, {a}]) + + def test_topsort_self_dependency(self) -> None: + for 
topsort_func in [topsort, topsort2]: + a = frozenset({"A"}) + b = frozenset({"B"}) + data: dict[AbstractSet[str], set[AbstractSet[str]]] = {a: {a, b}, b: set()} + res = list(topsort_func(data)) + assert_equal(res, [{b}, {a}]) + + def test_topsort_orphan_diamond(self) -> None: + for topsort_func in [topsort, topsort2]: + a = frozenset({"A"}) + b = frozenset({"B"}) + c = frozenset({"C"}) + # B and C are orphans -- they appear only in values, not as keys. + data: dict[AbstractSet[str], set[AbstractSet[str]]] = {a: {b, c}} + res = list(topsort_func(data)) + assert_equal(res, [{b, c}, {a}]) + + def test_topsort_cycle(self) -> None: + for topsort_func in [topsort, topsort2]: + a = frozenset({"A"}) + b = frozenset({"B"}) + data: dict[AbstractSet[str], set[AbstractSet[str]]] = {a: {b}, b: {a}} + with self.assertRaises(AssertionError): + list(topsort_func(data)) def test_scc(self) -> None: vertices = {"A", "B", "C", "D"}