Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions mypy/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@
ErrorTupleRaw,
report_internal_error,
)
from mypy.graph_utils import prepare_sccs, strongly_connected_components, topsort
from mypy.graph_utils import prepare_sccs, strongly_connected_components, topsort2
from mypy.indirection import TypeIndirectionVisitor
from mypy.ipc import BadStatus, IPCClient, IPCMessage, read_status, ready_to_read, receive, send
from mypy.messages import MessageBuilder
Expand Down Expand Up @@ -4236,7 +4236,7 @@ def sorted_components(graph: Graph) -> list[SCC]:
scc_dep_map = prepare_sccs_full(strongly_connected_components(vertices, edges), edges)
# Topsort.
res = []
for ready in topsort(scc_dep_map):
for ready in topsort2(scc_dep_map):
# Sort the sets in ready by reversed smallest State.order. Examples:
#
# - If ready is [{x}, {y}], x.order == 1, y.order == 2, we get
Expand Down Expand Up @@ -4271,7 +4271,7 @@ def sorted_components_inner(
edges = {id: deps_filtered(graph, vertices, id, pri_max) for id in vertices}
sccs = list(strongly_connected_components(vertices, edges))
res = []
for ready in topsort(prepare_sccs(sccs, edges)):
for ready in topsort2(prepare_sccs(sccs, edges)):
res.extend(sorted(ready, key=lambda scc: -min(graph[id].order for id in scc)))
return res

Expand Down
73 changes: 73 additions & 0 deletions mypy/graph_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,3 +115,76 @@ def topsort(data: dict[T, set[T]]) -> Iterable[set[T]]:
yield ready
data = {item: (dep - ready) for item, dep in data.items() if item not in ready}
assert not data, f"A cyclic dependency exists amongst {data!r}"


class topsort2(Iterator[set[T]]):  # noqa: N801
    """Topological sort using Kahn's algorithm.

    This is functionally equivalent to topsort() but avoids rebuilding
    the full dict and set objects on each iteration. Instead it uses
    in-degree counters and a reverse adjacency list, so the total work
    is O(V + E) rather than O(depth * V).

    Implemented as a class rather than a generator for better mypyc
    compilation.

    Each call to next() returns one batch: the set of vertices whose
    dependencies have all been returned in earlier batches. The first
    batch holds the vertices with no dependencies, including orphans
    (vertices that appear only as a dependency and have no key in data).

    Args:
      data: A map from vertices to all vertices that it has an edge
            connecting it to.  NOTE: The dependency sets are modified
            in place -- for normalization purposes, self-dependencies
            are removed. No keys are added to data; orphans are only
            recorded in this object's internal tables.

    Raises:
      AssertionError: From next(), once no vertex is ready but some
            vertices still have unmet dependencies, i.e. the graph
            contains a cycle.
    """

    def __init__(self, data: dict[T, set[T]]) -> None:
        # Single pass: remove self-deps, build reverse adjacency list,
        # compute in-degree counts, detect orphans, and find initial ready set.
        in_degree: dict[T, int] = {}
        rev: dict[T, list[T]] = {}
        ready: set[T] = set()
        for item, deps in data.items():
            deps.discard(item)  # Ignore self dependencies.
            deg = len(deps)
            in_degree[item] = deg
            if deg == 0:
                ready.add(item)
            if item not in rev:
                # Make sure every vertex has an entry so __next__ can index
                # rev unconditionally.
                rev[item] = []
            for dep in deps:
                if dep in rev:
                    rev[dep].append(item)
                else:
                    rev[dep] = [item]
                if dep not in data:
                    # Orphan: appears as dependency but has no entry in data.
                    in_degree[dep] = 0
                    ready.add(dep)

        # Number of not-yet-returned dependencies for each vertex.
        self.in_degree = in_degree
        # For each vertex, the vertices that depend on it.
        self.rev = rev
        # The batch that the next call to __next__ will return.
        self.ready = ready
        # Count of vertices that have not been placed in any batch yet.
        self.remaining = len(in_degree) - len(ready)

    def __iter__(self) -> Iterator[set[T]]:
        return self

    def __next__(self) -> set[T]:
        ready = self.ready
        if not ready:
            # Raise explicitly instead of using an assert statement so that
            # cycle detection is not stripped when Python runs with -O.
            if self.remaining != 0:
                stuck = [k for k, deg in self.in_degree.items() if deg > 0]
                raise AssertionError(f"A cyclic dependency exists amongst {stuck!r}")
            raise StopIteration
        in_degree = self.in_degree
        rev = self.rev
        new_ready: set[T] = set()
        for item in ready:
            for dependent in rev[item]:
                # One more dependency of `dependent` has now been returned.
                new_deg = in_degree[dependent] - 1
                in_degree[dependent] = new_deg
                if new_deg == 0:
                    new_ready.add(dependent)
        self.remaining -= len(new_ready)
        self.ready = new_ready
        return ready
4 changes: 2 additions & 2 deletions mypy/solve.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from mypy.constraints import SUBTYPE_OF, SUPERTYPE_OF, Constraint, infer_constraints, neg_op
from mypy.expandtype import expand_type
from mypy.graph_utils import prepare_sccs, strongly_connected_components, topsort
from mypy.graph_utils import prepare_sccs, strongly_connected_components, topsort2
from mypy.join import join_type_list
from mypy.meet import meet_type_list, meet_types
from mypy.subtypes import is_subtype
Expand Down Expand Up @@ -147,7 +147,7 @@ def solve_with_dependent(
sccs = list(strongly_connected_components(set(vars), dmap))
if not all(check_linear(scc, lowers, uppers) for scc in sccs):
return {}, []
raw_batches = list(topsort(prepare_sccs(sccs, dmap)))
raw_batches = list(topsort2(prepare_sccs(sccs, dmap)))

free_vars = []
free_solutions = {}
Expand Down
79 changes: 71 additions & 8 deletions mypy/test/testgraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from mypy.build import BuildManager, BuildSourceSet, State, order_ascc, sorted_components
from mypy.errors import Errors
from mypy.fscache import FileSystemCache
from mypy.graph_utils import strongly_connected_components, topsort
from mypy.graph_utils import strongly_connected_components, topsort, topsort2
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If this will be around for a while, we may choose a better name, otherwise this is fine.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm planning to rename the new one to topsort and drop the old one pretty soon, once I've measured the performance in our large codebase (probably within a week or so).

from mypy.modulefinder import SearchPaths
from mypy.options import Options
from mypy.plugin import Plugin
Expand All @@ -18,14 +18,77 @@


class GraphSuite(Suite):
def test_topsort_empty(self) -> None:
    # An empty graph must produce no batches at all.
    empty_graph: dict[AbstractSet[str], set[AbstractSet[str]]] = {}
    batches = list(topsort2(empty_graph))
    assert_equal(batches, [])

def test_topsort(self) -> None:
    # Diamond-shaped graph: A depends on B and C, which both depend on D.
    # Both implementations are checked against the same expected batches,
    # so no separate standalone check of topsort() is needed.
    for topsort_func in [topsort, topsort2]:
        a = frozenset({"A"})
        b = frozenset({"B"})
        c = frozenset({"C"})
        d = frozenset({"D"})
        # Rebuilt on every iteration, so each implementation gets its own graph.
        data: dict[AbstractSet[str], set[AbstractSet[str]]] = {a: {b, c}, b: {d}, c: {d}}
        res = list(topsort_func(data))
        assert_equal(res, [{d}, {b, c}, {a}])

def test_topsort_orphan(self) -> None:
    # B is an orphan: it is a dependency of A but has no key of its own.
    node_a = frozenset({"A"})
    node_b = frozenset({"B"})
    for sorter in [topsort, topsort2]:
        # Fresh graph per implementation.
        graph: dict[AbstractSet[str], set[AbstractSet[str]]] = {node_a: {node_b}}
        batches = list(sorter(graph))
        assert_equal(batches, [{node_b}, {node_a}])

def test_topsort_independent(self) -> None:
    # Three vertices with no dependencies are expected in one single batch.
    node_a = frozenset({"A"})
    node_b = frozenset({"B"})
    node_c = frozenset({"C"})
    for sorter in [topsort, topsort2]:
        graph: dict[AbstractSet[str], set[AbstractSet[str]]] = {
            node_a: set(),
            node_b: set(),
            node_c: set(),
        }
        batches = list(sorter(graph))
        assert_equal(batches, [{node_a, node_b, node_c}])

def test_topsort_linear_chain(self) -> None:
    # Chain A -> B -> C -> D: one vertex per batch, deepest dependency first.
    node_a = frozenset({"A"})
    node_b = frozenset({"B"})
    node_c = frozenset({"C"})
    node_d = frozenset({"D"})
    for sorter in [topsort, topsort2]:
        graph: dict[AbstractSet[str], set[AbstractSet[str]]] = {
            node_a: {node_b},
            node_b: {node_c},
            node_c: {node_d},
            node_d: set(),
        }
        batches = list(sorter(graph))
        assert_equal(batches, [{node_d}, {node_c}, {node_b}, {node_a}])

def test_topsort_self_dependency(self) -> None:
    # A lists itself among its dependencies; the expected batches are the
    # same as if A depended on B alone.
    node_a = frozenset({"A"})
    node_b = frozenset({"B"})
    for sorter in [topsort, topsort2]:
        graph: dict[AbstractSet[str], set[AbstractSet[str]]] = {
            node_a: {node_a, node_b},
            node_b: set(),
        }
        batches = list(sorter(graph))
        assert_equal(batches, [{node_b}, {node_a}])

def test_topsort_orphan_diamond(self) -> None:
    # B and C are orphans -- they appear only in values, not as keys.
    node_a = frozenset({"A"})
    node_b = frozenset({"B"})
    node_c = frozenset({"C"})
    for sorter in [topsort, topsort2]:
        graph: dict[AbstractSet[str], set[AbstractSet[str]]] = {node_a: {node_b, node_c}}
        batches = list(sorter(graph))
        assert_equal(batches, [{node_b, node_c}, {node_a}])

def test_topsort_cycle(self) -> None:
    # A and B depend on each other; exhausting either implementation must
    # raise AssertionError.
    node_a = frozenset({"A"})
    node_b = frozenset({"B"})
    for sorter in [topsort, topsort2]:
        cyclic: dict[AbstractSet[str], set[AbstractSet[str]]] = {
            node_a: {node_b},
            node_b: {node_a},
        }
        with self.assertRaises(AssertionError):
            list(sorter(cyclic))

def test_scc(self) -> None:
vertices = {"A", "B", "C", "D"}
Expand Down