def us_congressional_district_breakdown(
    baseline: SingleEconomy, reform: SingleEconomy, country_id: str
) -> USCongressionalDistrictBreakdown:
    """Break down household income changes by US congressional district.

    Households are grouped by the ``congressional_district_geoid`` stored on
    the baseline economy. Reform incomes are read at the same list positions,
    so both economies must list households in the same order.

    Args:
        baseline: Baseline economy with household-level data.
        reform: Reform economy with household-level data.
        country_id: Country identifier; any value other than "us" yields None.

    Returns:
        A breakdown with one entry per district, sorted by district name, or
        None when the country is not the US, the baseline carries no geoids,
        no household has a positive geoid, or every district was skipped.
    """
    if country_id != "us":
        return None

    geoids = baseline.congressional_district_geoid
    if geoids is None:
        return None

    # Group household positions by district; a geoid of 0 means "unassigned".
    district_indices: dict[int, list[int]] = {}
    for i, geoid in enumerate(geoids):
        if geoid > 0:
            district_indices.setdefault(geoid, []).append(i)

    if not district_indices:
        return None

    districts: list[USCongressionalDistrictImpact] = []

    for geoid, indices in district_indices.items():
        # The baseline weights are applied to both series so that the two
        # totals are compared over the same household population.
        weights = [baseline.household_weight[i] for i in indices]
        baseline_income = MicroSeries(
            [baseline.household_net_income[i] for i in indices],
            weights=weights,
        )
        reform_income = MicroSeries(
            [reform.household_net_income[i] for i in indices],
            weights=weights,
        )

        # Each aggregate is computed once and reused below.
        total_households = baseline_income.count()
        baseline_total = baseline_income.sum()

        # Skip districts where either ratio below would divide by zero.
        if total_households == 0 or baseline_total == 0:
            continue

        reform_total = reform_income.sum()

        districts.append(
            USCongressionalDistrictImpact(
                district=geoid_to_district_name(geoid),
                average_household_income_change=float(
                    (reform_total - baseline_total) / total_households
                ),
                relative_household_income_change=float(
                    reform_total / baseline_total - 1
                ),
            )
        )

    if not districts:
        return None

    # Sort by district name for consistent ordering.
    districts.sort(key=lambda d: d.district)

    return USCongressionalDistrictBreakdownWithValues(districts=districts)
calculate_congressional_district_geoid(self) -> List[int] | None: + """Calculate congressional district geoid for US households. + + Returns list of geoids in SSDD format (state FIPS * 100 + district number), + or None if not available (non-US or variable doesn't exist). + """ + if self.country_id != "us": + return None + + try: + geoids = ( + self.simulation.calculate("congressional_district_geoid") + .astype(int) + .tolist() + ) + # Check if we have any non-zero values (0 means unassigned) + if all(g == 0 for g in geoids): + return None + return geoids + except Exception: + return None + class CliffImpactInSimulation(BaseModel): cliff_gap: float @@ -411,6 +436,11 @@ def calculate_single_economy( cliff_gap = None cliff_share = None + # US congressional district geoids + congressional_district_geoid = ( + task_manager.calculate_congressional_district_geoid() + ) + return SingleEconomy( **{ "total_net_income": total_net_income, @@ -447,5 +477,6 @@ def calculate_single_economy( "programs": uk_programs, "cliff_gap": cliff_gap if include_cliffs else None, "cliff_share": cliff_share if include_cliffs else None, + "congressional_district_geoid": congressional_district_geoid, } ) diff --git a/policyengine/utils/geography.py b/policyengine/utils/geography.py new file mode 100644 index 00000000..1baccd9e --- /dev/null +++ b/policyengine/utils/geography.py @@ -0,0 +1,73 @@ +"""Geographic utilities and constants for PolicyEngine.""" + +# US State FIPS codes to two-letter abbreviation mapping +STATE_FIPS_TO_ABBREV = { + 1: "AL", + 2: "AK", + 4: "AZ", + 5: "AR", + 6: "CA", + 8: "CO", + 9: "CT", + 10: "DE", + 11: "DC", + 12: "FL", + 13: "GA", + 15: "HI", + 16: "ID", + 17: "IL", + 18: "IN", + 19: "IA", + 20: "KS", + 21: "KY", + 22: "LA", + 23: "ME", + 24: "MD", + 25: "MA", + 26: "MI", + 27: "MN", + 28: "MS", + 29: "MO", + 30: "MT", + 31: "NE", + 32: "NV", + 33: "NH", + 34: "NJ", + 35: "NM", + 36: "NY", + 37: "NC", + 38: "ND", + 39: "OH", + 40: "OK", + 41: "OR", + 42: "PA", + 44: 
def geoid_to_district_name(geoid: int) -> str:
    """Format an SSDD congressional district geoid as a name such as "GA-05".

    Args:
        geoid: District geoid whose last two digits are the district number
            and whose remaining leading digits are the state FIPS code.

    Returns:
        "<state abbreviation>-<two-digit district>", e.g. "GA-05" or "CA-12".
        A state FIPS code that is missing from STATE_FIPS_TO_ABBREV is
        rendered as "S<fips>" in place of the abbreviation.
    """
    state_fips, district_num = divmod(geoid, 100)
    if state_fips in STATE_FIPS_TO_ABBREV:
        state_abbrev = STATE_FIPS_TO_ABBREV[state_fips]
    else:
        # Unknown state code: keep the raw FIPS number visible in the name.
        state_abbrev = f"S{state_fips}"
    return f"{state_abbrev}-{district_num:02d}"
test__given_georgia_district_5_geoid__then_returns_ga_05(self): + # Given + geoid = 1305 # State FIPS 13 (GA) + District 05 + + # When + result = geoid_to_district_name(geoid) + + # Then + assert result == "GA-05" + + def test__given_california_district_12_geoid__then_returns_ca_12(self): + # Given + geoid = 612 # State FIPS 6 (CA) + District 12 + + # When + result = geoid_to_district_name(geoid) + + # Then + assert result == "CA-12" + + def test__given_north_carolina_district_4_geoid__then_returns_nc_04(self): + # Given + geoid = 3704 # State FIPS 37 (NC) + District 04 + + # When + result = geoid_to_district_name(geoid) + + # Then + assert result == "NC-04" + + def test__given_single_digit_district__then_pads_with_zero(self): + # Given + geoid = 101 # State FIPS 1 (AL) + District 01 + + # When + result = geoid_to_district_name(geoid) + + # Then + assert result == "AL-01" + + +class TestUsCongressionalDistrictBreakdown: + """Tests for the us_congressional_district_breakdown function.""" + + def test__given_non_us_country__then_returns_none( + self, mock_single_economy_with_ga_districts + ): + # Given + baseline = mock_single_economy_with_ga_districts + reform = mock_single_economy_with_ga_districts + country_id = "uk" + + # When + result = us_congressional_district_breakdown( + baseline, reform, country_id + ) + + # Then + assert result is None + + def test__given_null_district_geoids__then_returns_none( + self, mock_single_economy_with_null_districts + ): + # Given + baseline = mock_single_economy_with_null_districts + reform = mock_single_economy_with_null_districts + country_id = "us" + + # When + result = us_congressional_district_breakdown( + baseline, reform, country_id + ) + + # Then + assert result is None + + def test__given_all_zero_district_geoids__then_returns_none( + self, mock_single_economy_without_districts + ): + # Given + baseline = mock_single_economy_without_districts + reform = mock_single_economy_without_districts + country_id = "us" + + # 
When + result = us_congressional_district_breakdown( + baseline, reform, country_id + ) + + # Then + assert result is None + + def test__given_valid_district_data__then_returns_breakdown_with_districts_list( + self, mock_single_economy_with_ga_districts + ): + # Given + baseline = mock_single_economy_with_ga_districts + reform = mock_single_economy_with_ga_districts + country_id = "us" + + # When + result = us_congressional_district_breakdown( + baseline, reform, country_id + ) + + # Then + assert result is not None + assert isinstance(result, USCongressionalDistrictBreakdownWithValues) + assert hasattr(result, "districts") + assert isinstance(result.districts, list) + + def test__given_two_ga_districts__then_returns_two_district_impacts( + self, mock_single_economy_with_ga_districts + ): + # Given + baseline = mock_single_economy_with_ga_districts + reform = mock_single_economy_with_ga_districts + country_id = "us" + + # When + result = us_congressional_district_breakdown( + baseline, reform, country_id + ) + + # Then + assert len(result.districts) == 2 + district_names = [d.district for d in result.districts] + assert "GA-05" in district_names + assert "GA-06" in district_names + + def test__given_districts_from_multiple_states__then_returns_all_districts_sorted( + self, mock_single_economy_with_multi_state_districts + ): + # Given + baseline = mock_single_economy_with_multi_state_districts + reform = mock_single_economy_with_multi_state_districts + country_id = "us" + + # When + result = us_congressional_district_breakdown( + baseline, reform, country_id + ) + + # Then + assert len(result.districts) == 4 + district_names = [d.district for d in result.districts] + # Should be sorted alphabetically + assert district_names == ["GA-05", "GA-06", "NC-04", "NC-12"] + + def test__given_no_income_change__then_returns_zero_changes( + self, mock_single_economy_with_ga_districts + ): + # Given: baseline and reform are identical + baseline = 
mock_single_economy_with_ga_districts + reform = mock_single_economy_with_ga_districts + country_id = "us" + + # When + result = us_congressional_district_breakdown( + baseline, reform, country_id + ) + + # Then + for district in result.districts: + assert district.average_household_income_change == 0.0 + assert district.relative_household_income_change == 0.0 + + def test__given_income_increase__then_returns_positive_changes(self): + # Given: reform has higher incomes than baseline + baseline = create_mock_single_economy( + household_net_income=[50000.0, 60000.0, 70000.0], + household_weight=[1000.0, 1000.0, 1000.0], + congressional_district_geoid=[1305, 1305, 1305], + ) + reform = create_mock_single_economy( + household_net_income=[51000.0, 61000.0, 71000.0], + household_weight=[1000.0, 1000.0, 1000.0], + congressional_district_geoid=[1305, 1305, 1305], + ) + country_id = "us" + + # When + result = us_congressional_district_breakdown( + baseline, reform, country_id + ) + + # Then + assert len(result.districts) == 1 + district = result.districts[0] + assert district.district == "GA-05" + assert district.average_household_income_change == 1000.0 + assert district.relative_household_income_change > 0 + + def test__given_income_decrease__then_returns_negative_changes(self): + # Given: reform has lower incomes than baseline + baseline = create_mock_single_economy( + household_net_income=[50000.0, 60000.0, 70000.0], + household_weight=[1000.0, 1000.0, 1000.0], + congressional_district_geoid=[1305, 1305, 1305], + ) + reform = create_mock_single_economy( + household_net_income=[49000.0, 59000.0, 69000.0], + household_weight=[1000.0, 1000.0, 1000.0], + congressional_district_geoid=[1305, 1305, 1305], + ) + country_id = "us" + + # When + result = us_congressional_district_breakdown( + baseline, reform, country_id + ) + + # Then + assert len(result.districts) == 1 + district = result.districts[0] + assert district.district == "GA-05" + assert 
district.average_household_income_change == -1000.0 + assert district.relative_household_income_change < 0 + + def test__given_weighted_households__then_calculates_weighted_averages( + self, + ): + # Given: households with different weights + baseline = create_mock_single_economy( + household_net_income=[50000.0, 100000.0], + household_weight=[3000.0, 1000.0], # First household has 3x weight + congressional_district_geoid=[1305, 1305], + ) + reform = create_mock_single_economy( + household_net_income=[51000.0, 101000.0], + household_weight=[3000.0, 1000.0], + congressional_district_geoid=[1305, 1305], + ) + country_id = "us" + + # When + result = us_congressional_district_breakdown( + baseline, reform, country_id + ) + + # Then + district = result.districts[0] + # Weighted sum of income change: (3000*1000 + 1000*1000) = 4,000,000 + # Total households: 3000 + 1000 = 4000 + # Average change: 4,000,000 / 4000 = 1000 + assert district.average_household_income_change == 1000.0 + + def test__given_district_impact__then_has_required_fields( + self, mock_single_economy_with_ga_districts + ): + # Given + baseline = mock_single_economy_with_ga_districts + reform = mock_single_economy_with_ga_districts + country_id = "us" + + # When + result = us_congressional_district_breakdown( + baseline, reform, country_id + ) + + # Then + for district in result.districts: + assert isinstance(district, USCongressionalDistrictImpact) + assert hasattr(district, "district") + assert hasattr(district, "average_household_income_change") + assert hasattr(district, "relative_household_income_change") + assert isinstance(district.district, str) + assert isinstance(district.average_household_income_change, float) + assert isinstance(district.relative_household_income_change, float) + + +class TestCongressionalDistrictGeoidExtraction: + """Tests for congressional_district_geoid extraction in SingleEconomy.""" + + def test__given_us_simulation_with_state_dataset__then_geoid_is_extracted( + self, + 
): + """Integration test: verify geoid extraction works with real simulation. + + Note: This test requires network access to download state dataset. + Skip if running in isolated environment. + """ + pytest.importorskip("policyengine_us") + + from policyengine import Simulation + + # Given: A US state simulation (GA has district assignments) + sim = Simulation( + scope="macro", + country="us", + region="state/GA", + time_period=2025, + ) + + # When + result = sim.calculate_single_economy() + + # Then + assert result.congressional_district_geoid is not None + assert len(result.congressional_district_geoid) > 0 + # All geoids should be in Georgia (FIPS 13xx) + non_zero_geoids = [ + g for g in result.congressional_district_geoid if g > 0 + ] + assert len(non_zero_geoids) > 0 + for geoid in non_zero_geoids: + state_fips = geoid // 100 + assert ( + state_fips == 13 + ), f"Expected GA (13), got state FIPS {state_fips}" diff --git a/tests/fixtures/__init__.py b/tests/fixtures/__init__.py new file mode 100644 index 00000000..131d59d6 --- /dev/null +++ b/tests/fixtures/__init__.py @@ -0,0 +1,12 @@ +"""Test fixtures for policyengine tests.""" + +from tests.fixtures.simulation import ( + create_mock_single_economy, + mock_get_default_dataset, + mock_dataset, + mock_simulation_with_cliff_vars, + mock_single_economy_with_ga_districts, + mock_single_economy_with_multi_state_districts, + mock_single_economy_without_districts, + mock_single_economy_with_null_districts, +) diff --git a/tests/fixtures/simulation.py b/tests/fixtures/simulation.py index 8e0f7700..d2361fef 100644 --- a/tests/fixtures/simulation.py +++ b/tests/fixtures/simulation.py @@ -73,3 +73,111 @@ def mock_simulation_with_cliff_vars(): "is_adult": Mock(sum=Mock(return_value=80.0)), }[var] return mock_sim + + +def create_mock_single_economy( + household_net_income: list[float], + household_weight: list[float], + congressional_district_geoid: list[int] | None = None, +): + """Create a mock SingleEconomy with 
specified household data. + + Args: + household_net_income: List of household net incomes + household_weight: List of household weights + congressional_district_geoid: List of district geoids (SSDD format) or None + + Returns: + Mock SingleEconomy object with the specified data + """ + mock_economy = Mock() + mock_economy.household_net_income = household_net_income + mock_economy.household_weight = household_weight + mock_economy.congressional_district_geoid = congressional_district_geoid + return mock_economy + + +@pytest.fixture +def mock_single_economy_with_ga_districts(): + """Mock SingleEconomy with Georgia congressional district data. + + Creates 6 households across 2 districts: + - GA-05 (geoid 1305): 3 households + - GA-06 (geoid 1306): 3 households + """ + return create_mock_single_economy( + household_net_income=[ + 50000.0, + 60000.0, + 70000.0, + 80000.0, + 90000.0, + 100000.0, + ], + household_weight=[1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0], + congressional_district_geoid=[1305, 1305, 1305, 1306, 1306, 1306], + ) + + +@pytest.fixture +def mock_single_economy_with_multi_state_districts(): + """Mock SingleEconomy with districts from multiple states. 
+ + Creates 8 households across 4 districts in 2 states: + - GA-05 (geoid 1305): 2 households + - GA-06 (geoid 1306): 2 households + - NC-04 (geoid 3704): 2 households + - NC-12 (geoid 3712): 2 households + """ + return create_mock_single_economy( + household_net_income=[ + 50000.0, + 60000.0, + 70000.0, + 80000.0, + 40000.0, + 45000.0, + 55000.0, + 65000.0, + ], + household_weight=[ + 1000.0, + 1000.0, + 1000.0, + 1000.0, + 1000.0, + 1000.0, + 1000.0, + 1000.0, + ], + congressional_district_geoid=[ + 1305, + 1305, + 1306, + 1306, + 3704, + 3704, + 3712, + 3712, + ], + ) + + +@pytest.fixture +def mock_single_economy_without_districts(): + """Mock SingleEconomy with no congressional district data (all zeros).""" + return create_mock_single_economy( + household_net_income=[50000.0, 60000.0, 70000.0], + household_weight=[1000.0, 1000.0, 1000.0], + congressional_district_geoid=[0, 0, 0], + ) + + +@pytest.fixture +def mock_single_economy_with_null_districts(): + """Mock SingleEconomy with None congressional district data.""" + return create_mock_single_economy( + household_net_income=[50000.0, 60000.0, 70000.0], + household_weight=[1000.0, 1000.0, 1000.0], + congressional_district_geoid=None, + )