diff --git a/changelog_entry.yaml b/changelog_entry.yaml index e69de29bb..75ac6044a 100644 --- a/changelog_entry.yaml +++ b/changelog_entry.yaml @@ -0,0 +1,4 @@ +- bump: patch + changes: + fixed: + - Fix intra-decile income change formula that doubled all percentage changes. diff --git a/policyengine_api/endpoints/economy/compare.py b/policyengine_api/endpoints/economy/compare.py index c97a03f6f..c453e8063 100644 --- a/policyengine_api/endpoints/economy/compare.py +++ b/policyengine_api/endpoints/economy/compare.py @@ -310,6 +310,14 @@ def poverty_impact(baseline: dict, reform: dict) -> dict: ) +def compute_income_change(baseline_values, reform_values): + """Percentage income change with a floor of 1 on the baseline + to avoid division by zero for zero/negative incomes.""" + absolute_change = reform_values - baseline_values + capped_baseline = np.maximum(baseline_values, 1) + return absolute_change / capped_baseline + + def intra_decile_impact(baseline: dict, reform: dict) -> dict: baseline_income = MicroSeries( baseline["household_net_income"], weights=baseline["household_weight"] @@ -321,14 +329,9 @@ def intra_decile_impact(baseline: dict, reform: dict) -> dict: baseline["household_count_people"], weights=baseline_income.weights ) decile = MicroSeries(baseline["household_income_decile"]).values - absolute_change = (reform_income - baseline_income).values - capped_baseline_income = np.maximum(baseline_income.values, 1) - capped_reform_income = ( - np.maximum(reform_income.values, 1) + absolute_change + income_change = compute_income_change( + baseline_income.values, reform_income.values ) - income_change = ( - capped_reform_income - capped_baseline_income - ) / capped_baseline_income # Within each decile, calculate the percentage of people who: # 1. Gained more than 5% of their income @@ -383,14 +386,9 @@ def intra_wealth_decile_impact(baseline: dict, reform: dict) -> dict: baseline["household_count_people"], weights=baseline_income.weights ) decile = MicroSeries(baseline["household_wealth_decile"]).values - absolute_change = (reform_income - baseline_income).values - capped_baseline_income = np.maximum(baseline_income.values, 1) - capped_reform_income = ( - np.maximum(reform_income.values, 1) + absolute_change - ) - income_change = ( - capped_reform_income - capped_baseline_income - ) / capped_baseline_income + income_change = compute_income_change( + baseline_income.values, reform_income.values + ) # Within each decile, calculate the percentage of people who: # 1. Gained more than 5% of their income diff --git a/tests/to_refactor/python/test_us_policy_macro.py b/tests/to_refactor/python/test_us_policy_macro.py index 03cb620d2..1f49a1298 100644 --- a/tests/to_refactor/python/test_us_policy_macro.py +++ b/tests/to_refactor/python/test_us_policy_macro.py @@ -71,15 +71,15 @@ def utah_reform_runner(rest_client, region: str = "us"): # Ensure that there is some budgetary impact cost = round(result["budget"]["budgetary_impact"] / 1e6, 1) assert ( - cost / 95.4 - 1 + cost / 1867.4 - 1 ) < 0.01, ( - f"Expected budgetary impact to be 95.4 million, got {cost} million" + f"Expected budgetary impact to be 1867.4 million, got {cost} million" ) assert ( - result["intra_decile"]["all"]["Lose less than 5%"] / 0.637 - 1 + result["intra_decile"]["all"]["Lose less than 5%"] / 0.534 - 1 ) < 0.01, ( - f"Expected 63.7% of people to lose less than 5%, got " + f"Expected 53.4% of people to lose less than 5%, got " f"{result['intra_decile']['all']['Lose less than 5%']}" ) diff --git a/tests/unit/endpoints/economy/test_compare.py b/tests/unit/endpoints/economy/test_compare.py index 17ff66275..76617a69a 100644 --- a/tests/unit/endpoints/economy/test_compare.py +++ b/tests/unit/endpoints/economy/test_compare.py @@ -11,6 +11,9 @@ UKLocalAuthorityBreakdown, uk_constituency_breakdown, uk_local_authority_breakdown, + compute_income_change, + intra_decile_impact, + intra_wealth_decile_impact, ) @@ -731,3 +734,267 @@ def test__given_no_region__returns_all_constituencies( assert result is not None assert len(result.by_constituency) == 3 + + +def _make_economy( + incomes, + deciles, + weights=None, + people=None, + decile_key="household_income_decile", +): + """Helper to build baseline/reform dicts for intra_decile tests.""" + n = len(incomes) + return { + "household_net_income": np.array(incomes, dtype=float), + "household_weight": np.array(weights if weights else [1.0] * n), + "household_count_people": np.array(people if people else [1.0] * n), + decile_key: np.array(deciles, dtype=float), + } + + +class TestComputeIncomeChange: + """Direct unit tests for the income change formula.""" + + def test__income_change_formula_exact(self): + result = compute_income_change(np.array([1000.0]), np.array([1040.0])) + assert result[0] == pytest.approx(0.04) + + +class TestIntraDecileImpact: + """Tests for the intra_decile_impact function — verifying correct + percentage change calculation and bucket assignment.""" + + def test__5pct_gain_classified_below_5pct(self): + """A uniform 5% income gain must NOT land in 'Gain more than 5%'. + + This is the regression test for the double-counting bug where + income_change was 2x the true value, pushing 5% gains into the + >5% bucket. + """ + # 10 households, one per decile, all gain exactly 5% + baseline = _make_economy( + incomes=[1000.0] * 10, + deciles=list(range(1, 11)), + ) + reform = _make_economy( + incomes=[1050.0] * 10, + deciles=list(range(1, 11)), + ) + result = intra_decile_impact(baseline, reform) + + # Every decile should have 0% in "Gain more than 5%" + for pct in result["deciles"]["Gain more than 5%"]: + assert ( + pct == 0.0 + ), f"5% gain incorrectly classified as >5% (got {pct})" + # Every decile should have 100% in "Gain less than 5%" + for pct in result["deciles"]["Gain less than 5%"]: + assert pct == 1.0, f"5% gain not classified as <5% (got {pct})" + + def test__10pct_gain_classified_above_5pct(self): + """A 10% gain should be in 'Gain more than 5%'.""" + baseline = _make_economy( + incomes=[1000.0] * 10, + deciles=list(range(1, 11)), + ) + reform = _make_economy( + incomes=[1100.0] * 10, + deciles=list(range(1, 11)), + ) + result = intra_decile_impact(baseline, reform) + + for pct in result["deciles"]["Gain more than 5%"]: + assert pct == 1.0 + + def test__3pct_loss_classified_below_5pct(self): + """A 3% loss should be in 'Lose less than 5%'.""" + baseline = _make_economy( + incomes=[1000.0] * 10, + deciles=list(range(1, 11)), + ) + reform = _make_economy( + incomes=[970.0] * 10, + deciles=list(range(1, 11)), + ) + result = intra_decile_impact(baseline, reform) + + for pct in result["deciles"]["Lose less than 5%"]: + assert pct == 1.0 + for pct in result["deciles"]["Lose more than 5%"]: + assert pct == 0.0 + + def test__no_change_classified_correctly(self): + """Zero change should land in 'No change'.""" + baseline = _make_economy( + incomes=[1000.0] * 10, + deciles=list(range(1, 11)), + ) + reform = _make_economy( + incomes=[1000.0] * 10, + deciles=list(range(1, 11)), + ) + result = intra_decile_impact(baseline, reform) + + for pct in result["deciles"]["No change"]: + assert pct == 1.0 + + def test__near_zero_baseline_no_division_error(self): + """Households with near-zero baseline income should not cause + division errors — the floor of 1 handles this.""" + baseline = _make_economy( + incomes=[0.0] * 10, + deciles=list(range(1, 11)), + ) + reform = _make_economy( + incomes=[100.0] * 10, + deciles=list(range(1, 11)), + ) + result = intra_decile_impact(baseline, reform) + + # Should not raise; all households gained income + total = sum(result["all"][label] for label in result["all"]) + assert ( + abs(total - 1.0) < 1e-9 + ), f"Proportions should sum to 1, got {total}" + + def test__zero_baseline_uses_floor_of_one(self): + """When baseline income is 0, the max(B, 1) floor means the + effective denominator is 1. A $0 -> $100 change should give + income_change = 100/1 = 100 (10000%), landing in >5%.""" + baseline = _make_economy( + incomes=[0.0] * 10, + deciles=list(range(1, 11)), + ) + reform = _make_economy( + incomes=[100.0] * 10, + deciles=list(range(1, 11)), + ) + result = intra_decile_impact(baseline, reform) + + # $100 gain on a floored baseline of $1 = 10000% change -> >5% + for pct in result["deciles"]["Gain more than 5%"]: + assert ( + pct == 1.0 + ), f"Zero baseline with $100 gain should be >5% (got {pct})" + # No NaN or Inf in any bucket + for label in result["all"]: + assert not np.isnan(result["all"][label]) + assert not np.isinf(result["all"][label]) + + def test__negative_baseline_handled(self): + """Households with negative baseline income should be handled + by the max(B, 1) floor without producing NaN or Inf.""" + baseline = _make_economy( + incomes=[-500.0] * 10, + deciles=list(range(1, 11)), + ) + reform = _make_economy( + incomes=[500.0] * 10, + deciles=list(range(1, 11)), + ) + result = intra_decile_impact(baseline, reform) + + for label in result["all"]: + assert not np.isnan(result["all"][label]) + assert not np.isinf(result["all"][label]) + + def test__2pct_gain_is_not_doubled(self): + """A 2% gain stays in <5% even if doubled (2*2%=4% < 5%), so this + test alone would not catch the doubling bug.""" + baseline = _make_economy( + incomes=[50000.0] * 10, + deciles=list(range(1, 11)), + ) + reform = _make_economy( + incomes=[51000.0] * 10, # +2% + deciles=list(range(1, 11)), + ) + result = intra_decile_impact(baseline, reform) + + for pct in result["deciles"]["Gain more than 5%"]: + assert pct == 0.0, "2% gain incorrectly classified as >5%" + for pct in result["deciles"]["Gain less than 5%"]: + assert pct == 1.0, "2% gain not classified as <5%" + + def test__4pct_gain_not_doubled_into_above_5pct(self): + """A 4% gain must stay in <5%. With the doubling bug, 4% * 2 = 8% + would incorrectly land in >5%. This is the tightest regression + test for the doubling bug on the gain side.""" + baseline = _make_economy( + incomes=[10000.0] * 10, + deciles=list(range(1, 11)), + ) + reform = _make_economy( + incomes=[10400.0] * 10, # +4% + deciles=list(range(1, 11)), + ) + result = intra_decile_impact(baseline, reform) + + for pct in result["deciles"]["Gain more than 5%"]: + assert ( + pct == 0.0 + ), "4% gain incorrectly classified as >5% (doubling bug)" + for pct in result["deciles"]["Gain less than 5%"]: + assert pct == 1.0, "4% gain not classified as <5%" + + def test__all_field_averages_deciles(self): + """The 'all' field should be the mean of the 10 decile values.""" + baseline = _make_economy( + incomes=[1000.0] * 10, + deciles=list(range(1, 11)), + ) + reform = _make_economy( + incomes=[1050.0] * 10, + deciles=list(range(1, 11)), + ) + result = intra_decile_impact(baseline, reform) + + for label in result["all"]: + expected = sum(result["deciles"][label]) / 10 + assert abs(result["all"][label] - expected) < 1e-9 + + +class TestIntraWealthDecileImpact: + """Tests for intra_wealth_decile_impact — same formula, keyed by + wealth decile instead of income decile.""" + + def test__5pct_gain_classified_below_5pct(self): + """Regression test: 5% gain must not be doubled into >5% bucket.""" + baseline = _make_economy( + incomes=[1000.0] * 10, + deciles=list(range(1, 11)), + decile_key="household_wealth_decile", + ) + reform = _make_economy( + incomes=[1050.0] * 10, + deciles=list(range(1, 11)), + decile_key="household_wealth_decile", + ) + + result = intra_wealth_decile_impact(baseline, reform) + + for pct in result["deciles"]["Gain more than 5%"]: + assert ( + pct == 0.0 + ), f"5% gain incorrectly classified as >5% in wealth decile (got {pct})" + + def test__2pct_gain_not_doubled(self): + """A 2% gain must stay in the <5% bucket for wealth deciles too.""" + baseline = _make_economy( + incomes=[50000.0] * 10, + deciles=list(range(1, 11)), + decile_key="household_wealth_decile", + ) + reform = _make_economy( + incomes=[51000.0] * 10, + deciles=list(range(1, 11)), + decile_key="household_wealth_decile", + ) + + result = intra_wealth_decile_impact(baseline, reform) + + for pct in result["deciles"]["Gain more than 5%"]: + assert pct == 0.0, "2% gain incorrectly classified as >5%" + for pct in result["deciles"]["Gain less than 5%"]: + assert pct == 1.0, "2% gain not classified as <5%"