Skip to content
Merged
7 changes: 7 additions & 0 deletions changelog_entry.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
- bump: minor
changes:
added:
- Place-level (city) impact analysis for US Census places with population over 100,000.
- Input validation for place region strings.
removed:
- Deprecated "city/nyc" region format (use "place/NY-51000" instead).
40 changes: 30 additions & 10 deletions policyengine/simulation.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,29 +376,49 @@ def _apply_us_region_to_simulation(
"""Apply US-specific regional filtering to a simulation.

Note: Most US regions (states, congressional districts) now use
scoped datasets rather than filtering. Only NYC still requires
filtering from the national dataset (and is still using the pooled
CPS by default). This should be replaced with an approach based on
the new datasets.
scoped datasets rather than filtering. Place-level regions use
the parent state's dataset and filter by place_fips.
"""
if region == "city/nyc":
simulation = self._filter_us_simulation_by_nyc(
if isinstance(region, str) and region.startswith("place/"):
simulation = self._filter_us_simulation_by_place(
simulation=simulation,
simulation_type=simulation_type,
region=region,
reform=reform,
)
return simulation

def _filter_us_simulation_by_nyc(
def _filter_us_simulation_by_place(
self,
simulation: CountryMicrosimulation,
simulation_type: type,
region: str,
reform: ReformType | None,
) -> CountrySimulation:
"""Filter a US simulation to only include NYC households."""
"""Filter a US simulation to only include households in a specific Census place.

Args:
simulation: The microsimulation to filter.
simulation_type: The type of simulation to create.
region: A place region string (e.g., "place/NJ-57000").
reform: The reform to apply to the filtered simulation.

Returns:
A new simulation containing only households in the specified place.
"""
from policyengine.utils.data.datasets import parse_us_place_region

_, place_fips_code = parse_us_place_region(region)
df = simulation.to_input_dataframe()
in_nyc = simulation.calculate("in_nyc", map_to="person").values
return simulation_type(dataset=df[in_nyc], reform=reform)
# Get place_fips at person level since to_input_dataframe() is person-level
person_place_fips = simulation.calculate(
"place_fips", map_to="person"
).values
# place_fips may be stored as bytes in HDF5; handle both str and bytes
mask = (person_place_fips == place_fips_code) | (
person_place_fips == place_fips_code.encode()
)
return simulation_type(dataset=df[mask], reform=reform)

def check_model_version(self) -> None:
"""
Expand Down
6 changes: 2 additions & 4 deletions policyengine/utils/charts.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,11 @@


def add_fonts():
fonts = HTML(
"""
fonts = HTML("""
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Roboto+Serif:ital,opsz,wght@0,8..144,100..900;1,8..144,100..900&display=swap" rel="stylesheet">
"""
)
""")
return display_html(fonts)


Expand Down
66 changes: 55 additions & 11 deletions policyengine/utils/data/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,8 @@ def _get_default_us_dataset(region: str | None) -> str:

if region_type == "nationwide":
return ECPS_2024
elif region_type == "city":
# TODO: Implement a better approach to this for our one
# city, New York City.
# Cities use the pooled CPS dataset
return CPS_2023_POOLED

# For state and congressional_district, region is guaranteed to be non-None
# For state, congressional_district, and place, region is guaranteed to be non-None
assert region is not None

if region_type == "state":
Expand All @@ -68,6 +63,11 @@ def _get_default_us_dataset(region: str | None) -> str:
state_code, district_number
)

elif region_type == "place":
# Expected format: "place/NJ-57000"
state_code, _ = parse_us_place_region(region)
return get_us_state_dataset_path(state_code)

raise ValueError(f"Unhandled US region type: {region_type}")


Expand Down Expand Up @@ -124,21 +124,23 @@ def get_us_congressional_district_dataset_path(
return f"{US_DATA_BUCKET}/districts/{state_code.upper()}-{district_number:02d}.h5"


USRegionType = Literal["nationwide", "city", "state", "congressional_district"]
USRegionType = Literal[
"nationwide", "state", "congressional_district", "place"
]

US_REGION_PREFIXES = ("city", "state", "congressional_district")
US_REGION_PREFIXES = ("state", "congressional_district", "place")


def determine_us_region_type(region: str | None) -> USRegionType:
"""
Determine the type of US region from a region string.

Args:
region: A region string (e.g., "us", "city/nyc", "state/CA",
"congressional_district/CA-01") or None.
region: A region string (e.g., "us", "state/CA",
"congressional_district/CA-01", "place/NJ-57000") or None.

Returns:
One of "nationwide", "city", "state", or "congressional_district".
One of "nationwide", "state", "congressional_district", or "place".

Raises:
ValueError: If the region string has an unrecognized prefix.
Expand All @@ -154,3 +156,45 @@ def determine_us_region_type(region: str | None) -> USRegionType:
f"Unrecognized US region format: '{region}'. "
f"Expected 'us', or one of the following prefixes: {list(US_REGION_PREFIXES)}"
)


def parse_us_place_region(region: str) -> Tuple[str, str]:
    """Parse a place region string into (state_code, place_fips).

    Format: 'place/{STATE}-{PLACE_FIPS}'
    Example: 'place/NJ-57000' -> ('NJ', '57000')

    Only the path segment immediately after "place/" is inspected; it is
    split on its first hyphen. Both parts are returned exactly as written
    (no case normalization and no check that the FIPS code is numeric).

    Args:
        region: A place region string (e.g., "place/NJ-57000").

    Returns:
        A tuple of (state_code, place_fips).

    Raises:
        ValueError: If the region does not start with "place/", has no
            hyphen separating state and place, or has an empty state code
            or place FIPS code.
    """
    # Deliberately a plain (non-f) string with single braces: "{{...}}" is
    # only collapsed to "{...}" inside an f-string, so writing the doubled
    # form in a plain literal would leak "{{STATE}}" into the message.
    expected_format = "Expected format: 'place/{STATE}-{PLACE_FIPS}'"

    if not region.startswith("place/"):
        raise ValueError(
            f"Invalid place region format: '{region}'. {expected_format}"
        )

    # Second "/"-separated segment; anything after a further "/" is ignored.
    place_str = region.split("/")[1]
    if "-" not in place_str:
        raise ValueError(
            f"Invalid place region format: '{region}'. {expected_format}"
        )

    # Split on the first hyphen only so the FIPS part is kept intact.
    state_code, place_fips = place_str.split("-", 1)

    if not state_code:
        raise ValueError(
            f"Invalid place region: '{region}'. State code cannot be empty."
        )
    if not place_fips:
        raise ValueError(
            f"Invalid place region: '{region}'. Place FIPS code cannot be empty."
        )

    return state_code, place_fips
Loading
Loading