def upgrade() -> None:
    """Upgrade schema: create the ``intra_decile_impacts`` table.

    The table has required UUID references to a baseline and a reform
    simulation, an optional report reference, an integer decile, one
    nullable float column per income-change category, and the ``id`` /
    ``created_at`` bookkeeping columns.
    """
    # One nullable float column per income-change category, in table order.
    share_columns = [
        sa.Column(category, sa.Float(), nullable=True)
        for category in (
            "lose_more_than_5pct",
            "lose_less_than_5pct",
            "no_change",
            "gain_less_than_5pct",
            "gain_more_than_5pct",
        )
    ]
    # Foreign keys: both simulation columns point at simulations.id.
    foreign_keys = [
        sa.ForeignKeyConstraint([local_column], [remote_column])
        for local_column, remote_column in (
            ("baseline_simulation_id", "simulations.id"),
            ("reform_simulation_id", "simulations.id"),
            ("report_id", "reports.id"),
        )
    ]
    op.create_table(
        "intra_decile_impacts",
        sa.Column("baseline_simulation_id", sa.Uuid(), nullable=False),
        sa.Column("reform_simulation_id", sa.Uuid(), nullable=False),
        sa.Column("report_id", sa.Uuid(), nullable=True),
        sa.Column("decile", sa.Integer(), nullable=False),
        *share_columns,
        sa.Column("id", sa.Uuid(), nullable=False),
        sa.Column("created_at", sa.DateTime(), nullable=False),
        *foreign_keys,
        sa.PrimaryKeyConstraint("id"),
    )
def upgrade() -> None:
    """Upgrade schema: create the ``budget_summary`` table.

    The table has required UUID references to a baseline and a reform
    simulation, an optional report reference, the variable name and entity
    as required strings, three nullable float totals, and the ``id`` /
    ``created_at`` bookkeeping columns.
    """
    text_type = sqlmodel.sql.sqltypes.AutoString

    reference_columns = (
        sa.Column("baseline_simulation_id", sa.Uuid(), nullable=False),
        sa.Column("reform_simulation_id", sa.Uuid(), nullable=False),
        sa.Column("report_id", sa.Uuid(), nullable=True),
    )
    label_columns = (
        sa.Column("variable_name", text_type(), nullable=False),
        sa.Column("entity", text_type(), nullable=False),
    )
    total_columns = tuple(
        sa.Column(column_name, sa.Float(), nullable=True)
        for column_name in ("baseline_total", "reform_total", "change")
    )
    bookkeeping_columns = (
        sa.Column("id", sa.Uuid(), nullable=False),
        sa.Column("created_at", sa.DateTime(), nullable=False),
    )
    constraints = (
        sa.ForeignKeyConstraint(["baseline_simulation_id"], ["simulations.id"]),
        sa.ForeignKeyConstraint(["reform_simulation_id"], ["simulations.id"]),
        sa.ForeignKeyConstraint(["report_id"], ["reports.id"]),
        sa.PrimaryKeyConstraint("id"),
    )

    # Column order matches the original migration exactly.
    op.create_table(
        "budget_summary",
        *reference_columns,
        *label_columns,
        *total_columns,
        *bookkeeping_columns,
        *constraints,
    )
by Alembic - please adjust! ### + op.drop_table('budget_summary') + # ### end Alembic commands ### diff --git a/src/policyengine_api/api/analysis.py b/src/policyengine_api/api/analysis.py index ba739c7..33fb27a 100644 --- a/src/policyengine_api/api/analysis.py +++ b/src/policyengine_api/api/analysis.py @@ -26,9 +26,17 @@ from sqlmodel import Session, select from policyengine_api.models import ( + BudgetSummary, + BudgetSummaryRead, Dataset, DecileImpact, DecileImpactRead, + Inequality, + InequalityRead, + IntraDecileImpact, + IntraDecileImpactRead, + Poverty, + PovertyRead, ProgramStatistics, ProgramStatisticsRead, Region, @@ -134,6 +142,11 @@ class EconomicImpactResponse(BaseModel): error_message: str | None = None decile_impacts: list[DecileImpactRead] | None = None program_statistics: list[ProgramStatisticsRead] | None = None + poverty: list[PovertyRead] | None = None + inequality: list[InequalityRead] | None = None + budget_summary: list[BudgetSummaryRead] | None = None + intra_decile: list[IntraDecileImpactRead] | None = None + detailed_budget: dict[str, dict[str, float | None]] | None = None def _get_model_version( @@ -273,6 +286,11 @@ def _build_response( """Build response from report and simulations.""" decile_impacts = None program_statistics = None + poverty_records = None + inequality_records = None + budget_summary_records = None + intra_decile_records = None + detailed_budget = None if report.status == ReportStatus.COMPLETED: # Fetch decile impacts for this report @@ -326,6 +344,100 @@ def _build_response( for s in stats ] + # Build detailed_budget: V1-compatible per-program breakdown + # keyed by program name with baseline/reform/difference values. 
+ detailed_budget = { + s.program_name: { + "baseline": _safe_float(s.baseline_total), + "reform": _safe_float(s.reform_total), + "difference": _safe_float(s.change), + } + for s in stats + } + + # Fetch poverty records for this report + pov_rows = session.exec( + select(Poverty).where(Poverty.report_id == report.id) + ).all() + poverty_records = [ + PovertyRead( + id=p.id, + created_at=p.created_at, + simulation_id=p.simulation_id, + report_id=p.report_id, + poverty_type=p.poverty_type, + entity=p.entity, + filter_variable=p.filter_variable, + headcount=_safe_float(p.headcount), + total_population=_safe_float(p.total_population), + rate=_safe_float(p.rate), + ) + for p in pov_rows + ] + + # Fetch inequality records for this report + ineq_rows = session.exec( + select(Inequality).where(Inequality.report_id == report.id) + ).all() + inequality_records = [ + InequalityRead( + id=i.id, + created_at=i.created_at, + simulation_id=i.simulation_id, + report_id=i.report_id, + income_variable=i.income_variable, + entity=i.entity, + gini=_safe_float(i.gini), + top_10_share=_safe_float(i.top_10_share), + top_1_share=_safe_float(i.top_1_share), + bottom_50_share=_safe_float(i.bottom_50_share), + ) + for i in ineq_rows + ] + + # Fetch budget summary records for this report + budget_rows = session.exec( + select(BudgetSummary).where(BudgetSummary.report_id == report.id) + ).all() + budget_summary_records = [ + BudgetSummaryRead( + id=b.id, + created_at=b.created_at, + baseline_simulation_id=b.baseline_simulation_id, + reform_simulation_id=b.reform_simulation_id, + report_id=b.report_id, + variable_name=b.variable_name, + entity=b.entity, + baseline_total=_safe_float(b.baseline_total), + reform_total=_safe_float(b.reform_total), + change=_safe_float(b.change), + ) + for b in budget_rows + ] + + # Fetch intra-decile impact records for this report + intra_rows = session.exec( + select(IntraDecileImpact).where( + IntraDecileImpact.report_id == report.id + ) + ).all() + 
intra_decile_records = [ + IntraDecileImpactRead( + id=r.id, + created_at=r.created_at, + baseline_simulation_id=r.baseline_simulation_id, + reform_simulation_id=r.reform_simulation_id, + report_id=r.report_id, + decile=r.decile, + lose_more_than_5pct=_safe_float(r.lose_more_than_5pct), + lose_less_than_5pct=_safe_float(r.lose_less_than_5pct), + no_change=_safe_float(r.no_change), + gain_less_than_5pct=_safe_float(r.gain_less_than_5pct), + gain_more_than_5pct=_safe_float(r.gain_more_than_5pct), + ) + for r in intra_rows + ] + region_info = None if region: region_info = RegionInfo( @@ -354,6 +466,11 @@ def _build_response( error_message=report.error_message, decile_impacts=decile_impacts, program_statistics=program_statistics, + poverty=poverty_records, + inequality=inequality_records, + budget_summary=budget_summary_records, + intra_decile=intra_decile_records, + detailed_budget=detailed_budget, ) @@ -391,6 +508,14 @@ def _run_local_economy_comparison_uk(job_id: str, session: Session) -> None: from policyengine.core.policy import ParameterValue as PEParameterValue from policyengine.core.policy import Policy as PEPolicy from policyengine.outputs import DecileImpact as PEDecileImpact + from policyengine.outputs.aggregate import Aggregate as PEAggregate + from policyengine.outputs.aggregate import AggregateType as PEAggregateType + from policyengine.outputs.inequality import calculate_uk_inequality + from policyengine.outputs.poverty import ( + calculate_uk_poverty_by_age, + calculate_uk_poverty_by_gender, + calculate_uk_poverty_rates, + ) from policyengine.tax_benefit_models.uk import uk_latest from policyengine.tax_benefit_models.uk.datasets import PolicyEngineUKDataset from policyengine.tax_benefit_models.uk.outputs import ( @@ -547,8 +672,14 @@ def build_dynamic(dynamic_id): programmes = { "income_tax": {"entity": "person", "is_tax": True}, "national_insurance": {"entity": "person", "is_tax": True}, + "vat": {"entity": "household", "is_tax": True}, + 
"council_tax": {"entity": "household", "is_tax": True}, "universal_credit": {"entity": "person", "is_tax": False}, "child_benefit": {"entity": "person", "is_tax": False}, + "pension_credit": {"entity": "person", "is_tax": False}, + "income_support": {"entity": "person", "is_tax": False}, + "working_tax_credit": {"entity": "person", "is_tax": False}, + "child_tax_credit": {"entity": "person", "is_tax": False}, } for prog_name, prog_info in programmes.items(): try: @@ -579,6 +710,175 @@ def build_dynamic(dynamic_id): except KeyError: pass # Variable not found in model + # Calculate poverty rates for baseline and reform + for pe_sim, db_sim in [ + (pe_baseline_sim, baseline_sim), + (pe_reform_sim, reform_sim), + ]: + poverty_results = calculate_uk_poverty_rates(pe_sim) + for pov in poverty_results.outputs: + poverty_record = Poverty( + simulation_id=db_sim.id, + report_id=report.id, + poverty_type=pov.poverty_type, + entity=pov.entity, + filter_variable=pov.filter_variable, + headcount=pov.headcount, + total_population=pov.total_population, + rate=pov.rate, + ) + session.add(poverty_record) + + # Calculate poverty rates by age group for baseline and reform + for pe_sim, db_sim in [ + (pe_baseline_sim, baseline_sim), + (pe_reform_sim, reform_sim), + ]: + age_poverty_results = calculate_uk_poverty_by_age(pe_sim) + for pov in age_poverty_results.outputs: + poverty_record = Poverty( + simulation_id=db_sim.id, + report_id=report.id, + poverty_type=pov.poverty_type, + entity=pov.entity, + filter_variable=pov.filter_variable, + headcount=pov.headcount, + total_population=pov.total_population, + rate=pov.rate, + ) + session.add(poverty_record) + + # Calculate poverty rates by gender for baseline and reform + for pe_sim, db_sim in [ + (pe_baseline_sim, baseline_sim), + (pe_reform_sim, reform_sim), + ]: + gender_poverty_results = calculate_uk_poverty_by_gender(pe_sim) + for pov in gender_poverty_results.outputs: + poverty_record = Poverty( + simulation_id=db_sim.id, + 
report_id=report.id, + poverty_type=pov.poverty_type, + entity=pov.entity, + filter_variable=pov.filter_variable, + headcount=pov.headcount, + total_population=pov.total_population, + rate=pov.rate, + ) + session.add(poverty_record) + + # Calculate inequality for baseline and reform + for pe_sim, db_sim in [ + (pe_baseline_sim, baseline_sim), + (pe_reform_sim, reform_sim), + ]: + ineq = calculate_uk_inequality(pe_sim) + ineq.run() + inequality_record = Inequality( + simulation_id=db_sim.id, + report_id=report.id, + income_variable=ineq.income_variable, + entity=ineq.entity, + gini=ineq.gini, + top_10_share=ineq.top_10_share, + top_1_share=ineq.top_1_share, + bottom_50_share=ineq.bottom_50_share, + ) + session.add(inequality_record) + + # Calculate budget summary aggregates + # UK budget variables — household-level aggregates for fiscal totals + uk_budget_variables = { + "household_tax": "household", + "household_benefits": "household", + "household_net_income": "household", + } + PEAggregate.model_rebuild(_types_namespace={"Simulation": PESimulation}) + for var_name, entity in uk_budget_variables.items(): + baseline_agg = PEAggregate( + simulation=pe_baseline_sim, + variable=var_name, + aggregate_type=PEAggregateType.SUM, + entity=entity, + ) + baseline_agg.run() + reform_agg = PEAggregate( + simulation=pe_reform_sim, + variable=var_name, + aggregate_type=PEAggregateType.SUM, + entity=entity, + ) + reform_agg.run() + budget_record = BudgetSummary( + baseline_simulation_id=baseline_sim.id, + reform_simulation_id=reform_sim.id, + report_id=report.id, + variable_name=var_name, + entity=entity, + baseline_total=float(baseline_agg.result), + reform_total=float(reform_agg.result), + change=float(reform_agg.result - baseline_agg.result), + ) + session.add(budget_record) + + # Household count: bypass Aggregate and compute directly from raw numpy + # values. 
Using Aggregate(SUM) on household_weight would compute + # sum(weight * weight) because MicroSeries.sum() applies weights + # automatically — it's unclear whether Aggregate can be used correctly + # for summing the weight column itself. + baseline_hh_count = float( + pe_baseline_sim.output_dataset.data.household[ + "household_weight" + ].values.sum() + ) + reform_hh_count = float( + pe_reform_sim.output_dataset.data.household[ + "household_weight" + ].values.sum() + ) + budget_record = BudgetSummary( + baseline_simulation_id=baseline_sim.id, + reform_simulation_id=reform_sim.id, + report_id=report.id, + variable_name="household_count_total", + entity="household", + baseline_total=baseline_hh_count, + reform_total=reform_hh_count, + change=reform_hh_count - baseline_hh_count, + ) + session.add(budget_record) + + # Calculate intra-decile impact (5-category income change distribution) + from policyengine_api.api.intra_decile import compute_intra_decile + + baseline_hh_data = { + k: pe_baseline_sim.output_dataset.data.household[k].values + for k in [ + "household_net_income", + "household_weight", + "household_count_people", + "household_income_decile", + ] + } + reform_hh_data = { + k: pe_reform_sim.output_dataset.data.household[k].values + for k in [ + "household_net_income", + "household_weight", + "household_count_people", + "household_income_decile", + ] + } + intra_decile_rows = compute_intra_decile(baseline_hh_data, reform_hh_data) + for row in intra_decile_rows: + record = IntraDecileImpact( + baseline_simulation_id=baseline_sim.id, + reform_simulation_id=reform_sim.id, + report_id=report.id, + **row, + ) + session.add(record) + # Mark completed baseline_sim.status = SimulationStatus.COMPLETED baseline_sim.completed_at = datetime.now(timezone.utc) @@ -601,6 +901,15 @@ def _run_local_economy_comparison_us(job_id: str, session: Session) -> None: from policyengine.core.policy import ParameterValue as PEParameterValue from policyengine.core.policy import Policy as 
PEPolicy from policyengine.outputs import DecileImpact as PEDecileImpact + from policyengine.outputs.aggregate import Aggregate as PEAggregate + from policyengine.outputs.aggregate import AggregateType as PEAggregateType + from policyengine.outputs.inequality import calculate_us_inequality + from policyengine.outputs.poverty import ( + calculate_us_poverty_by_age, + calculate_us_poverty_by_gender, + calculate_us_poverty_by_race, + calculate_us_poverty_rates, + ) from policyengine.tax_benefit_models.us import us_latest from policyengine.tax_benefit_models.us.datasets import PolicyEngineUSDataset from policyengine.tax_benefit_models.us.outputs import ( @@ -791,6 +1100,195 @@ def build_dynamic(dynamic_id): except KeyError: pass # Variable not found in model + # Calculate poverty rates for baseline and reform + for pe_sim, db_sim in [ + (pe_baseline_sim, baseline_sim), + (pe_reform_sim, reform_sim), + ]: + poverty_results = calculate_us_poverty_rates(pe_sim) + for pov in poverty_results.outputs: + poverty_record = Poverty( + simulation_id=db_sim.id, + report_id=report.id, + poverty_type=pov.poverty_type, + entity=pov.entity, + filter_variable=pov.filter_variable, + headcount=pov.headcount, + total_population=pov.total_population, + rate=pov.rate, + ) + session.add(poverty_record) + + # Calculate poverty rates by age group for baseline and reform + for pe_sim, db_sim in [ + (pe_baseline_sim, baseline_sim), + (pe_reform_sim, reform_sim), + ]: + age_poverty_results = calculate_us_poverty_by_age(pe_sim) + for pov in age_poverty_results.outputs: + poverty_record = Poverty( + simulation_id=db_sim.id, + report_id=report.id, + poverty_type=pov.poverty_type, + entity=pov.entity, + filter_variable=pov.filter_variable, + headcount=pov.headcount, + total_population=pov.total_population, + rate=pov.rate, + ) + session.add(poverty_record) + + # Calculate poverty rates by gender for baseline and reform + for pe_sim, db_sim in [ + (pe_baseline_sim, baseline_sim), + (pe_reform_sim, 
reform_sim), + ]: + gender_poverty_results = calculate_us_poverty_by_gender(pe_sim) + for pov in gender_poverty_results.outputs: + poverty_record = Poverty( + simulation_id=db_sim.id, + report_id=report.id, + poverty_type=pov.poverty_type, + entity=pov.entity, + filter_variable=pov.filter_variable, + headcount=pov.headcount, + total_population=pov.total_population, + rate=pov.rate, + ) + session.add(poverty_record) + + # Calculate poverty rates by race for baseline and reform (US only) + for pe_sim, db_sim in [ + (pe_baseline_sim, baseline_sim), + (pe_reform_sim, reform_sim), + ]: + race_poverty_results = calculate_us_poverty_by_race(pe_sim) + for pov in race_poverty_results.outputs: + poverty_record = Poverty( + simulation_id=db_sim.id, + report_id=report.id, + poverty_type=pov.poverty_type, + entity=pov.entity, + filter_variable=pov.filter_variable, + headcount=pov.headcount, + total_population=pov.total_population, + rate=pov.rate, + ) + session.add(poverty_record) + + # Calculate inequality for baseline and reform + for pe_sim, db_sim in [ + (pe_baseline_sim, baseline_sim), + (pe_reform_sim, reform_sim), + ]: + ineq = calculate_us_inequality(pe_sim) + ineq.run() + inequality_record = Inequality( + simulation_id=db_sim.id, + report_id=report.id, + income_variable=ineq.income_variable, + entity=ineq.entity, + gini=ineq.gini, + top_10_share=ineq.top_10_share, + top_1_share=ineq.top_1_share, + bottom_50_share=ineq.bottom_50_share, + ) + session.add(inequality_record) + + # Calculate budget summary aggregates + # US budget variables — household-level plus state tax at tax_unit level + us_budget_variables = { + "household_tax": "household", + "household_benefits": "household", + "household_net_income": "household", + "household_state_income_tax": "tax_unit", + } + PEAggregate.model_rebuild(_types_namespace={"Simulation": PESimulation}) + for var_name, entity in us_budget_variables.items(): + baseline_agg = PEAggregate( + simulation=pe_baseline_sim, + 
variable=var_name, + aggregate_type=PEAggregateType.SUM, + entity=entity, + ) + baseline_agg.run() + reform_agg = PEAggregate( + simulation=pe_reform_sim, + variable=var_name, + aggregate_type=PEAggregateType.SUM, + entity=entity, + ) + reform_agg.run() + budget_record = BudgetSummary( + baseline_simulation_id=baseline_sim.id, + reform_simulation_id=reform_sim.id, + report_id=report.id, + variable_name=var_name, + entity=entity, + baseline_total=float(baseline_agg.result), + reform_total=float(reform_agg.result), + change=float(reform_agg.result - baseline_agg.result), + ) + session.add(budget_record) + + # Household count: bypass Aggregate and compute directly from raw numpy + # values. Using Aggregate(SUM) on household_weight would compute + # sum(weight * weight) because MicroSeries.sum() applies weights + # automatically — it's unclear whether Aggregate can be used correctly + # for summing the weight column itself. + baseline_hh_count = float( + pe_baseline_sim.output_dataset.data.household[ + "household_weight" + ].values.sum() + ) + reform_hh_count = float( + pe_reform_sim.output_dataset.data.household[ + "household_weight" + ].values.sum() + ) + budget_record = BudgetSummary( + baseline_simulation_id=baseline_sim.id, + reform_simulation_id=reform_sim.id, + report_id=report.id, + variable_name="household_count_total", + entity="household", + baseline_total=baseline_hh_count, + reform_total=reform_hh_count, + change=reform_hh_count - baseline_hh_count, + ) + session.add(budget_record) + + # Calculate intra-decile impact (5-category income change distribution) + from policyengine_api.api.intra_decile import compute_intra_decile + + baseline_hh_data = { + k: pe_baseline_sim.output_dataset.data.household[k].values + for k in [ + "household_net_income", + "household_weight", + "household_count_people", + "household_income_decile", + ] + } + reform_hh_data = { + k: pe_reform_sim.output_dataset.data.household[k].values + for k in [ + "household_net_income", + 
"household_weight", + "household_count_people", + "household_income_decile", + ] + } + intra_decile_rows = compute_intra_decile(baseline_hh_data, reform_hh_data) + for row in intra_decile_rows: + record = IntraDecileImpact( + baseline_simulation_id=baseline_sim.id, + reform_simulation_id=reform_sim.id, + report_id=report.id, + **row, + ) + session.add(record) + # Mark completed baseline_sim.status = SimulationStatus.COMPLETED baseline_sim.completed_at = datetime.now(timezone.utc) diff --git a/src/policyengine_api/api/intra_decile.py b/src/policyengine_api/api/intra_decile.py new file mode 100644 index 0000000..799c65b --- /dev/null +++ b/src/policyengine_api/api/intra_decile.py @@ -0,0 +1,117 @@ +"""Intra-decile income change computation. + +Computes the distribution of income change categories within each income +decile, producing proportions for 5 categories per decile plus an overall +average row. +""" + +from typing import Callable + +import numpy as np + +# The 5-category thresholds and labels (matching V1 structure) +BOUNDS = [-np.inf, -0.05, -1e-3, 1e-3, 0.05, np.inf] +CATEGORY_COLUMNS = [ + "lose_more_than_5pct", + "lose_less_than_5pct", + "no_change", + "gain_less_than_5pct", + "gain_more_than_5pct", +] + + +# --- Income change formula variants --- + + +# NOTE: This formula replicates V1's API (policyengine-api, endpoints/economy/ +# compare.py lines 324-331). It appears to double-count the change because it +# adds absolute_change to the already-changed capped reform income: +# capped_reform = max(reform, 1) + (reform - baseline) +# For the common case (both incomes > 1), this yields: +# income_change = 2 * (reform - baseline) / baseline +# Kept here for reference while confirming with the team. 
+def _income_change_v1_original( + baseline_income: np.ndarray, + reform_income: np.ndarray, +) -> np.ndarray: + absolute_change = reform_income - baseline_income + capped_baseline = np.maximum(baseline_income, 1) + capped_reform = np.maximum(reform_income, 1) + absolute_change + return (capped_reform - capped_baseline) / capped_baseline + + +def _income_change_corrected( + baseline_income: np.ndarray, + reform_income: np.ndarray, +) -> np.ndarray: + capped_baseline = np.maximum(baseline_income, 1) + return (reform_income - baseline_income) / capped_baseline + + +# Strategy selector — change this to switch formulas +def get_income_change_formula() -> ( + Callable[[np.ndarray, np.ndarray], np.ndarray] +): + return _income_change_corrected + + +# --- Main computation --- + + +def compute_intra_decile( + baseline_household_data: dict[str, np.ndarray], + reform_household_data: dict[str, np.ndarray], +) -> list[dict]: + """Compute intra-decile impact proportions. + + Args: + baseline_household_data: Dict with keys "household_net_income", + "household_weight", "household_count_people", + "household_income_decile" — all as raw numpy arrays. + reform_household_data: Same keys for the reform simulation. + + Returns: + List of 11 dicts (deciles 1-10 + overall as decile=0), each with + keys: decile, lose_more_than_5pct, lose_less_than_5pct, no_change, + gain_less_than_5pct, gain_more_than_5pct. 
+ """ + baseline_income = baseline_household_data["household_net_income"] + reform_income = reform_household_data["household_net_income"] + weights = baseline_household_data["household_weight"] + people_per_hh = baseline_household_data["household_count_people"] + decile = baseline_household_data["household_income_decile"] + + # People-weighted count per household + people = people_per_hh * weights + + # Compute percentage income change + formula = get_income_change_formula() + income_change = formula(baseline_income, reform_income) + + # For each decile, compute proportion of people in each category + rows = [] + for decile_num in range(1, 11): + in_decile = decile == decile_num + people_in_decile = people[in_decile].sum() + + proportions = {} + for col, lower, upper in zip( + CATEGORY_COLUMNS, BOUNDS[:-1], BOUNDS[1:] + ): + in_category = (income_change > lower) & (income_change <= upper) + in_both = in_decile & in_category + + if people_in_decile == 0: + proportions[col] = 0.0 + else: + proportions[col] = float(people[in_both].sum() / people_in_decile) + + rows.append({"decile": decile_num, **proportions}) + + # Overall average: mean of the 10 decile proportions (matching V1) + overall = {"decile": 0} + for col in CATEGORY_COLUMNS: + overall[col] = sum(r[col] for r in rows) / 10 + rows.append(overall) + + return rows diff --git a/src/policyengine_api/modal_app.py b/src/policyengine_api/modal_app.py index 0feb0d7..3699010 100644 --- a/src/policyengine_api/modal_app.py +++ b/src/policyengine_api/modal_app.py @@ -1130,8 +1130,12 @@ def economy_comparison_uk(job_id: str, traceparent: str | None = None) -> None: try: # Import models inline from policyengine_api.models import ( + BudgetSummary, Dataset, DecileImpact, + Inequality, + IntraDecileImpact, + Poverty, ProgramStatistics, Report, ReportStatus, @@ -1173,6 +1177,12 @@ def economy_comparison_uk(job_id: str, traceparent: str | None = None) -> None: # Import policyengine from policyengine.core import Simulation as 
PESimulation from policyengine.outputs import DecileImpact as PEDecileImpact + from policyengine.outputs.aggregate import ( + Aggregate as PEAggregate, + ) + from policyengine.outputs.aggregate import ( + AggregateType as PEAggregateType, + ) from policyengine.tax_benefit_models.uk import uk_latest from policyengine.tax_benefit_models.uk.datasets import ( PolicyEngineUKDataset, @@ -1399,6 +1409,202 @@ def economy_comparison_uk(job_id: str, traceparent: str | None = None) -> None: except KeyError: pass # Variable not in model, skip silently + # Calculate poverty rates for baseline and reform + from policyengine.outputs.poverty import ( + calculate_uk_poverty_by_age, + calculate_uk_poverty_by_gender, + calculate_uk_poverty_rates, + ) + + for pe_sim, db_sim in [ + (pe_baseline_sim, baseline_sim), + (pe_reform_sim, reform_sim), + ]: + poverty_results = calculate_uk_poverty_rates(pe_sim) + for pov in poverty_results.outputs: + poverty_record = Poverty( + simulation_id=db_sim.id, + report_id=report.id, + poverty_type=pov.poverty_type, + entity=pov.entity, + filter_variable=pov.filter_variable, + headcount=pov.headcount, + total_population=pov.total_population, + rate=pov.rate, + ) + session.add(poverty_record) + + # Calculate poverty rates by age group + for pe_sim, db_sim in [ + (pe_baseline_sim, baseline_sim), + (pe_reform_sim, reform_sim), + ]: + age_poverty_results = calculate_uk_poverty_by_age( + pe_sim + ) + for pov in age_poverty_results.outputs: + poverty_record = Poverty( + simulation_id=db_sim.id, + report_id=report.id, + poverty_type=pov.poverty_type, + entity=pov.entity, + filter_variable=pov.filter_variable, + headcount=pov.headcount, + total_population=pov.total_population, + rate=pov.rate, + ) + session.add(poverty_record) + + # Calculate poverty rates by gender + for pe_sim, db_sim in [ + (pe_baseline_sim, baseline_sim), + (pe_reform_sim, reform_sim), + ]: + gender_poverty_results = ( + calculate_uk_poverty_by_gender(pe_sim) + ) + for pov in 
gender_poverty_results.outputs: + poverty_record = Poverty( + simulation_id=db_sim.id, + report_id=report.id, + poverty_type=pov.poverty_type, + entity=pov.entity, + filter_variable=pov.filter_variable, + headcount=pov.headcount, + total_population=pov.total_population, + rate=pov.rate, + ) + session.add(poverty_record) + + # Calculate inequality for baseline and reform + from policyengine.outputs.inequality import ( + calculate_uk_inequality, + ) + + for pe_sim, db_sim in [ + (pe_baseline_sim, baseline_sim), + (pe_reform_sim, reform_sim), + ]: + ineq = calculate_uk_inequality(pe_sim) + ineq.run() + inequality_record = Inequality( + simulation_id=db_sim.id, + report_id=report.id, + income_variable=ineq.income_variable, + entity=ineq.entity, + gini=ineq.gini, + top_10_share=ineq.top_10_share, + top_1_share=ineq.top_1_share, + bottom_50_share=ineq.bottom_50_share, + ) + session.add(inequality_record) + + # Calculate budget summary aggregates + # UK budget variables — household-level aggregates + uk_budget_variables = { + "household_tax": "household", + "household_benefits": "household", + "household_net_income": "household", + } + PEAggregate.model_rebuild( + _types_namespace={"Simulation": PESimulation} + ) + for var_name, entity in uk_budget_variables.items(): + baseline_agg = PEAggregate( + simulation=pe_baseline_sim, + variable=var_name, + aggregate_type=PEAggregateType.SUM, + entity=entity, + ) + baseline_agg.run() + reform_agg = PEAggregate( + simulation=pe_reform_sim, + variable=var_name, + aggregate_type=PEAggregateType.SUM, + entity=entity, + ) + reform_agg.run() + budget_record = BudgetSummary( + baseline_simulation_id=baseline_sim.id, + reform_simulation_id=reform_sim.id, + report_id=report.id, + variable_name=var_name, + entity=entity, + baseline_total=float(baseline_agg.result), + reform_total=float(reform_agg.result), + change=float( + reform_agg.result - baseline_agg.result + ), + ) + session.add(budget_record) + + # Household count: bypass Aggregate 
and compute directly + # from raw numpy values. Using Aggregate(SUM) on + # household_weight would compute sum(weight * weight) + # because MicroSeries.sum() applies weights automatically + # — it's unclear whether Aggregate can be used correctly + # for summing the weight column itself. + baseline_hh_count = float( + pe_baseline_sim.output_dataset.data.household[ + "household_weight" + ].values.sum() + ) + reform_hh_count = float( + pe_reform_sim.output_dataset.data.household[ + "household_weight" + ].values.sum() + ) + budget_record = BudgetSummary( + baseline_simulation_id=baseline_sim.id, + reform_simulation_id=reform_sim.id, + report_id=report.id, + variable_name="household_count_total", + entity="household", + baseline_total=baseline_hh_count, + reform_total=reform_hh_count, + change=reform_hh_count - baseline_hh_count, + ) + session.add(budget_record) + + # Calculate intra-decile impact + from policyengine_api.api.intra_decile import ( + compute_intra_decile, + ) + + baseline_hh_data = { + k: pe_baseline_sim.output_dataset.data.household[ + k + ].values + for k in [ + "household_net_income", + "household_weight", + "household_count_people", + "household_income_decile", + ] + } + reform_hh_data = { + k: pe_reform_sim.output_dataset.data.household[ + k + ].values + for k in [ + "household_net_income", + "household_weight", + "household_count_people", + "household_income_decile", + ] + } + intra_decile_rows = compute_intra_decile( + baseline_hh_data, reform_hh_data + ) + for row in intra_decile_rows: + record = IntraDecileImpact( + baseline_simulation_id=baseline_sim.id, + reform_simulation_id=reform_sim.id, + report_id=report.id, + **row, + ) + session.add(record) + # Mark simulations and report as completed baseline_sim.status = SimulationStatus.COMPLETED baseline_sim.completed_at = datetime.now(timezone.utc) @@ -1465,8 +1671,12 @@ def economy_comparison_us(job_id: str, traceparent: str | None = None) -> None: try: # Import models inline from 
policyengine_api.models import ( + BudgetSummary, Dataset, DecileImpact, + Inequality, + IntraDecileImpact, + Poverty, ProgramStatistics, Report, ReportStatus, @@ -1501,6 +1711,12 @@ def economy_comparison_us(job_id: str, traceparent: str | None = None) -> None: # Import policyengine from policyengine.core import Simulation as PESimulation from policyengine.outputs import DecileImpact as PEDecileImpact + from policyengine.outputs.aggregate import ( + Aggregate as PEAggregate, + ) + from policyengine.outputs.aggregate import ( + AggregateType as PEAggregateType, + ) from policyengine.tax_benefit_models.us import us_latest from policyengine.tax_benefit_models.us.datasets import ( PolicyEngineUSDataset, @@ -1700,6 +1916,225 @@ def economy_comparison_us(job_id: str, traceparent: str | None = None) -> None: except KeyError: pass # Variable not in model, skip silently + # Calculate poverty rates for baseline and reform + from policyengine.outputs.poverty import ( + calculate_us_poverty_by_age, + calculate_us_poverty_by_gender, + calculate_us_poverty_by_race, + calculate_us_poverty_rates, + ) + + for pe_sim, db_sim in [ + (pe_baseline_sim, baseline_sim), + (pe_reform_sim, reform_sim), + ]: + poverty_results = calculate_us_poverty_rates(pe_sim) + for pov in poverty_results.outputs: + poverty_record = Poverty( + simulation_id=db_sim.id, + report_id=report.id, + poverty_type=pov.poverty_type, + entity=pov.entity, + filter_variable=pov.filter_variable, + headcount=pov.headcount, + total_population=pov.total_population, + rate=pov.rate, + ) + session.add(poverty_record) + + # Calculate poverty rates by age group + for pe_sim, db_sim in [ + (pe_baseline_sim, baseline_sim), + (pe_reform_sim, reform_sim), + ]: + age_poverty_results = calculate_us_poverty_by_age( + pe_sim + ) + for pov in age_poverty_results.outputs: + poverty_record = Poverty( + simulation_id=db_sim.id, + report_id=report.id, + poverty_type=pov.poverty_type, + entity=pov.entity, + 
filter_variable=pov.filter_variable, + headcount=pov.headcount, + total_population=pov.total_population, + rate=pov.rate, + ) + session.add(poverty_record) + + # Calculate poverty rates by gender + for pe_sim, db_sim in [ + (pe_baseline_sim, baseline_sim), + (pe_reform_sim, reform_sim), + ]: + gender_poverty_results = ( + calculate_us_poverty_by_gender(pe_sim) + ) + for pov in gender_poverty_results.outputs: + poverty_record = Poverty( + simulation_id=db_sim.id, + report_id=report.id, + poverty_type=pov.poverty_type, + entity=pov.entity, + filter_variable=pov.filter_variable, + headcount=pov.headcount, + total_population=pov.total_population, + rate=pov.rate, + ) + session.add(poverty_record) + + # Calculate poverty rates by race (US only) + for pe_sim, db_sim in [ + (pe_baseline_sim, baseline_sim), + (pe_reform_sim, reform_sim), + ]: + race_poverty_results = ( + calculate_us_poverty_by_race(pe_sim) + ) + for pov in race_poverty_results.outputs: + poverty_record = Poverty( + simulation_id=db_sim.id, + report_id=report.id, + poverty_type=pov.poverty_type, + entity=pov.entity, + filter_variable=pov.filter_variable, + headcount=pov.headcount, + total_population=pov.total_population, + rate=pov.rate, + ) + session.add(poverty_record) + + # Calculate inequality for baseline and reform + from policyengine.outputs.inequality import ( + calculate_us_inequality, + ) + + for pe_sim, db_sim in [ + (pe_baseline_sim, baseline_sim), + (pe_reform_sim, reform_sim), + ]: + ineq = calculate_us_inequality(pe_sim) + ineq.run() + inequality_record = Inequality( + simulation_id=db_sim.id, + report_id=report.id, + income_variable=ineq.income_variable, + entity=ineq.entity, + gini=ineq.gini, + top_10_share=ineq.top_10_share, + top_1_share=ineq.top_1_share, + bottom_50_share=ineq.bottom_50_share, + ) + session.add(inequality_record) + + # Calculate budget summary aggregates + # US budget variables — household-level plus state tax + us_budget_variables = { + "household_tax": "household", + 
"household_benefits": "household", + "household_net_income": "household", + "household_state_income_tax": "tax_unit", + } + PEAggregate.model_rebuild( + _types_namespace={"Simulation": PESimulation} + ) + for var_name, entity in us_budget_variables.items(): + baseline_agg = PEAggregate( + simulation=pe_baseline_sim, + variable=var_name, + aggregate_type=PEAggregateType.SUM, + entity=entity, + ) + baseline_agg.run() + reform_agg = PEAggregate( + simulation=pe_reform_sim, + variable=var_name, + aggregate_type=PEAggregateType.SUM, + entity=entity, + ) + reform_agg.run() + budget_record = BudgetSummary( + baseline_simulation_id=baseline_sim.id, + reform_simulation_id=reform_sim.id, + report_id=report.id, + variable_name=var_name, + entity=entity, + baseline_total=float(baseline_agg.result), + reform_total=float(reform_agg.result), + change=float( + reform_agg.result - baseline_agg.result + ), + ) + session.add(budget_record) + + # Household count: bypass Aggregate and compute directly + # from raw numpy values. Using Aggregate(SUM) on + # household_weight would compute sum(weight * weight) + # because MicroSeries.sum() applies weights automatically + # — it's unclear whether Aggregate can be used correctly + # for summing the weight column itself. 
+ baseline_hh_count = float( + pe_baseline_sim.output_dataset.data.household[ + "household_weight" + ].values.sum() + ) + reform_hh_count = float( + pe_reform_sim.output_dataset.data.household[ + "household_weight" + ].values.sum() + ) + budget_record = BudgetSummary( + baseline_simulation_id=baseline_sim.id, + reform_simulation_id=reform_sim.id, + report_id=report.id, + variable_name="household_count_total", + entity="household", + baseline_total=baseline_hh_count, + reform_total=reform_hh_count, + change=reform_hh_count - baseline_hh_count, + ) + session.add(budget_record) + + # Calculate intra-decile impact + from policyengine_api.api.intra_decile import ( + compute_intra_decile, + ) + + baseline_hh_data = { + k: pe_baseline_sim.output_dataset.data.household[ + k + ].values + for k in [ + "household_net_income", + "household_weight", + "household_count_people", + "household_income_decile", + ] + } + reform_hh_data = { + k: pe_reform_sim.output_dataset.data.household[ + k + ].values + for k in [ + "household_net_income", + "household_weight", + "household_count_people", + "household_income_decile", + ] + } + intra_decile_rows = compute_intra_decile( + baseline_hh_data, reform_hh_data + ) + for row in intra_decile_rows: + record = IntraDecileImpact( + baseline_simulation_id=baseline_sim.id, + reform_simulation_id=reform_sim.id, + report_id=report.id, + **row, + ) + session.add(record) + # Mark simulations and report as completed baseline_sim.status = SimulationStatus.COMPLETED baseline_sim.completed_at = datetime.now(timezone.utc) diff --git a/src/policyengine_api/models/__init__.py b/src/policyengine_api/models/__init__.py index e73b75a..257d03a 100644 --- a/src/policyengine_api/models/__init__.py +++ b/src/policyengine_api/models/__init__.py @@ -1,5 +1,10 @@ """Database models for PolicyEngine API.""" +from .budget_summary import ( + BudgetSummary, + BudgetSummaryCreate, + BudgetSummaryRead, +) from .change_aggregate import ( ChangeAggregate, 
ChangeAggregateCreate, @@ -19,6 +24,11 @@ HouseholdJobStatus, ) from .inequality import Inequality, InequalityCreate, InequalityRead +from .intra_decile_impact import ( + IntraDecileImpact, + IntraDecileImpactCreate, + IntraDecileImpactRead, +) from .output import ( AggregateOutput, AggregateOutputCreate, @@ -82,6 +92,9 @@ from .variable import Variable, VariableCreate, VariableRead __all__ = [ + "BudgetSummary", + "BudgetSummaryCreate", + "BudgetSummaryRead", "AggregateOutput", "AggregateOutputCreate", "AggregateOutputRead", @@ -114,6 +127,9 @@ "Inequality", "InequalityCreate", "InequalityRead", + "IntraDecileImpact", + "IntraDecileImpactCreate", + "IntraDecileImpactRead", "Parameter", "ParameterCreate", "ParameterRead", diff --git a/src/policyengine_api/models/budget_summary.py b/src/policyengine_api/models/budget_summary.py new file mode 100644 index 0000000..0a399fd --- /dev/null +++ b/src/policyengine_api/models/budget_summary.py @@ -0,0 +1,55 @@ +"""Budget summary output model. + +Stores economy-wide fiscal aggregates for a report. Each row represents +a single aggregate variable (e.g. household_tax, household_benefits) +with baseline and reform totals. This is separate from ProgramStatistics, +which stores per-program breakdowns. 
+ +The client can derive V1-compatible budget fields from these rows: + - tax_revenue_impact = household_tax row's change + - benefit_spending_impact = household_benefits row's change + - budgetary_impact = tax change - benefit change + - households = household_count_total row's baseline_total + - baseline_net_income = household_net_income row's baseline_total + - state_tax_revenue_impact = household_state_income_tax row's change (US only) +""" + +from datetime import datetime, timezone +from uuid import UUID, uuid4 + +from sqlmodel import Field, SQLModel + + +class BudgetSummaryBase(SQLModel): + """Base budget summary fields.""" + + baseline_simulation_id: UUID = Field(foreign_key="simulations.id") + reform_simulation_id: UUID = Field(foreign_key="simulations.id") + report_id: UUID | None = Field(default=None, foreign_key="reports.id") + variable_name: str + entity: str + baseline_total: float | None = None + reform_total: float | None = None + change: float | None = None + + +class BudgetSummary(BudgetSummaryBase, table=True): + """Budget summary database model.""" + + __tablename__ = "budget_summary" + + id: UUID = Field(default_factory=uuid4, primary_key=True) + created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc)) + + +class BudgetSummaryCreate(BudgetSummaryBase): + """Schema for creating budget summary records.""" + + pass + + +class BudgetSummaryRead(BudgetSummaryBase): + """Schema for reading budget summary records.""" + + id: UUID + created_at: datetime diff --git a/src/policyengine_api/models/intra_decile_impact.py b/src/policyengine_api/models/intra_decile_impact.py new file mode 100644 index 0000000..42c8173 --- /dev/null +++ b/src/policyengine_api/models/intra_decile_impact.py @@ -0,0 +1,57 @@ +"""Intra-decile impact output model. + +Stores the distribution of income change categories within each income +decile. 
Each row represents one decile (1-10) or the overall average +(decile=0), with five proportion columns summing to ~1.0. + +The five categories classify households by their percentage income change: + - lose_more_than_5pct: change <= -5% + - lose_less_than_5pct: -5% < change <= -0.1% + - no_change: -0.1% < change <= 0.1% + - gain_less_than_5pct: 0.1% < change <= 5% + - gain_more_than_5pct: change > 5% + +Proportions are people-weighted (using household_count_people * +household_weight) so they reflect the share of people, not households. +""" + +from datetime import datetime, timezone +from uuid import UUID, uuid4 + +from sqlmodel import Field, SQLModel + + +class IntraDecileImpactBase(SQLModel): + """Base intra-decile impact fields.""" + + baseline_simulation_id: UUID = Field(foreign_key="simulations.id") + reform_simulation_id: UUID = Field(foreign_key="simulations.id") + report_id: UUID | None = Field(default=None, foreign_key="reports.id") + decile: int # 1-10 for individual deciles, 0 for overall average + lose_more_than_5pct: float | None = None + lose_less_than_5pct: float | None = None + no_change: float | None = None + gain_less_than_5pct: float | None = None + gain_more_than_5pct: float | None = None + + +class IntraDecileImpact(IntraDecileImpactBase, table=True): + """Intra-decile impact database model.""" + + __tablename__ = "intra_decile_impacts" + + id: UUID = Field(default_factory=uuid4, primary_key=True) + created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc)) + + +class IntraDecileImpactCreate(IntraDecileImpactBase): + """Schema for creating intra-decile impact records.""" + + pass + + +class IntraDecileImpactRead(IntraDecileImpactBase): + """Schema for reading intra-decile impact records.""" + + id: UUID + created_at: datetime diff --git a/test_fixtures/fixtures_economic_impact_response.py b/test_fixtures/fixtures_economic_impact_response.py new file mode 100644 index 0000000..4d78371 --- /dev/null +++ 
b/test_fixtures/fixtures_economic_impact_response.py @@ -0,0 +1,316 @@ +"""Fixtures for economic impact response tests. + +Provides factory functions to create completed reports with output +table records (poverty, inequality, budget_summary, intra_decile and +program_statistics; not decile_impacts) for testing _build_response(). +""" + +from uuid import uuid4 + +from sqlmodel import Session + +from policyengine_api.models import ( + BudgetSummary, + Dataset, + DecileImpact, + Inequality, + IntraDecileImpact, + Poverty, + ProgramStatistics, + Report, + ReportStatus, + Simulation, + SimulationStatus, + TaxBenefitModel, + TaxBenefitModelVersion, +) + +# --------------------------------------------------------------------------- +# Constants +# --------------------------------------------------------------------------- + +UK_PROGRAMS = { + "income_tax": {"entity": "person", "is_tax": True}, + "national_insurance": {"entity": "person", "is_tax": True}, + "vat": {"entity": "household", "is_tax": True}, + "council_tax": {"entity": "household", "is_tax": True}, + "universal_credit": {"entity": "person", "is_tax": False}, + "child_benefit": {"entity": "person", "is_tax": False}, + "pension_credit": {"entity": "person", "is_tax": False}, + "income_support": {"entity": "person", "is_tax": False}, + "working_tax_credit": {"entity": "person", "is_tax": False}, + "child_tax_credit": {"entity": "person", "is_tax": False}, +} + +UK_PROGRAM_COUNT = len(UK_PROGRAMS) + +BUDGET_VARIABLES_UK = [ + ("household_tax", "household"), + ("household_benefits", "household"), + ("household_net_income", "household"), + ("household_count_total", "household"), +] + +SAMPLE_POVERTY_TYPES = ["absolute_bhc", "absolute_ahc"] +SAMPLE_INEQUALITY_INCOME_VAR = "household_net_income" +SAMPLE_GINI = 0.35 +SAMPLE_TOP_10_SHARE = 0.28 +SAMPLE_TOP_1_SHARE = 0.10 +SAMPLE_BOTTOM_50_SHARE = 0.22 + +INTRA_DECILE_DECILE_COUNT = 11 # 10 deciles + overall + + +#
--------------------------------------------------------------------------- +# Core factory: report with simulations +# --------------------------------------------------------------------------- + + +def create_report_with_simulations( + session: Session, + status: ReportStatus = ReportStatus.COMPLETED, +) -> tuple[Report, Simulation, Simulation]: + """Create a model, version, dataset, two simulations, and a report.""" + model = TaxBenefitModel(name="policyengine-uk", description="UK model") + session.add(model) + session.commit() + session.refresh(model) + + version = TaxBenefitModelVersion( + model_id=model.id, version="1.0.0", description="Test" + ) + session.add(version) + session.commit() + session.refresh(version) + + dataset = Dataset( + name="uk_test", + description="Test dataset", + filepath="test.h5", + year=2024, + tax_benefit_model_id=model.id, + ) + session.add(dataset) + session.commit() + session.refresh(dataset) + + baseline_sim = Simulation( + dataset_id=dataset.id, + tax_benefit_model_version_id=version.id, + status=SimulationStatus.COMPLETED, + ) + reform_sim = Simulation( + dataset_id=dataset.id, + tax_benefit_model_version_id=version.id, + status=SimulationStatus.COMPLETED, + ) + session.add(baseline_sim) + session.add(reform_sim) + session.commit() + session.refresh(baseline_sim) + session.refresh(reform_sim) + + report = Report( + label="Test economic impact report", + baseline_simulation_id=baseline_sim.id, + reform_simulation_id=reform_sim.id, + status=status, + report_type="economy_comparison", + ) + session.add(report) + session.commit() + session.refresh(report) + + return report, baseline_sim, reform_sim + + +# --------------------------------------------------------------------------- +# Output record factories +# --------------------------------------------------------------------------- + + +def add_poverty_records( + session: Session, + report: Report, + baseline_sim: Simulation, + reform_sim: Simulation, + count: int = 4, +) -> 
list[Poverty]: + """Add one poverty record per sample type per simulation; ``count`` is unused.""" + records = [] + for sim in [baseline_sim, reform_sim]: + for i, ptype in enumerate(SAMPLE_POVERTY_TYPES): + rec = Poverty( + simulation_id=sim.id, + report_id=report.id, + poverty_type=ptype, + entity="person", + filter_variable=None, + headcount=float(1000 + i * 100), + total_population=10000.0, + rate=float(1000 + i * 100) / 10000.0, + ) + session.add(rec) + records.append(rec) + session.commit() + return records + + +def add_poverty_by_age_records( + session: Session, + report: Report, + baseline_sim: Simulation, + reform_sim: Simulation, +) -> list[Poverty]: + """Add poverty-by-age records with filter_variable set.""" + records = [] + age_groups = [ + ("is_child", True), + ("is_adult", True), + ("is_SP_age", True), + ] + for sim in [baseline_sim, reform_sim]: + for filter_var, _ in age_groups: + for ptype in SAMPLE_POVERTY_TYPES: + rec = Poverty( + simulation_id=sim.id, + report_id=report.id, + poverty_type=ptype, + entity="person", + filter_variable=filter_var, + headcount=500.0, + total_population=3000.0, + rate=500.0 / 3000.0, + ) + session.add(rec) + records.append(rec) + session.commit() + return records + + +def add_inequality_records( + session: Session, + report: Report, + baseline_sim: Simulation, + reform_sim: Simulation, +) -> list[Inequality]: + """Add inequality records for baseline and reform.""" + records = [] + for sim in [baseline_sim, reform_sim]: + rec = Inequality( + simulation_id=sim.id, + report_id=report.id, + income_variable=SAMPLE_INEQUALITY_INCOME_VAR, + entity="household", + gini=SAMPLE_GINI, + top_10_share=SAMPLE_TOP_10_SHARE, + top_1_share=SAMPLE_TOP_1_SHARE, + bottom_50_share=SAMPLE_BOTTOM_50_SHARE, + ) + session.add(rec) + records.append(rec) + session.commit() + return records + + +def add_budget_summary_records( + session: Session, + report: Report, + baseline_sim: Simulation, + reform_sim: Simulation, +) -> list[BudgetSummary]: + """Add budget
summary records for UK variables.""" + records = [] + for var_name, entity in BUDGET_VARIABLES_UK: + rec = BudgetSummary( + baseline_simulation_id=baseline_sim.id, + reform_simulation_id=reform_sim.id, + report_id=report.id, + variable_name=var_name, + entity=entity, + baseline_total=1_000_000.0, + reform_total=1_050_000.0, + change=50_000.0, + ) + session.add(rec) + records.append(rec) + session.commit() + return records + + +def add_intra_decile_records( + session: Session, + report: Report, + baseline_sim: Simulation, + reform_sim: Simulation, +) -> list[IntraDecileImpact]: + """Add 11 intra-decile impact records (deciles 1-10 + overall).""" + records = [] + for decile_num in list(range(1, 11)) + [0]: + rec = IntraDecileImpact( + baseline_simulation_id=baseline_sim.id, + reform_simulation_id=reform_sim.id, + report_id=report.id, + decile=decile_num, + lose_more_than_5pct=0.0, + lose_less_than_5pct=0.0, + no_change=0.0, + gain_less_than_5pct=1.0, + gain_more_than_5pct=0.0, + ) + session.add(rec) + records.append(rec) + session.commit() + return records + + +def add_program_statistics_records( + session: Session, + report: Report, + baseline_sim: Simulation, + reform_sim: Simulation, + programs: dict | None = None, +) -> list[ProgramStatistics]: + """Add program statistics records. 
Defaults to full UK program list.""" + if programs is None: + programs = UK_PROGRAMS + records = [] + for prog_name, prog_info in programs.items(): + rec = ProgramStatistics( + baseline_simulation_id=baseline_sim.id, + reform_simulation_id=reform_sim.id, + report_id=report.id, + program_name=prog_name, + entity=prog_info["entity"], + is_tax=prog_info["is_tax"], + baseline_total=500_000.0, + reform_total=520_000.0, + change=20_000.0, + baseline_count=10_000.0, + reform_count=10_000.0, + winners=3_000.0, + losers=2_000.0, + ) + session.add(rec) + records.append(rec) + session.commit() + return records + + +# --------------------------------------------------------------------------- +# Composite: fully populated report +# --------------------------------------------------------------------------- + + +def create_fully_populated_report( + session: Session, +) -> tuple[Report, Simulation, Simulation]: + """Create a completed report populated by every add_* factory in this module.""" + report, baseline_sim, reform_sim = create_report_with_simulations(session) + add_poverty_records(session, report, baseline_sim, reform_sim) + add_poverty_by_age_records(session, report, baseline_sim, reform_sim) + add_inequality_records(session, report, baseline_sim, reform_sim) + add_budget_summary_records(session, report, baseline_sim, reform_sim) + add_intra_decile_records(session, report, baseline_sim, reform_sim) + add_program_statistics_records(session, report, baseline_sim, reform_sim) + return report, baseline_sim, reform_sim diff --git a/test_fixtures/fixtures_intra_decile.py b/test_fixtures/fixtures_intra_decile.py new file mode 100644 index 0000000..fd83c7d --- /dev/null +++ b/test_fixtures/fixtures_intra_decile.py @@ -0,0 +1,77 @@ +"""Fixtures for intra-decile impact tests.""" + +import numpy as np + +# --------------------------------------------------------------------------- +# Constants +# --------------------------------------------------------------------------- + +NUM_HOUSEHOLDS =
100 +HOUSEHOLDS_PER_DECILE = NUM_HOUSEHOLDS // 10 + +# Each decile has 10 households; deciles 1-10 +DECILES = np.repeat(np.arange(1, 11), HOUSEHOLDS_PER_DECILE).astype(float) + +UNIFORM_WEIGHTS = np.ones(NUM_HOUSEHOLDS) * 100.0 +UNIFORM_PEOPLE = np.full(NUM_HOUSEHOLDS, 2.0) + +# Income change thresholds (matching intra_decile.py BOUNDS) +THRESHOLD_5PCT = 0.05 +THRESHOLD_0_1PCT = 1e-3 + +CATEGORY_NAMES = [ + "lose_more_than_5pct", + "lose_less_than_5pct", + "no_change", + "gain_less_than_5pct", + "gain_more_than_5pct", +] + +EXPECTED_ROW_COUNT = 11 # 10 deciles + 1 overall (decile=0) +EXPECTED_DECILE_NUMBERS = list(range(1, 11)) + [0] + + +# --------------------------------------------------------------------------- +# Factory functions +# --------------------------------------------------------------------------- + + +def make_baseline_income() -> np.ndarray: + """Baseline incomes: decile N earns N * 10,000.""" + return DECILES * 10_000.0 + + +def make_household_data( + baseline_income: np.ndarray, + reform_income: np.ndarray | None = None, + weights: np.ndarray | None = None, + people: np.ndarray | None = None, +) -> tuple[dict[str, np.ndarray], dict[str, np.ndarray]]: + """Build baseline and reform household data dicts for compute_intra_decile.""" + if reform_income is None: + reform_income = baseline_income.copy() + if weights is None: + weights = UNIFORM_WEIGHTS.copy() + if people is None: + people = UNIFORM_PEOPLE.copy() + + baseline = { + "household_net_income": baseline_income, + "household_weight": weights, + "household_count_people": people, + "household_income_decile": DECILES.copy(), + } + reform = { + "household_net_income": reform_income, + "household_weight": weights, + "household_count_people": people, + "household_income_decile": DECILES.copy(), + } + return baseline, reform + + +def make_single_household_arrays( + baseline_val: float, reform_val: float +) -> tuple[np.ndarray, np.ndarray]: + """Create single-element arrays for formula unit tests.""" 
+ return np.array([baseline_val]), np.array([reform_val]) diff --git a/tests/test_economic_impact_response.py b/tests/test_economic_impact_response.py new file mode 100644 index 0000000..9401224 --- /dev/null +++ b/tests/test_economic_impact_response.py @@ -0,0 +1,485 @@ +"""Tests for _build_response() and _safe_float() in analysis.py. + +Covers all Phase 2 output fields: poverty, inequality, budget_summary, +intra_decile, program_statistics, detailed_budget, and decile_impacts. +""" + +import math + +from policyengine_api.api.analysis import _build_response, _safe_float +from policyengine_api.models import ReportStatus +from test_fixtures.fixtures_economic_impact_response import ( + BUDGET_VARIABLES_UK, + INTRA_DECILE_DECILE_COUNT, + SAMPLE_BOTTOM_50_SHARE, + SAMPLE_GINI, + SAMPLE_INEQUALITY_INCOME_VAR, + SAMPLE_POVERTY_TYPES, + SAMPLE_TOP_1_SHARE, + SAMPLE_TOP_10_SHARE, + UK_PROGRAM_COUNT, + UK_PROGRAMS, + add_budget_summary_records, + add_inequality_records, + add_intra_decile_records, + add_poverty_by_age_records, + add_poverty_records, + add_program_statistics_records, + create_fully_populated_report, + create_report_with_simulations, +) + + +# --------------------------------------------------------------------------- +# _safe_float +# --------------------------------------------------------------------------- + + +class TestSafeFloat: + """Tests for the _safe_float helper that sanitizes floats for JSON.""" + + def test__given_normal_float__then_returns_same_value(self): + assert _safe_float(42.5) == 42.5 + + def test__given_none__then_returns_none(self): + assert _safe_float(None) is None + + def test__given_nan__then_returns_none(self): + assert _safe_float(float("nan")) is None + + def test__given_positive_inf__then_returns_none(self): + assert _safe_float(float("inf")) is None + + def test__given_negative_inf__then_returns_none(self): + assert _safe_float(float("-inf")) is None + + def test__given_zero__then_returns_zero(self): + assert _safe_float(0.0) 
== 0.0 + + def test__given_negative_float__then_returns_same_value(self): + assert _safe_float(-123.456) == -123.456 + + +# --------------------------------------------------------------------------- +# _build_response — pending report +# --------------------------------------------------------------------------- + + +class TestBuildResponsePending: + """Tests for _build_response when the report is not yet completed.""" + + def test__given_pending_report__then_all_output_fields_are_none(self, session): + # Given + report, baseline_sim, reform_sim = create_report_with_simulations( + session, status=ReportStatus.PENDING + ) + + # When + response = _build_response(report, baseline_sim, reform_sim, session) + + # Then + assert response.status == ReportStatus.PENDING + assert response.decile_impacts is None + assert response.program_statistics is None + assert response.poverty is None + assert response.inequality is None + assert response.budget_summary is None + assert response.intra_decile is None + assert response.detailed_budget is None + + def test__given_running_report__then_all_output_fields_are_none(self, session): + # Given + report, baseline_sim, reform_sim = create_report_with_simulations( + session, status=ReportStatus.RUNNING + ) + + # When + response = _build_response(report, baseline_sim, reform_sim, session) + + # Then + assert response.status == ReportStatus.RUNNING + assert response.poverty is None + assert response.inequality is None + + +# --------------------------------------------------------------------------- +# _build_response — poverty +# --------------------------------------------------------------------------- + + +class TestBuildResponsePoverty: + """Tests for poverty records in _build_response output.""" + + def test__given_completed_report_with_poverty__then_poverty_list_not_empty( + self, session + ): + # Given + report, baseline_sim, reform_sim = create_report_with_simulations(session) + add_poverty_records(session, report, 
baseline_sim, reform_sim) + + # When + response = _build_response(report, baseline_sim, reform_sim, session) + + # Then + assert response.poverty is not None + assert len(response.poverty) > 0 + + def test__given_poverty_records__then_each_has_poverty_type(self, session): + # Given + report, baseline_sim, reform_sim = create_report_with_simulations(session) + add_poverty_records(session, report, baseline_sim, reform_sim) + + # When + response = _build_response(report, baseline_sim, reform_sim, session) + + # Then + for p in response.poverty: + assert p.poverty_type in SAMPLE_POVERTY_TYPES + + def test__given_poverty_by_age_records__then_filter_variable_is_set(self, session): + # Given + report, baseline_sim, reform_sim = create_report_with_simulations(session) + add_poverty_by_age_records(session, report, baseline_sim, reform_sim) + + # When + response = _build_response(report, baseline_sim, reform_sim, session) + + # Then + assert response.poverty is not None + filter_vars = {p.filter_variable for p in response.poverty} + assert "is_child" in filter_vars + assert "is_adult" in filter_vars + assert "is_SP_age" in filter_vars + + def test__given_poverty_records__then_rate_is_headcount_over_population( + self, session + ): + # Given + report, baseline_sim, reform_sim = create_report_with_simulations(session) + add_poverty_records(session, report, baseline_sim, reform_sim) + + # When + response = _build_response(report, baseline_sim, reform_sim, session) + + # Then + for p in response.poverty: + expected_rate = p.headcount / p.total_population + assert abs(p.rate - expected_rate) < 1e-9 + + +# --------------------------------------------------------------------------- +# _build_response — inequality +# --------------------------------------------------------------------------- + + +class TestBuildResponseInequality: + """Tests for inequality records in _build_response output.""" + + def test__given_completed_report_with_inequality__then_two_records(self, session): + # 
Given — one for baseline, one for reform + report, baseline_sim, reform_sim = create_report_with_simulations(session) + add_inequality_records(session, report, baseline_sim, reform_sim) + + # When + response = _build_response(report, baseline_sim, reform_sim, session) + + # Then + assert response.inequality is not None + assert len(response.inequality) == 2 + + def test__given_inequality_records__then_gini_matches_input(self, session): + # Given + report, baseline_sim, reform_sim = create_report_with_simulations(session) + add_inequality_records(session, report, baseline_sim, reform_sim) + + # When + response = _build_response(report, baseline_sim, reform_sim, session) + + # Then + for ineq in response.inequality: + assert ineq.gini == SAMPLE_GINI + assert ineq.top_10_share == SAMPLE_TOP_10_SHARE + assert ineq.top_1_share == SAMPLE_TOP_1_SHARE + assert ineq.bottom_50_share == SAMPLE_BOTTOM_50_SHARE + + def test__given_inequality_records__then_income_variable_set(self, session): + # Given + report, baseline_sim, reform_sim = create_report_with_simulations(session) + add_inequality_records(session, report, baseline_sim, reform_sim) + + # When + response = _build_response(report, baseline_sim, reform_sim, session) + + # Then + for ineq in response.inequality: + assert ineq.income_variable == SAMPLE_INEQUALITY_INCOME_VAR + + +# --------------------------------------------------------------------------- +# _build_response — budget_summary +# --------------------------------------------------------------------------- + + +class TestBuildResponseBudgetSummary: + """Tests for budget_summary records in _build_response output.""" + + def test__given_completed_report_with_budget__then_correct_count(self, session): + # Given + report, baseline_sim, reform_sim = create_report_with_simulations(session) + add_budget_summary_records(session, report, baseline_sim, reform_sim) + + # When + response = _build_response(report, baseline_sim, reform_sim, session) + + # Then + assert 
response.budget_summary is not None + assert len(response.budget_summary) == len(BUDGET_VARIABLES_UK) + + def test__given_budget_records__then_change_equals_reform_minus_baseline( + self, session + ): + # Given + report, baseline_sim, reform_sim = create_report_with_simulations(session) + add_budget_summary_records(session, report, baseline_sim, reform_sim) + + # When + response = _build_response(report, baseline_sim, reform_sim, session) + + # Then + for b in response.budget_summary: + expected_change = b.reform_total - b.baseline_total + assert abs(b.change - expected_change) < 1e-9 + + def test__given_budget_records__then_variable_names_match_uk_set(self, session): + # Given + report, baseline_sim, reform_sim = create_report_with_simulations(session) + add_budget_summary_records(session, report, baseline_sim, reform_sim) + + # When + response = _build_response(report, baseline_sim, reform_sim, session) + + # Then + var_names = {b.variable_name for b in response.budget_summary} + expected_names = {name for name, _ in BUDGET_VARIABLES_UK} + assert var_names == expected_names + + +# --------------------------------------------------------------------------- +# _build_response — intra_decile +# --------------------------------------------------------------------------- + + +class TestBuildResponseIntraDecile: + """Tests for intra_decile records in _build_response output.""" + + def test__given_completed_report_with_intra_decile__then_11_records(self, session): + # Given + report, baseline_sim, reform_sim = create_report_with_simulations(session) + add_intra_decile_records(session, report, baseline_sim, reform_sim) + + # When + response = _build_response(report, baseline_sim, reform_sim, session) + + # Then + assert response.intra_decile is not None + assert len(response.intra_decile) == INTRA_DECILE_DECILE_COUNT + + def test__given_intra_decile_records__then_decile_0_present_for_overall( + self, session + ): + # Given + report, baseline_sim, reform_sim = 
create_report_with_simulations(session) + add_intra_decile_records(session, report, baseline_sim, reform_sim) + + # When + response = _build_response(report, baseline_sim, reform_sim, session) + + # Then + decile_numbers = {r.decile for r in response.intra_decile} + assert 0 in decile_numbers # overall row + + def test__given_intra_decile_records__then_proportions_sum_to_one(self, session): + # Given + report, baseline_sim, reform_sim = create_report_with_simulations(session) + add_intra_decile_records(session, report, baseline_sim, reform_sim) + + # When + response = _build_response(report, baseline_sim, reform_sim, session) + + # Then + for r in response.intra_decile: + total = ( + r.lose_more_than_5pct + + r.lose_less_than_5pct + + r.no_change + + r.gain_less_than_5pct + + r.gain_more_than_5pct + ) + assert abs(total - 1.0) < 1e-9 + + +# --------------------------------------------------------------------------- +# _build_response — program_statistics & detailed_budget +# --------------------------------------------------------------------------- + + +class TestBuildResponseProgramStatistics: + """Tests for program_statistics and detailed_budget in _build_response.""" + + def test__given_completed_report_with_programs__then_correct_count(self, session): + # Given + report, baseline_sim, reform_sim = create_report_with_simulations(session) + add_program_statistics_records(session, report, baseline_sim, reform_sim) + + # When + response = _build_response(report, baseline_sim, reform_sim, session) + + # Then + assert response.program_statistics is not None + assert len(response.program_statistics) == UK_PROGRAM_COUNT + + def test__given_uk_programs__then_all_10_programs_present(self, session): + # Given + report, baseline_sim, reform_sim = create_report_with_simulations(session) + add_program_statistics_records(session, report, baseline_sim, reform_sim) + + # When + response = _build_response(report, baseline_sim, reform_sim, session) + + # Then + program_names = 
{s.program_name for s in response.program_statistics} + assert program_names == set(UK_PROGRAMS.keys()) + + def test__given_program_records__then_detailed_budget_has_same_keys(self, session): + # Given + report, baseline_sim, reform_sim = create_report_with_simulations(session) + add_program_statistics_records(session, report, baseline_sim, reform_sim) + + # When + response = _build_response(report, baseline_sim, reform_sim, session) + + # Then + assert response.detailed_budget is not None + assert set(response.detailed_budget.keys()) == set(UK_PROGRAMS.keys()) + + def test__given_program_records__then_detailed_budget_has_baseline_reform_difference( + self, session + ): + # Given + report, baseline_sim, reform_sim = create_report_with_simulations(session) + add_program_statistics_records(session, report, baseline_sim, reform_sim) + + # When + response = _build_response(report, baseline_sim, reform_sim, session) + + # Then + for prog_name, entry in response.detailed_budget.items(): + assert "baseline" in entry + assert "reform" in entry + assert "difference" in entry + + def test__given_program_records__then_detailed_budget_difference_matches_change( + self, session + ): + # Given + report, baseline_sim, reform_sim = create_report_with_simulations(session) + add_program_statistics_records(session, report, baseline_sim, reform_sim) + + # When + response = _build_response(report, baseline_sim, reform_sim, session) + + # Then — difference should equal reform - baseline (from ProgramStatistics.change) + for prog_name, entry in response.detailed_budget.items(): + expected_diff = entry["reform"] - entry["baseline"] + assert abs(entry["difference"] - expected_diff) < 1e-9 + + def test__given_no_program_records__then_detailed_budget_is_empty_dict( + self, session + ): + # Given — completed report with no program statistics + report, baseline_sim, reform_sim = create_report_with_simulations(session) + + # When + response = _build_response(report, baseline_sim, reform_sim, 
session) + + # Then + assert response.detailed_budget == {} + + def test__given_program_with_nan_values__then_detailed_budget_has_none( + self, session + ): + # Given + from policyengine_api.models import ProgramStatistics + + report, baseline_sim, reform_sim = create_report_with_simulations(session) + rec = ProgramStatistics( + baseline_simulation_id=baseline_sim.id, + reform_simulation_id=reform_sim.id, + report_id=report.id, + program_name="test_program", + entity="person", + is_tax=True, + baseline_total=float("nan"), + reform_total=float("nan"), + change=float("nan"), + baseline_count=0.0, + reform_count=0.0, + winners=0.0, + losers=0.0, + ) + session.add(rec) + session.commit() + + # When + response = _build_response(report, baseline_sim, reform_sim, session) + + # Then + assert response.detailed_budget["test_program"]["baseline"] is None + assert response.detailed_budget["test_program"]["reform"] is None + assert response.detailed_budget["test_program"]["difference"] is None + + +# --------------------------------------------------------------------------- +# _build_response — fully populated report +# --------------------------------------------------------------------------- + + +class TestBuildResponseFullyPopulated: + """Tests for _build_response with all output tables populated.""" + + def test__given_fully_populated_report__then_all_fields_present(self, session): + # Given + report, baseline_sim, reform_sim = create_fully_populated_report(session) + + # When + response = _build_response(report, baseline_sim, reform_sim, session) + + # Then + assert response.status == ReportStatus.COMPLETED + assert response.poverty is not None + assert response.inequality is not None + assert response.budget_summary is not None + assert response.intra_decile is not None + assert response.program_statistics is not None + assert response.detailed_budget is not None + + def test__given_fully_populated_report__then_report_id_matches(self, session): + # Given + report, 
baseline_sim, reform_sim = create_fully_populated_report(session) + + # When + response = _build_response(report, baseline_sim, reform_sim, session) + + # Then + assert response.report_id == report.id + + def test__given_fully_populated_report__then_simulation_ids_match(self, session): + # Given + report, baseline_sim, reform_sim = create_fully_populated_report(session) + + # When + response = _build_response(report, baseline_sim, reform_sim, session) + + # Then + assert response.baseline_simulation.id == baseline_sim.id + assert response.reform_simulation.id == reform_sim.id diff --git a/tests/test_intra_decile.py b/tests/test_intra_decile.py new file mode 100644 index 0000000..5e107e7 --- /dev/null +++ b/tests/test_intra_decile.py @@ -0,0 +1,311 @@ +"""Tests for intra-decile income change computation.""" + +import numpy as np + +from policyengine_api.api.intra_decile import ( + CATEGORY_COLUMNS, + _income_change_corrected, + _income_change_v1_original, + compute_intra_decile, + get_income_change_formula, +) +from test_fixtures.fixtures_intra_decile import ( + CATEGORY_NAMES, + EXPECTED_DECILE_NUMBERS, + EXPECTED_ROW_COUNT, + make_baseline_income, + make_household_data, + make_single_household_arrays, +) + + +# --------------------------------------------------------------------------- +# Income change formula variants +# --------------------------------------------------------------------------- + + +class TestIncomeChangeFormulas: + """Tests for the two income change formula variants.""" + + def test__given_both_incomes_above_1__when_v1_formula__then_doubles_percentage( + self, + ): + # Given + baseline, reform = make_single_household_arrays(100.0, 103.0) + + # When + result = _income_change_v1_original(baseline, reform) + + # Then — V1 produces ~6% instead of 3% + assert abs(result[0] - 0.06) < 1e-9 + + def test__given_both_incomes_above_1__when_corrected_formula__then_correct_percentage( + self, + ): + # Given + baseline, reform = 
make_single_household_arrays(100.0, 103.0) + + # When + result = _income_change_corrected(baseline, reform) + + # Then + assert abs(result[0] - 0.03) < 1e-9 + + def test__given_zero_baseline__when_corrected_formula__then_caps_denominator_at_1( + self, + ): + # Given + baseline, reform = make_single_household_arrays(0.0, 10.0) + + # When + result = _income_change_corrected(baseline, reform) + + # Then — denominator capped at 1, so change = (10 - 0) / 1 = 10.0 + assert abs(result[0] - 10.0) < 1e-9 + + def test__given_negative_baseline__when_corrected_formula__then_caps_denominator_at_1( + self, + ): + # Given + baseline, reform = make_single_household_arrays(-5.0, 5.0) + + # When + result = _income_change_corrected(baseline, reform) + + # Then — denominator capped at 1, change = (5 - (-5)) / 1 = 10.0 + assert abs(result[0] - 10.0) < 1e-9 + + def test__given_identical_incomes__when_v1_formula__then_zero_change(self): + # Given + baseline, reform = make_single_household_arrays(50_000.0, 50_000.0) + + # When + result = _income_change_v1_original(baseline, reform) + + # Then + assert result[0] == 0.0 + + def test__given_identical_incomes__when_corrected_formula__then_zero_change(self): + # Given + baseline, reform = make_single_household_arrays(50_000.0, 50_000.0) + + # When + result = _income_change_corrected(baseline, reform) + + # Then + assert result[0] == 0.0 + + def test__given_strategy_selector__then_returns_corrected_formula(self): + # When + formula = get_income_change_formula() + + # Then + assert formula is _income_change_corrected + + +# --------------------------------------------------------------------------- +# compute_intra_decile — structure +# --------------------------------------------------------------------------- + + +class TestComputeIntraDecileStructure: + """Tests for the shape and structure of compute_intra_decile output.""" + + def test__given_any_input__then_returns_11_rows(self): + # Given + income = make_baseline_income() + baseline, 
reform = make_household_data(income) + + # When + rows = compute_intra_decile(baseline, reform) + + # Then + assert len(rows) == EXPECTED_ROW_COUNT + + def test__given_any_input__then_decile_numbers_are_1_through_10_plus_0(self): + # Given + income = make_baseline_income() + baseline, reform = make_household_data(income) + + # When + rows = compute_intra_decile(baseline, reform) + decile_numbers = [r["decile"] for r in rows] + + # Then + assert decile_numbers == EXPECTED_DECILE_NUMBERS + + def test__given_any_input__then_each_row_has_all_category_columns(self): + # Given + income = make_baseline_income() + baseline, reform = make_household_data(income) + + # When + rows = compute_intra_decile(baseline, reform) + + # Then + for row in rows: + for col in CATEGORY_NAMES: + assert col in row, f"Missing column {col} in row for decile {row['decile']}" + + def test__given_any_input__then_proportions_sum_to_approximately_one_per_decile( + self, + ): + # Given — a mix of changes so multiple categories are populated + income = make_baseline_income() + reform_income = income * np.where( + np.arange(len(income)) % 3 == 0, 1.03, 0.97 + ) + baseline, reform = make_household_data(income, reform_income) + + # When + rows = compute_intra_decile(baseline, reform) + + # Then + for row in rows: + total = sum(row[col] for col in CATEGORY_NAMES) + assert abs(total - 1.0) < 1e-9, ( + f"Decile {row['decile']} proportions sum to {total}, expected 1.0" + ) + + def test__given_overall_row__then_is_mean_of_decile_proportions(self): + # Given + income = make_baseline_income() + baseline, reform = make_household_data(income, income * 1.03) + + # When + rows = compute_intra_decile(baseline, reform) + decile_rows = [r for r in rows if r["decile"] != 0] + overall_row = [r for r in rows if r["decile"] == 0][0] + + # Then + for col in CATEGORY_NAMES: + expected_mean = sum(r[col] for r in decile_rows) / 10 + assert abs(overall_row[col] - expected_mean) < 1e-9 + + +# 
--------------------------------------------------------------------------- +# compute_intra_decile — classification +# --------------------------------------------------------------------------- + + +class TestComputeIntraDecileClassification: + """Tests for correct classification of income changes into categories.""" + + def test__given_no_income_change__then_all_in_no_change_category(self): + # Given + income = make_baseline_income() + baseline, reform = make_household_data(income, income) + + # When + rows = compute_intra_decile(baseline, reform) + + # Then + for row in rows: + assert row["no_change"] == 1.0 + assert row["gain_less_than_5pct"] == 0.0 + assert row["gain_more_than_5pct"] == 0.0 + assert row["lose_less_than_5pct"] == 0.0 + assert row["lose_more_than_5pct"] == 0.0 + + def test__given_uniform_3pct_raise__then_all_in_gain_less_than_5pct(self): + # Given + income = make_baseline_income() + baseline, reform = make_household_data(income, income * 1.03) + + # When + rows = compute_intra_decile(baseline, reform) + + # Then + for row in rows: + assert row["gain_less_than_5pct"] == 1.0 + + def test__given_uniform_10pct_raise__then_all_in_gain_more_than_5pct(self): + # Given + income = make_baseline_income() + baseline, reform = make_household_data(income, income * 1.10) + + # When + rows = compute_intra_decile(baseline, reform) + + # Then + for row in rows: + assert row["gain_more_than_5pct"] == 1.0 + + def test__given_uniform_3pct_loss__then_all_in_lose_less_than_5pct(self): + # Given + income = make_baseline_income() + baseline, reform = make_household_data(income, income * 0.97) + + # When + rows = compute_intra_decile(baseline, reform) + + # Then + for row in rows: + assert row["lose_less_than_5pct"] == 1.0 + + def test__given_uniform_10pct_loss__then_all_in_lose_more_than_5pct(self): + # Given + income = make_baseline_income() + baseline, reform = make_household_data(income, income * 0.90) + + # When + rows = compute_intra_decile(baseline, reform) + + 
# Then + for row in rows: + assert row["lose_more_than_5pct"] == 1.0 + + def test__given_boundary_at_exactly_5pct_gain__then_in_gain_less_than_5pct(self): + # Given — BOUNDS uses (lower, upper], so exactly 0.05 falls in gain_less_than_5pct + # because the gain_less_than_5pct interval is (1e-3, 0.05] + income = make_baseline_income() + baseline, reform = make_household_data(income, income * 1.05) + + # When + rows = compute_intra_decile(baseline, reform) + + # Then + for row in rows: + assert row["gain_less_than_5pct"] == 1.0 + + def test__given_boundary_at_exactly_0_1pct_gain__then_in_no_change(self): + # Given — exactly 0.001 falls in no_change because the no_change + # interval is (-1e-3, 1e-3] and 0.001 == 1e-3 which is the upper bound + income = make_baseline_income() + baseline, reform = make_household_data(income, income * 1.001) + + # When + rows = compute_intra_decile(baseline, reform) + + # Then + for row in rows: + assert row["no_change"] == 1.0 + + +# --------------------------------------------------------------------------- +# compute_intra_decile — edge cases +# --------------------------------------------------------------------------- + + +class TestComputeIntraDecileEdgeCases: + """Tests for edge cases in compute_intra_decile.""" + + def test__given_zero_people_in_decile__then_proportions_are_zero(self): + # Given — empty decile 5 by setting its households' people count to 0 (weights stay at 100) + income = make_baseline_income() + weights = np.ones(len(income)) * 100.0 + people = np.full(len(income), 2.0) + # Decile 5 is indices 40-49 + people[40:50] = 0.0 + + baseline, reform = make_household_data( + income, income * 1.03, weights=weights, people=people + ) + + # When + rows = compute_intra_decile(baseline, reform) + + # Then — decile 5 should have all-zero proportions + decile_5 = [r for r in rows if r["decile"] == 5][0] + for col in CATEGORY_NAMES: + assert decile_5[col] == 0.0