diff --git a/us/states/sc/data_exploration.ipynb b/us/states/sc/data_exploration.ipynb new file mode 100644 index 0000000..e6978a6 --- /dev/null +++ b/us/states/sc/data_exploration.ipynb @@ -0,0 +1,479 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# SC Dataset Exploration\n", + "\n", + "This notebook explores the South Carolina (SC) dataset to understand household counts, income distribution, and demographic characteristics." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from policyengine_us import Microsimulation\n", + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "SC_DATASET = \"hf://policyengine/policyengine-us-data/states/SC.h5\"" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# Load SC dataset\n", + "sim = Microsimulation(dataset=SC_DATASET)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of households in dataset: 35,324\n", + "Household count (weighted): 1,887,388\n", + "Person count (weighted): 5,451,832\n" + ] + } + ], + "source": [ + "# Check dataset size - use .values to get raw arrays (avoid MicroSeries auto-weighting)\n", + "household_weight = sim.calculate(\"household_weight\", period=2025).values\n", + "household_count = sim.calculate(\"household_count\", period=2025, map_to=\"household\").values\n", + "person_count = sim.calculate(\"person_count\", period=2025, map_to=\"household\").values\n", + "\n", + "# Weighted sums using raw arrays\n", + "weighted_household_count = (household_count * household_weight).sum()\n", + "weighted_person_count = (person_count * household_weight).sum()\n", + "\n", + "print(f\"Number of households in dataset: {len(household_weight):,}\")\n", + "print(f\"Household count (weighted): {weighted_household_count:,.0f}\")\n", + 
"print(f\"Person count (weighted): {weighted_person_count:,.0f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "============================================================\n", + "INCOME DISTRIBUTION SUMMARY\n", + "============================================================\n", + "\n", + "Household AGI:\n", + " Unweighted median: $41,884\n", + " Weighted median: $43,222\n", + " Weighted average: $103,858\n", + "\n", + "Person AGI:\n", + " Unweighted median: $40,216\n", + " Weighted median: $38,962\n", + " Weighted average: $93,926\n", + "\n", + "Average household size: 2.9\n", + "\n", + "Weighted household AGI percentiles:\n", + " 25th percentile: $9,425\n", + " 50th percentile: $43,222\n", + " 75th percentile: $91,877\n", + " 90th percentile: $167,068\n", + " 95th percentile: $268,311\n", + " Max AGI: $6,430,892\n" + ] + } + ], + "source": [ + "# Check income distribution (weighted vs unweighted, household and person level)\n", + "# Use .values to get raw numpy arrays (avoid MicroSeries auto-weighting)\n", + "agi_hh_array = sim.calculate(\"adjusted_gross_income\", period=2025, map_to=\"household\").values\n", + "hh_weights = sim.calculate(\"household_weight\", period=2025).values\n", + "\n", + "agi_person_array = sim.calculate(\"adjusted_gross_income\", period=2025, map_to=\"person\").values\n", + "person_weights = sim.calculate(\"person_weight\", period=2025).values\n", + "\n", + "# Weighted percentile calculation\n", + "def weighted_percentile(values, weights, percentile):\n", + " sorted_indices = np.argsort(values)\n", + " sorted_values = values[sorted_indices]\n", + " sorted_weights = weights[sorted_indices]\n", + " cumulative_weight = np.cumsum(sorted_weights)\n", + " idx = np.searchsorted(cumulative_weight, cumulative_weight[-1] * percentile / 100)\n", + " return sorted_values[min(idx, len(sorted_values)-1)]\n", + "\n", + "# Unweighted 
medians\n", + "unweighted_median_hh = np.median(agi_hh_array)\n", + "unweighted_median_person = np.median(agi_person_array)\n", + "\n", + "# Weighted medians\n", + "weighted_median_hh = weighted_percentile(agi_hh_array, hh_weights, 50)\n", + "weighted_median_person = weighted_percentile(agi_person_array, person_weights, 50)\n", + "\n", + "# Weighted averages\n", + "weighted_avg_hh = np.average(agi_hh_array, weights=hh_weights)\n", + "weighted_avg_person = np.average(agi_person_array, weights=person_weights)\n", + "\n", + "# Average household size\n", + "total_persons = person_weights.sum()\n", + "total_households = hh_weights.sum()\n", + "avg_hh_size = total_persons / total_households\n", + "\n", + "print(\"=\" * 60)\n", + "print(\"INCOME DISTRIBUTION SUMMARY\")\n", + "print(\"=\" * 60)\n", + "print(f\"\\nHousehold AGI:\")\n", + "print(f\" Unweighted median: ${unweighted_median_hh:,.0f}\")\n", + "print(f\" Weighted median: ${weighted_median_hh:,.0f}\")\n", + "print(f\" Weighted average: ${weighted_avg_hh:,.0f}\")\n", + "\n", + "print(f\"\\nPerson AGI:\")\n", + "print(f\" Unweighted median: ${unweighted_median_person:,.0f}\")\n", + "print(f\" Weighted median: ${weighted_median_person:,.0f}\")\n", + "print(f\" Weighted average: ${weighted_avg_person:,.0f}\")\n", + "\n", + "print(f\"\\nAverage household size: {avg_hh_size:.1f}\")\n", + "\n", + "print(f\"\\nWeighted household AGI percentiles:\")\n", + "print(f\" 25th percentile: ${weighted_percentile(agi_hh_array, hh_weights, 25):,.0f}\")\n", + "print(f\" 50th percentile: ${weighted_percentile(agi_hh_array, hh_weights, 50):,.0f}\")\n", + "print(f\" 75th percentile: ${weighted_percentile(agi_hh_array, hh_weights, 75):,.0f}\")\n", + "print(f\" 90th percentile: ${weighted_percentile(agi_hh_array, hh_weights, 90):,.0f}\")\n", + "print(f\" 95th percentile: ${weighted_percentile(agi_hh_array, hh_weights, 95):,.0f}\")\n", + "print(f\" Max AGI: ${agi_hh_array.max():,.0f}\")" + ] + }, + { + "cell_type": "code", + 
"execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Households with children (weighted):\n", + " Total households with children: 598,564\n", + " Households with 1 child: 247,956\n", + " Households with 2 children: 190,545\n", + " Households with 3+ children: 160,063\n" + ] + } + ], + "source": [ + "# Check households with children - use .values for raw arrays\n", + "is_child = sim.calculate(\"is_child\", period=2025, map_to=\"person\").values\n", + "household_id = sim.calculate(\"household_id\", period=2025, map_to=\"person\").values\n", + "household_weight_person = sim.calculate(\"household_weight\", period=2025, map_to=\"person\").values\n", + "\n", + "# Create DataFrame\n", + "df_households = pd.DataFrame({\n", + " 'household_id': household_id,\n", + " 'is_child': is_child,\n", + " 'household_weight': household_weight_person\n", + "})\n", + "\n", + "# Count children per household\n", + "children_per_household = df_households.groupby('household_id').agg({\n", + " 'is_child': 'sum',\n", + " 'household_weight': 'first'\n", + "}).reset_index()\n", + "\n", + "# Calculate weighted household counts\n", + "total_households_with_children = children_per_household[children_per_household['is_child'] > 0]['household_weight'].sum()\n", + "households_with_1_child = children_per_household[children_per_household['is_child'] == 1]['household_weight'].sum()\n", + "households_with_2_children = children_per_household[children_per_household['is_child'] == 2]['household_weight'].sum()\n", + "households_with_3plus_children = children_per_household[children_per_household['is_child'] >= 3]['household_weight'].sum()\n", + "\n", + "print(f\"\\nHouseholds with children (weighted):\")\n", + "print(f\" Total households with children: {total_households_with_children:,.0f}\")\n", + "print(f\" Households with 1 child: {households_with_1_child:,.0f}\")\n", + "print(f\" Households with 2 children: 
{households_with_2_children:,.0f}\")\n", + "print(f\" Households with 3+ children: {households_with_3plus_children:,.0f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Children by age:\n", + " Total children under 18: 1,198,147\n", + " Children under 6: 349,101\n", + " Children under 3: 169,412\n" + ] + } + ], + "source": [ + "# Check children by age groups - use .values for raw arrays\n", + "# Pass period=2025 explicitly, as every other cell does, so ages and weights are\n", + "# not taken from the simulation's default calculation period.\n", + "df = pd.DataFrame({\n", + " \"household_id\": sim.calculate(\"household_id\", period=2025, map_to=\"person\").values,\n", + " \"tax_unit_id\": sim.calculate(\"tax_unit_id\", period=2025, map_to=\"person\").values,\n", + " \"person_id\": sim.calculate(\"person_id\", period=2025, map_to=\"person\").values,\n", + " \"age\": sim.calculate(\"age\", period=2025, map_to=\"person\").values,\n", + " \"person_weight\": sim.calculate(\"person_weight\", period=2025, map_to=\"person\").values\n", + "})\n", + "\n", + "# Filter for children and apply weights\n", + "children_under_18_df = df[df['age'] < 18]\n", + "children_under_6_df = df[df['age'] < 6]\n", + "children_under_3_df = df[df['age'] < 3]\n", + "\n", + "# Calculate weighted totals\n", + "total_children = children_under_18_df['person_weight'].sum()\n", + "children_under_6 = children_under_6_df['person_weight'].sum()\n", + "children_under_3 = children_under_3_df['person_weight'].sum()\n", + "\n", + "print(f\"\\nChildren by age:\")\n", + "print(f\" Total children under 18: {total_children:,.0f}\")\n", + "print(f\" Children under 6: {children_under_6:,.0f}\")\n", + "print(f\" Children under 3: {children_under_3:,.0f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "=================================================================\n", + "SC DATASET SUMMARY - WEIGHTED (Population Estimates)\n", +
"=================================================================\n", + " Metric Value\n", + " Household count (weighted) 1,887,388\n", + " Person count (weighted) 5,451,832\n", + " Average household size 2.9\n", + " Weighted median household AGI $43,222\n", + " Weighted average household AGI $103,858\n", + " Weighted median person AGI $38,962\n", + " Weighted average person AGI $93,926\n", + "Unweighted median household AGI $41,884\n", + " Unweighted median person AGI $40,216\n", + " 25th percentile household AGI $9,425\n", + " 75th percentile household AGI $91,877\n", + " 90th percentile household AGI $167,068\n", + " 95th percentile household AGI $268,311\n", + " Max household AGI $6,430,892\n", + " Total households with children 598,564\n", + " Households with 1 child 247,956\n", + " Households with 2 children 190,545\n", + " Households with 3+ children 160,063\n", + " Total children under 18 1,198,147\n", + " Children under 6 349,101\n", + " Children under 3 169,412\n", + "=================================================================\n", + "\n", + "Summary saved to: sc_dataset_summary_weighted.csv\n" + ] + } + ], + "source": [ + "# Create comprehensive summary table\n", + "summary_data = {\n", + " 'Metric': [\n", + " 'Household count (weighted)',\n", + " 'Person count (weighted)',\n", + " 'Average household size',\n", + " 'Weighted median household AGI',\n", + " 'Weighted average household AGI',\n", + " 'Weighted median person AGI',\n", + " 'Weighted average person AGI',\n", + " 'Unweighted median household AGI',\n", + " 'Unweighted median person AGI',\n", + " '25th percentile household AGI',\n", + " '75th percentile household AGI',\n", + " '90th percentile household AGI',\n", + " '95th percentile household AGI',\n", + " 'Max household AGI',\n", + " 'Total households with children',\n", + " 'Households with 1 child',\n", + " 'Households with 2 children',\n", + " 'Households with 3+ children',\n", + " 'Total children under 18',\n", + " 'Children under 
6',\n", + " 'Children under 3'\n", + " ],\n", + " 'Value': [\n", + " f\"{weighted_household_count:,.0f}\",\n", + " f\"{weighted_person_count:,.0f}\",\n", + " f\"{avg_hh_size:.1f}\",\n", + " f\"${weighted_median_hh:,.0f}\",\n", + " f\"${weighted_avg_hh:,.0f}\",\n", + " f\"${weighted_median_person:,.0f}\",\n", + " f\"${weighted_avg_person:,.0f}\",\n", + " f\"${unweighted_median_hh:,.0f}\",\n", + " f\"${unweighted_median_person:,.0f}\",\n", + " f\"${weighted_percentile(agi_hh_array, hh_weights, 25):,.0f}\",\n", + " f\"${weighted_percentile(agi_hh_array, hh_weights, 75):,.0f}\",\n", + " f\"${weighted_percentile(agi_hh_array, hh_weights, 90):,.0f}\",\n", + " f\"${weighted_percentile(agi_hh_array, hh_weights, 95):,.0f}\",\n", + " f\"${agi_hh_array.max():,.0f}\",\n", + " f\"{total_households_with_children:,.0f}\",\n", + " f\"{households_with_1_child:,.0f}\",\n", + " f\"{households_with_2_children:,.0f}\",\n", + " f\"{households_with_3plus_children:,.0f}\",\n", + " f\"{total_children:,.0f}\",\n", + " f\"{children_under_6:,.0f}\",\n", + " f\"{children_under_3:,.0f}\"\n", + " ]\n", + "}\n", + "\n", + "summary_df = pd.DataFrame(summary_data)\n", + "\n", + "print(\"\\n\" + \"=\"*65)\n", + "print(\"SC DATASET SUMMARY - WEIGHTED (Population Estimates)\")\n", + "print(\"=\"*65)\n", + "print(summary_df.to_string(index=False))\n", + "print(\"=\"*65)\n", + "\n", + "# Save table\n", + "summary_df.to_csv('sc_dataset_summary_weighted.csv', index=False)\n", + "print(\"\\nSummary saved to: sc_dataset_summary_weighted.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "======================================================================\n", + "HOUSEHOLDS WITH $0 INCOME\n", + "======================================================================\n", + "Household count: 179,119\n", + "Percentage of all households: 9.49%\n", + 
"======================================================================\n" + ] + } + ], + "source": [ + "# Households with $0 income - using raw arrays\n", + "agi_hh = sim.calculate(\"adjusted_gross_income\", period=2025, map_to=\"household\").values\n", + "weights = sim.calculate(\"household_weight\", period=2025).values\n", + "\n", + "zero_income_mask = agi_hh == 0\n", + "zero_income_count = weights[zero_income_mask].sum()\n", + "total_households = weights.sum()\n", + "\n", + "print(\"\\n\" + \"=\"*70)\n", + "print(\"HOUSEHOLDS WITH $0 INCOME\")\n", + "print(\"=\"*70)\n", + "print(f\"Household count: {zero_income_count:,.0f}\")\n", + "print(f\"Percentage of all households: {zero_income_count / total_households * 100:.2f}%\")\n", + "print(\"=\"*70)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "======================================================================\n", + "HOUSEHOLD COUNTS BY INCOME BRACKET\n", + "======================================================================\n", + "Income Bracket Households % of All Households\n", + " $0-$10k 434,505 23.02%\n", + " $10k-$20k 155,370 8.23%\n", + " $20k-$30k 149,595 7.93%\n", + " $30k-$40k 115,365 6.11%\n", + " $40k-$50k 127,566 6.76%\n", + " $50k-$60k 110,405 5.85%\n", + "======================================================================\n", + "\n", + "Total households in $0-$60k range: 1,092,805\n", + "Percentage of all households in $0-$60k range: 57.90%\n" + ] + } + ], + "source": [ + "# Household counts by income brackets\n", + "income_brackets = [\n", + " (0, 10000, \"$0-$10k\"),\n", + " (10000, 20000, \"$10k-$20k\"),\n", + " (20000, 30000, \"$20k-$30k\"),\n", + " (30000, 40000, \"$30k-$40k\"),\n", + " (40000, 50000, \"$40k-$50k\"),\n", + " (50000, 60000, \"$50k-$60k\")\n", + "]\n", + "\n", + "bracket_data = []\n", + "for lower, upper, label in income_brackets:\n", + " mask = 
(agi_hh >= lower) & (agi_hh < upper)\n", + " count = weights[mask].sum()\n", + " pct_of_total = (count / total_households) * 100\n", + " \n", + " bracket_data.append({\n", + " \"Income Bracket\": label,\n", + " \"Households\": f\"{count:,.0f}\",\n", + " \"% of All Households\": f\"{pct_of_total:.2f}%\"\n", + " })\n", + "\n", + "income_df = pd.DataFrame(bracket_data)\n", + "\n", + "print(\"\\n\" + \"=\"*70)\n", + "print(\"HOUSEHOLD COUNTS BY INCOME BRACKET\")\n", + "print(\"=\"*70)\n", + "print(income_df.to_string(index=False))\n", + "print(\"=\"*70)\n", + "\n", + "# Total in $0-$60k range\n", + "total_in_range = sum([weights[(agi_hh >= lower) & (agi_hh < upper)].sum() for lower, upper, _ in income_brackets])\n", + "print(f\"\\nTotal households in $0-$60k range: {total_in_range:,.0f}\")\n", + "print(f\"Percentage of all households in $0-$60k range: {total_in_range / total_households * 100:.2f}%\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/us/states/sc/data_exploration_test.ipynb b/us/states/sc/data_exploration_test.ipynb new file mode 100644 index 0000000..7f6b103 --- /dev/null +++ b/us/states/sc/data_exploration_test.ipynb @@ -0,0 +1,489 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "cell-0", + "metadata": {}, + "source": [ + "# SC Dataset Exploration (Test - March 2025)\n", + "\n", + "This notebook explores the South Carolina (SC) **test** dataset to understand household counts, income distribution, and demographic characteristics." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "cell-1", + "metadata": {}, + "outputs": [], + "source": [ + "from policyengine_us import Microsimulation\n", + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "SC_DATASET = \"hf://policyengine/test/mar/SC.h5\"" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "cell-2", + "metadata": {}, + "outputs": [], + "source": [ + "# Load SC test dataset\n", + "sim = Microsimulation(dataset=SC_DATASET)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "cell-3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of households in dataset: 31,322\n", + "Household count (weighted): 1,844,111\n", + "Person count (weighted): 5,389,226\n" + ] + } + ], + "source": [ + "# Check dataset size - use .values to get raw arrays (avoid MicroSeries auto-weighting)\n", + "household_weight = sim.calculate(\"household_weight\", period=2025).values\n", + "household_count = sim.calculate(\"household_count\", period=2025, map_to=\"household\").values\n", + "person_count = sim.calculate(\"person_count\", period=2025, map_to=\"household\").values\n", + "\n", + "# Weighted sums using raw arrays\n", + "weighted_household_count = (household_count * household_weight).sum()\n", + "weighted_person_count = (person_count * household_weight).sum()\n", + "\n", + "print(f\"Number of households in dataset: {len(household_weight):,}\")\n", + "print(f\"Household count (weighted): {weighted_household_count:,.0f}\")\n", + "print(f\"Person count (weighted): {weighted_person_count:,.0f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "cell-4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "============================================================\n", + "INCOME DISTRIBUTION SUMMARY\n", + "============================================================\n", + "\n", + 
"Household AGI:\n", + " Unweighted median: $57,308\n", + " Weighted median: $34,927\n", + " Weighted average: $74,061\n", + "\n", + "Person AGI:\n", + " Unweighted median: $58,750\n", + " Weighted median: $34,911\n", + " Weighted average: $78,962\n", + "\n", + "Average household size: 2.9\n", + "\n", + "Weighted household AGI percentiles:\n", + " 25th percentile: $2,489\n", + " 50th percentile: $34,927\n", + " 75th percentile: $86,301\n", + " 90th percentile: $140,239\n", + " 95th percentile: $236,759\n", + " Max AGI: $418,650,960\n" + ] + } + ], + "source": [ + "# Check income distribution (weighted vs unweighted, household and person level)\n", + "# Use .values to get raw numpy arrays (avoid MicroSeries auto-weighting)\n", + "agi_hh_array = sim.calculate(\"adjusted_gross_income\", period=2025, map_to=\"household\").values\n", + "hh_weights = sim.calculate(\"household_weight\", period=2025).values\n", + "\n", + "agi_person_array = sim.calculate(\"adjusted_gross_income\", period=2025, map_to=\"person\").values\n", + "person_weights = sim.calculate(\"person_weight\", period=2025).values\n", + "\n", + "# Weighted percentile calculation\n", + "def weighted_percentile(values, weights, percentile):\n", + " sorted_indices = np.argsort(values)\n", + " sorted_values = values[sorted_indices]\n", + " sorted_weights = weights[sorted_indices]\n", + " cumulative_weight = np.cumsum(sorted_weights)\n", + " idx = np.searchsorted(cumulative_weight, cumulative_weight[-1] * percentile / 100)\n", + " return sorted_values[min(idx, len(sorted_values)-1)]\n", + "\n", + "# Unweighted medians\n", + "unweighted_median_hh = np.median(agi_hh_array)\n", + "unweighted_median_person = np.median(agi_person_array)\n", + "\n", + "# Weighted medians\n", + "weighted_median_hh = weighted_percentile(agi_hh_array, hh_weights, 50)\n", + "weighted_median_person = weighted_percentile(agi_person_array, person_weights, 50)\n", + "\n", + "# Weighted averages\n", + "weighted_avg_hh = np.average(agi_hh_array, 
weights=hh_weights)\n", + "weighted_avg_person = np.average(agi_person_array, weights=person_weights)\n", + "\n", + "# Average household size\n", + "total_persons = person_weights.sum()\n", + "total_households = hh_weights.sum()\n", + "avg_hh_size = total_persons / total_households\n", + "\n", + "print(\"=\" * 60)\n", + "print(\"INCOME DISTRIBUTION SUMMARY\")\n", + "print(\"=\" * 60)\n", + "print(f\"\\nHousehold AGI:\")\n", + "print(f\" Unweighted median: ${unweighted_median_hh:,.0f}\")\n", + "print(f\" Weighted median: ${weighted_median_hh:,.0f}\")\n", + "print(f\" Weighted average: ${weighted_avg_hh:,.0f}\")\n", + "\n", + "print(f\"\\nPerson AGI:\")\n", + "print(f\" Unweighted median: ${unweighted_median_person:,.0f}\")\n", + "print(f\" Weighted median: ${weighted_median_person:,.0f}\")\n", + "print(f\" Weighted average: ${weighted_avg_person:,.0f}\")\n", + "\n", + "print(f\"\\nAverage household size: {avg_hh_size:.1f}\")\n", + "\n", + "print(f\"\\nWeighted household AGI percentiles:\")\n", + "print(f\" 25th percentile: ${weighted_percentile(agi_hh_array, hh_weights, 25):,.0f}\")\n", + "print(f\" 50th percentile: ${weighted_percentile(agi_hh_array, hh_weights, 50):,.0f}\")\n", + "print(f\" 75th percentile: ${weighted_percentile(agi_hh_array, hh_weights, 75):,.0f}\")\n", + "print(f\" 90th percentile: ${weighted_percentile(agi_hh_array, hh_weights, 90):,.0f}\")\n", + "print(f\" 95th percentile: ${weighted_percentile(agi_hh_array, hh_weights, 95):,.0f}\")\n", + "print(f\" Max AGI: ${agi_hh_array.max():,.0f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "cell-5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Households with children (weighted):\n", + " Total households with children: 663,513\n", + " Households with 1 child: 303,647\n", + " Households with 2 children: 209,804\n", + " Households with 3+ children: 150,062\n" + ] + } + ], + "source": [ + "# Check households with 
children - use .values for raw arrays\n", + "is_child = sim.calculate(\"is_child\", period=2025, map_to=\"person\").values\n", + "household_id = sim.calculate(\"household_id\", period=2025, map_to=\"person\").values\n", + "household_weight_person = sim.calculate(\"household_weight\", period=2025, map_to=\"person\").values\n", + "\n", + "# Create DataFrame\n", + "df_households = pd.DataFrame({\n", + " 'household_id': household_id,\n", + " 'is_child': is_child,\n", + " 'household_weight': household_weight_person\n", + "})\n", + "\n", + "# Count children per household\n", + "children_per_household = df_households.groupby('household_id').agg({\n", + " 'is_child': 'sum',\n", + " 'household_weight': 'first'\n", + "}).reset_index()\n", + "\n", + "# Calculate weighted household counts\n", + "total_households_with_children = children_per_household[children_per_household['is_child'] > 0]['household_weight'].sum()\n", + "households_with_1_child = children_per_household[children_per_household['is_child'] == 1]['household_weight'].sum()\n", + "households_with_2_children = children_per_household[children_per_household['is_child'] == 2]['household_weight'].sum()\n", + "households_with_3plus_children = children_per_household[children_per_household['is_child'] >= 3]['household_weight'].sum()\n", + "\n", + "print(f\"\\nHouseholds with children (weighted):\")\n", + "print(f\" Total households with children: {total_households_with_children:,.0f}\")\n", + "print(f\" Households with 1 child: {households_with_1_child:,.0f}\")\n", + "print(f\" Households with 2 children: {households_with_2_children:,.0f}\")\n", + "print(f\" Households with 3+ children: {households_with_3plus_children:,.0f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "cell-6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Children by age:\n", + " Total children under 18: 1,247,050\n", + " Children under 6: 361,890\n", + " Children 
under 3: 177,869\n" + ] + } + ], + "source": [ + "# Check children by age groups - use .values for raw arrays\n", + "# Pass period=2025 explicitly, as every other cell does, so ages and weights are\n", + "# not taken from the simulation's default calculation period.\n", + "df = pd.DataFrame({\n", + " \"household_id\": sim.calculate(\"household_id\", period=2025, map_to=\"person\").values,\n", + " \"tax_unit_id\": sim.calculate(\"tax_unit_id\", period=2025, map_to=\"person\").values,\n", + " \"person_id\": sim.calculate(\"person_id\", period=2025, map_to=\"person\").values,\n", + " \"age\": sim.calculate(\"age\", period=2025, map_to=\"person\").values,\n", + " \"person_weight\": sim.calculate(\"person_weight\", period=2025, map_to=\"person\").values\n", + "})\n", + "\n", + "# Filter for children and apply weights\n", + "children_under_18_df = df[df['age'] < 18]\n", + "children_under_6_df = df[df['age'] < 6]\n", + "children_under_3_df = df[df['age'] < 3]\n", + "\n", + "# Calculate weighted totals\n", + "total_children = children_under_18_df['person_weight'].sum()\n", + "children_under_6 = children_under_6_df['person_weight'].sum()\n", + "children_under_3 = children_under_3_df['person_weight'].sum()\n", + "\n", + "print(f\"\\nChildren by age:\")\n", + "print(f\" Total children under 18: {total_children:,.0f}\")\n", + "print(f\" Children under 6: {children_under_6:,.0f}\")\n", + "print(f\" Children under 3: {children_under_3:,.0f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "cell-7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "=================================================================\n", + "SC TEST DATASET SUMMARY - WEIGHTED (Population Estimates)\n", + "=================================================================\n", + " Metric Value\n", + " Household count (weighted) 1,844,111\n", + " Person count (weighted) 5,389,226\n", + " Average household size 2.9\n", + " Weighted median household AGI $34,927\n", + " Weighted average household AGI $74,061\n", + " Weighted median person AGI $34,911\n", + " Weighted average person AGI $78,962\n", + "Unweighted median household AGI $57,308\n", + "
Unweighted median person AGI $58,750\n", + " 25th percentile household AGI $2,489\n", + " 75th percentile household AGI $86,301\n", + " 90th percentile household AGI $140,239\n", + " 95th percentile household AGI $236,759\n", + " Max household AGI $418,650,960\n", + " Total households with children 663,513\n", + " Households with 1 child 303,647\n", + " Households with 2 children 209,804\n", + " Households with 3+ children 150,062\n", + " Total children under 18 1,247,050\n", + " Children under 6 361,890\n", + " Children under 3 177,869\n", + "=================================================================\n", + "\n", + "Summary saved to: sc_test_dataset_summary_weighted.csv\n" + ] + } + ], + "source": [ + "# Create comprehensive summary table\n", + "summary_data = {\n", + " 'Metric': [\n", + " 'Household count (weighted)',\n", + " 'Person count (weighted)',\n", + " 'Average household size',\n", + " 'Weighted median household AGI',\n", + " 'Weighted average household AGI',\n", + " 'Weighted median person AGI',\n", + " 'Weighted average person AGI',\n", + " 'Unweighted median household AGI',\n", + " 'Unweighted median person AGI',\n", + " '25th percentile household AGI',\n", + " '75th percentile household AGI',\n", + " '90th percentile household AGI',\n", + " '95th percentile household AGI',\n", + " 'Max household AGI',\n", + " 'Total households with children',\n", + " 'Households with 1 child',\n", + " 'Households with 2 children',\n", + " 'Households with 3+ children',\n", + " 'Total children under 18',\n", + " 'Children under 6',\n", + " 'Children under 3'\n", + " ],\n", + " 'Value': [\n", + " f\"{weighted_household_count:,.0f}\",\n", + " f\"{weighted_person_count:,.0f}\",\n", + " f\"{avg_hh_size:.1f}\",\n", + " f\"${weighted_median_hh:,.0f}\",\n", + " f\"${weighted_avg_hh:,.0f}\",\n", + " f\"${weighted_median_person:,.0f}\",\n", + " f\"${weighted_avg_person:,.0f}\",\n", + " f\"${unweighted_median_hh:,.0f}\",\n", + " f\"${unweighted_median_person:,.0f}\",\n", + 
" f\"${weighted_percentile(agi_hh_array, hh_weights, 25):,.0f}\",\n", + " f\"${weighted_percentile(agi_hh_array, hh_weights, 75):,.0f}\",\n", + " f\"${weighted_percentile(agi_hh_array, hh_weights, 90):,.0f}\",\n", + " f\"${weighted_percentile(agi_hh_array, hh_weights, 95):,.0f}\",\n", + " f\"${agi_hh_array.max():,.0f}\",\n", + " f\"{total_households_with_children:,.0f}\",\n", + " f\"{households_with_1_child:,.0f}\",\n", + " f\"{households_with_2_children:,.0f}\",\n", + " f\"{households_with_3plus_children:,.0f}\",\n", + " f\"{total_children:,.0f}\",\n", + " f\"{children_under_6:,.0f}\",\n", + " f\"{children_under_3:,.0f}\"\n", + " ]\n", + "}\n", + "\n", + "summary_df = pd.DataFrame(summary_data)\n", + "\n", + "print(\"\\n\" + \"=\"*65)\n", + "print(\"SC TEST DATASET SUMMARY - WEIGHTED (Population Estimates)\")\n", + "print(\"=\"*65)\n", + "print(summary_df.to_string(index=False))\n", + "print(\"=\"*65)\n", + "\n", + "# Save table\n", + "summary_df.to_csv('sc_test_dataset_summary_weighted.csv', index=False)\n", + "print(\"\\nSummary saved to: sc_test_dataset_summary_weighted.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "cell-8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "======================================================================\n", + "HOUSEHOLDS WITH $0 INCOME\n", + "======================================================================\n", + "Household count: 227,976\n", + "Percentage of all households: 12.36%\n", + "======================================================================\n" + ] + } + ], + "source": [ + "# Households with $0 income - using raw arrays\n", + "agi_hh = sim.calculate(\"adjusted_gross_income\", period=2025, map_to=\"household\").values\n", + "weights = sim.calculate(\"household_weight\", period=2025).values\n", + "\n", + "zero_income_mask = agi_hh == 0\n", + "zero_income_count = weights[zero_income_mask].sum()\n", + 
"total_households = weights.sum()\n", + "\n", + "print(\"\\n\" + \"=\"*70)\n", + "print(\"HOUSEHOLDS WITH $0 INCOME\")\n", + "print(\"=\"*70)\n", + "print(f\"Household count: {zero_income_count:,.0f}\")\n", + "print(f\"Percentage of all households: {zero_income_count / total_households * 100:.2f}%\")\n", + "print(\"=\"*70)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "cell-9", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "======================================================================\n", + "HOUSEHOLD COUNTS BY INCOME BRACKET\n", + "======================================================================\n", + "Income Bracket Households % of All Households\n", + " $0-$10k 553,836 30.03%\n", + " $10k-$20k 158,183 8.58%\n", + " $20k-$30k 131,263 7.12%\n", + " $30k-$40k 113,749 6.17%\n", + " $40k-$50k 101,002 5.48%\n", + " $50k-$60k 98,917 5.36%\n", + "======================================================================\n", + "\n", + "Total households in $0-$60k range: 1,156,950\n", + "Percentage of all households in $0-$60k range: 62.74%\n" + ] + } + ], + "source": [ + "# Household counts by income brackets\n", + "income_brackets = [\n", + " (0, 10000, \"$0-$10k\"),\n", + " (10000, 20000, \"$10k-$20k\"),\n", + " (20000, 30000, \"$20k-$30k\"),\n", + " (30000, 40000, \"$30k-$40k\"),\n", + " (40000, 50000, \"$40k-$50k\"),\n", + " (50000, 60000, \"$50k-$60k\")\n", + "]\n", + "\n", + "bracket_data = []\n", + "for lower, upper, label in income_brackets:\n", + " mask = (agi_hh >= lower) & (agi_hh < upper)\n", + " count = weights[mask].sum()\n", + " pct_of_total = (count / total_households) * 100\n", + " \n", + " bracket_data.append({\n", + " \"Income Bracket\": label,\n", + " \"Households\": f\"{count:,.0f}\",\n", + " \"% of All Households\": f\"{pct_of_total:.2f}%\"\n", + " })\n", + "\n", + "income_df = pd.DataFrame(bracket_data)\n", + "\n", + "print(\"\\n\" + \"=\"*70)\n", + 
"print(\"HOUSEHOLD COUNTS BY INCOME BRACKET\")\n", + "print(\"=\"*70)\n", + "print(income_df.to_string(index=False))\n", + "print(\"=\"*70)\n", + "\n", + "# Total in $0-$60k range\n", + "total_in_range = sum([weights[(agi_hh >= lower) & (agi_hh < upper)].sum() for lower, upper, _ in income_brackets])\n", + "print(f\"\\nTotal households in $0-$60k range: {total_in_range:,.0f}\")\n", + "print(f\"Percentage of all households in $0-$60k range: {total_in_range / total_households * 100:.2f}%\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/us/states/sc/h4216_analysis/5.21_rate/rfa_h4216_5.21_analysis.csv b/us/states/sc/h4216_analysis/5.21_rate/rfa_h4216_5.21_analysis.csv new file mode 100644 index 0000000..a150bea --- /dev/null +++ b/us/states/sc/h4216_analysis/5.21_rate/rfa_h4216_5.21_analysis.csv @@ -0,0 +1,16 @@ +Federal AGI Range,Est # Returns,Est % Returns,Old Avg Tax Liability,New Avg Tax Liability,Returns with Tax Change,% Returns in Range with Change,Old Avg Tax (Changed),New Avg Tax (Changed),Avg Tax Change,Total Dollar Change,Tax Decrease # Returns,Tax Decrease % in Range,Total Decrease Amount,Avg Decrease Amount,Tax Increase # Returns,Tax Increase % in Range,Total Increase Amount,Avg Increase Amount,No Tax Change # Returns,No Change % Returns,Zero Tax # Returns,Zero Tax % Returns +$0*,78854,2.9%,$50,$42,1080,1.4%,$3683,$3062,$-622,$-671000,576,0.7%,$-704000,$-1222,504,0.6%,$34000,$68,77774,98.6%,77824,98.7% +$1 to $10000,286253,10.4%,$3,$9,43699,15.3%,$20,$58,$38,$1653000,834,0.3%,$-78000,$-94,42865,15.0%,$1731000,$40,242554,84.7%,243249,85.0% +$10001 to 
$20000,310122,11.2%,$16,$26,75652,24.4%,$67,$105,$38,$2867000,5591,1.8%,$-363000,$-65,70060,22.6%,$3230000,$46,234471,75.6%,235107,75.8% +$20001 to $30000,275560,10.0%,$107,$110,140713,51.1%,$210,$216,$5,$762000,51551,18.7%,$-2682000,$-52,89162,32.4%,$3444000,$39,134847,48.9%,134332,48.7% +$30001 to $40000,269566,9.8%,$288,$216,160474,59.5%,$483,$362,$-121,$-19416000,131752,48.9%,$-21120000,$-160,28722,10.7%,$1704000,$59,109091,40.5%,110638,41.0% +$40001 to $50000,234386,8.5%,$569,$388,174125,74.3%,$767,$522,$-244,$-42568000,127554,54.4%,$-46871000,$-367,46572,19.9%,$4303000,$92,60260,25.7%,61891,26.4% +$50001 to $75000,407593,14.8%,$1192,$971,351754,86.3%,$1381,$1125,$-256,$-89935000,287674,70.6%,$-101116000,$-351,64080,15.7%,$11181000,$174,55839,13.7%,61960,15.2% +$75001 to $100000,250437,9.1%,$2020,$1826,225194,89.9%,$2246,$2030,$-216,$-48624000,177430,70.8%,$-61900000,$-349,47764,19.1%,$13276000,$278,25243,10.1%,27729,11.1% +$100001 to $150000,298343,10.8%,$3258,$3171,289948,97.2%,$3352,$3262,$-90,$-26092000,199040,66.7%,$-58517000,$-294,90908,30.5%,$32425000,$357,8395,2.8%,9188,3.1% +$150001 to $200000,143398,5.2%,$5518,$5684,141433,98.6%,$5595,$5763,$168,$23766000,61936,43.2%,$-14937000,$-241,79497,55.4%,$38703000,$487,1965,1.4%,1459,1.0% +$200001 to $300000,109340,4.0%,$8741,$8777,108016,98.8%,$8848,$8885,$37,$3955000,63636,58.2%,$-27603000,$-434,44380,40.6%,$31558000,$711,1324,1.2%,945,0.9% +$300001 to $500000,56123,2.0%,$14926,$14355,55090,98.2%,$15206,$14624,$-582,$-32054000,42933,76.5%,$-47609000,$-1109,12157,21.7%,$15555000,$1280,1032,1.8%,762,1.4% +$500001 to $1000000,25664,0.9%,$25969,$24512,24758,96.5%,$26919,$25410,$-1510,$-37381000,19803,77.2%,$-51185000,$-2585,4955,19.3%,$13804000,$2786,906,3.5%,684,2.7% +Over $1000000,11936,0.4%,$78228,$74458,11159,93.5%,$83671,$79639,$-4031,$-44989000,8693,72.8%,$-87454000,$-10060,2466,20.7%,$42465000,$17221,776,6.5%,703,5.9% 
+Total,2757573,100.0%,$2321,$2209,1803095,65.4%,$3549,$3378,$-171,$-308700000,1179002,42.8%,$-522100000,$-443,624092,22.6%,$213400000,$342,954478,34.6%,966471,35.0% diff --git a/us/states/sc/h4216_analysis/5.21_rate/state/pe_h4216_5.21_state_analysis.csv b/us/states/sc/h4216_analysis/5.21_rate/state/pe_h4216_5.21_state_analysis.csv new file mode 100644 index 0000000..5d7b774 --- /dev/null +++ b/us/states/sc/h4216_analysis/5.21_rate/state/pe_h4216_5.21_state_analysis.csv @@ -0,0 +1,16 @@ +Federal AGI Range,Est # Returns,Est % Returns,Old Avg Tax Liability,New Avg Tax Liability,Returns with Tax Change,% Returns in Range with Change,Old Avg Tax (Changed),New Avg Tax (Changed),Avg Tax Change,Total Dollar Change,Tax Decrease # Returns,Tax Decrease % in Range,Total Decrease Amount,Avg Decrease Amount,Tax Increase # Returns,Tax Increase % in Range,Total Increase Amount,Avg Increase Amount,No Tax Change # Returns,No Change % Returns,Zero Tax # Returns,Zero Tax % Returns +$0*,619010,21.1%,$0,$0,0,0.0%,$0,$0,$0,$0,0,0.0%,$0,$0,0,0.0%,$0,$0,619010,100.0%,619010,100.0% +$1 to $10000,502276,17.1%,$0,$0,0,0.0%,$0,$0,$0,$0,0,0.0%,$0,$0,0,0.0%,$0,$0,502276,100.0%,502276,100.0% +$10001 to $20000,279412,9.5%,$0,$10,53961,19.3%,$0,$50,$50,$2672942,0,0.0%,$0,$0,53961,19.3%,$2672922,$50,225451,80.7%,225413,80.7% +$20001 to $30000,252863,8.6%,$64,$101,136052,53.8%,$119,$188,$68,$9294693,5029,2.0%,$-40734,$-8,131023,51.8%,$9335378,$71,116811,46.2%,116751,46.2% +$30001 to $40000,215980,7.4%,$225,$200,135926,62.9%,$356,$316,$-40,$-5431497,88710,41.1%,$-8472465,$-96,47216,21.9%,$3040994,$64,80055,37.1%,79265,36.7% +$40001 to $50000,197525,6.7%,$547,$404,152733,77.3%,$706,$522,$-184,$-28145982,99989,50.6%,$-34226980,$-342,52744,26.7%,$6080948,$115,44792,22.7%,44131,22.3% +$50001 to $75000,300857,10.2%,$822,$722,254734,84.7%,$971,$853,$-118,$-30064724,164685,54.7%,$-45636192,$-277,90049,29.9%,$15571469,$173,46123,15.3%,46125,15.3% +$75001 to 
$100000,177284,6.0%,$1781,$1631,168284,94.9%,$1876,$1718,$-157,$-26475178,128443,72.5%,$-39583444,$-308,39841,22.5%,$13108268,$329,9000,5.1%,9124,5.1% +$100001 to $150000,187946,6.4%,$3292,$3387,186839,99.4%,$3311,$3407,$96,$17889888,111928,59.6%,$-22415936,$-200,74911,39.9%,$40305824,$538,1107,0.6%,1105,0.6% +$150001 to $200000,73396,2.5%,$6049,$6413,73395,100.0%,$6049,$6412,$363,$26678432,14400,19.6%,$-3249580,$-226,58996,80.4%,$29928012,$507,1,0.0%,0,0.0% +$200001 to $300000,52882,1.8%,$9164,$9358,52878,100.0%,$9164,$9358,$194,$10258680,21154,40.0%,$-5374373,$-254,31724,60.0%,$15633049,$493,4,0.0%,0,0.0% +$300001 to $500000,36977,1.3%,$17163,$16717,36977,100.0%,$17163,$16717,$-447,$-16518335,28313,76.6%,$-27952982,$-987,8664,23.4%,$11434646,$1320,0,0.0%,0,0.0% +$500001 to $1000000,16526,0.6%,$26140,$24911,16526,100.0%,$26140,$24911,$-1229,$-20314260,14769,89.4%,$-25823908,$-1749,1757,10.6%,$5509648,$3136,0,0.0%,0,0.0% +Over $1000000,22686,0.8%,$139623,$124950,22686,100.0%,$139623,$124950,$-14672,$-332860608,22658,99.9%,$-333138432,$-14703,29,0.1%,$277836,$9684,0,0.0%,0,0.0% +Total,2935621,100.0%,$2220,$2086,1290992,44.0%,$5048,$4744,$-304,$-393015936,700078,23.8%,$-545915008,$-780,590915,20.1%,$152898992,$259,1644629,56.0%,1643201,56.0% diff --git a/us/states/sc/h4216_analysis/5.21_rate/state/sc_h4216_5.21_state_analysis.ipynb b/us/states/sc/h4216_analysis/5.21_rate/state/sc_h4216_5.21_state_analysis.ipynb new file mode 100644 index 0000000..b58e742 --- /dev/null +++ b/us/states/sc/h4216_analysis/5.21_rate/state/sc_h4216_5.21_state_analysis.ipynb @@ -0,0 +1,550 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "cell-0", + "metadata": {}, + "source": [ + "# SC H.4216 Tax Reform Analysis - 5.21% Top Rate (State Dataset)\n", + "\n", + "This notebook produces analysis in the same format as the RFA fiscal note for direct comparison.\n", + "\n", + "**Dataset:** `hf://policyengine/policyengine-us-data/states/SC.h5` (Production)\n", + "\n", + "**Reform:** H.4216 
with 5.21% top rate (bill default)\n", + "\n", + "**RFA Estimate:** -$308,700,000" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "cell-1", + "metadata": {}, + "outputs": [], + "source": [ + "from policyengine_us import Microsimulation\n", + "from policyengine_us.reforms.states.sc.h4216.sc_h4216 import create_sc_h4216\n", + "from policyengine_core.reforms import Reform\n", + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "SC_DATASET = \"hf://policyengine/policyengine-us-data/states/SC.h5\"\n", + "TAX_YEAR = 2026\n", + "TOP_RATE = 0.0521 # 5.21% top rate\n", + "RFA_ESTIMATE = -308700000" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "cell-2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loading simulations...\n", + "Done!\n" + ] + } + ], + "source": [ + "def create_h4216_reform(top_rate=0.0521):\n", + " \"\"\"\n", + " SC H.4216 Reform:\n", + " - 1.99% up to $30k\n", + " - top_rate over $30k (default 5.21% for bill version)\n", + " \"\"\"\n", + " param_reform = Reform.from_dict(\n", + " {\n", + " \"gov.contrib.states.sc.h4216.in_effect\": {\n", + " \"2026-01-01.2100-12-31\": True\n", + " },\n", + " \"gov.contrib.states.sc.h4216.rates[1].rate\": {\n", + " \"2026-01-01.2100-12-31\": top_rate\n", + " }\n", + " },\n", + " country_id=\"us\",\n", + " )\n", + " base_reform = create_sc_h4216()\n", + " return (base_reform, param_reform)\n", + "\n", + "print(\"Loading simulations...\")\n", + "baseline = Microsimulation(dataset=SC_DATASET)\n", + "reform_sim = Microsimulation(dataset=SC_DATASET, reform=create_h4216_reform(TOP_RATE))\n", + "print(\"Done!\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "cell-3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total tax units: 49,486\n", + "Weighted tax units: 2,935,621\n" + ] + } + ], + "source": [ + "# Get data - use .values to avoid 
double-weighting\n", + "baseline_tax = baseline.calculate(\"sc_income_tax\", period=TAX_YEAR, map_to=\"tax_unit\").values\n", + "reform_tax = reform_sim.calculate(\"sc_income_tax\", period=TAX_YEAR, map_to=\"tax_unit\").values\n", + "agi = baseline.calculate(\"adjusted_gross_income\", period=TAX_YEAR, map_to=\"tax_unit\").values\n", + "weight = baseline.calculate(\"tax_unit_weight\", period=TAX_YEAR).values\n", + "\n", + "tax_change = reform_tax - baseline_tax\n", + "\n", + "print(f\"Total tax units: {len(baseline_tax):,}\")\n", + "print(f\"Weighted tax units: {weight.sum():,.0f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "cell-4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Bracket analysis complete!\n" + ] + } + ], + "source": [ + "# Define income brackets matching RFA exactly\n", + "income_brackets = [\n", + " (float('-inf'), 0, \"$0*\"),\n", + " (0, 10000, \"$1 to $10000\"),\n", + " (10000, 20000, \"$10001 to $20000\"),\n", + " (20000, 30000, \"$20001 to $30000\"),\n", + " (30000, 40000, \"$30001 to $40000\"),\n", + " (40000, 50000, \"$40001 to $50000\"),\n", + " (50000, 75000, \"$50001 to $75000\"),\n", + " (75000, 100000, \"$75001 to $100000\"),\n", + " (100000, 150000, \"$100001 to $150000\"),\n", + " (150000, 200000, \"$150001 to $200000\"),\n", + " (200000, 300000, \"$200001 to $300000\"),\n", + " (300000, 500000, \"$300001 to $500000\"),\n", + " (500000, 1000000, \"$500001 to $1000000\"),\n", + " (1000000, float('inf'), \"Over $1000000\")\n", + "]\n", + "\n", + "total_weight = weight.sum()\n", + "results = []\n", + "\n", + "for lower, upper, label in income_brackets:\n", + " if lower == float('-inf'):\n", + " mask = agi <= upper\n", + " elif upper == float('inf'):\n", + " mask = agi > lower\n", + " else:\n", + " mask = (agi > lower) & (agi <= upper)\n", + " \n", + " if mask.sum() == 0:\n", + " continue\n", + " \n", + " # Basic stats\n", + " est_returns = 
weight[mask].sum()\n", + " pct_returns = est_returns / total_weight * 100\n", + " \n", + " old_avg_tax = np.average(baseline_tax[mask], weights=weight[mask]) if est_returns > 0 else 0\n", + " new_avg_tax = np.average(reform_tax[mask], weights=weight[mask]) if est_returns > 0 else 0\n", + " \n", + " # Returns with tax change (threshold $1)\n", + " change_mask = mask & (np.abs(tax_change) > 1)\n", + " returns_with_change = weight[change_mask].sum()\n", + " pct_with_change = returns_with_change / est_returns * 100 if est_returns > 0 else 0\n", + " \n", + " if returns_with_change > 0:\n", + " old_avg_changed = np.average(baseline_tax[change_mask], weights=weight[change_mask])\n", + " new_avg_changed = np.average(reform_tax[change_mask], weights=weight[change_mask])\n", + " avg_change = np.average(tax_change[change_mask], weights=weight[change_mask])\n", + " else:\n", + " old_avg_changed = 0\n", + " new_avg_changed = 0\n", + " avg_change = 0\n", + " \n", + " total_change = (tax_change[mask] * weight[mask]).sum()\n", + " \n", + " # Tax decrease\n", + " decrease_mask = mask & (tax_change < -1)\n", + " decrease_returns = weight[decrease_mask].sum()\n", + " decrease_pct = decrease_returns / est_returns * 100 if est_returns > 0 else 0\n", + " total_decrease = (tax_change[decrease_mask] * weight[decrease_mask]).sum() if decrease_returns > 0 else 0\n", + " avg_decrease = np.average(tax_change[decrease_mask], weights=weight[decrease_mask]) if decrease_returns > 0 else 0\n", + " \n", + " # Tax increase\n", + " increase_mask = mask & (tax_change > 1)\n", + " increase_returns = weight[increase_mask].sum()\n", + " increase_pct = increase_returns / est_returns * 100 if est_returns > 0 else 0\n", + " total_increase = (tax_change[increase_mask] * weight[increase_mask]).sum() if increase_returns > 0 else 0\n", + " avg_increase = np.average(tax_change[increase_mask], weights=weight[increase_mask]) if increase_returns > 0 else 0\n", + " \n", + " # No change\n", + " no_change_mask = mask 
& (np.abs(tax_change) <= 1)\n", + " no_change_returns = weight[no_change_mask].sum()\n", + " no_change_pct = no_change_returns / est_returns * 100 if est_returns > 0 else 0\n", + " \n", + " # Zero tax\n", + " zero_tax_mask = mask & (reform_tax <= 0)\n", + " zero_tax_returns = weight[zero_tax_mask].sum()\n", + " zero_tax_pct = zero_tax_returns / est_returns * 100 if est_returns > 0 else 0\n", + " \n", + " results.append({\n", + " \"Federal AGI Range\": label,\n", + " \"Est # Returns\": int(round(est_returns)),\n", + " \"Est % Returns\": f\"{pct_returns:.1f}%\",\n", + " \"Old Avg Tax Liability\": f\"${int(round(old_avg_tax))}\",\n", + " \"New Avg Tax Liability\": f\"${int(round(new_avg_tax))}\",\n", + " \"Returns with Tax Change\": int(round(returns_with_change)),\n", + " \"% Returns in Range with Change\": f\"{pct_with_change:.1f}%\",\n", + " \"Old Avg Tax (Changed)\": f\"${int(round(old_avg_changed))}\",\n", + " \"New Avg Tax (Changed)\": f\"${int(round(new_avg_changed))}\",\n", + " \"Avg Tax Change\": f\"${int(round(avg_change))}\",\n", + " \"Total Dollar Change\": f\"${int(round(total_change))}\",\n", + " \"Tax Decrease # Returns\": int(round(decrease_returns)),\n", + " \"Tax Decrease % in Range\": f\"{decrease_pct:.1f}%\",\n", + " \"Total Decrease Amount\": f\"${int(round(total_decrease))}\",\n", + " \"Avg Decrease Amount\": f\"${int(round(avg_decrease))}\",\n", + " \"Tax Increase # Returns\": int(round(increase_returns)),\n", + " \"Tax Increase % in Range\": f\"{increase_pct:.1f}%\",\n", + " \"Total Increase Amount\": f\"${int(round(total_increase))}\",\n", + " \"Avg Increase Amount\": f\"${int(round(avg_increase))}\",\n", + " \"No Tax Change # Returns\": int(round(no_change_returns)),\n", + " \"No Change % Returns\": f\"{no_change_pct:.1f}%\",\n", + " \"Zero Tax # Returns\": int(round(zero_tax_returns)),\n", + " \"Zero Tax % Returns\": f\"{zero_tax_pct:.1f}%\"\n", + " })\n", + "\n", + "print(\"Bracket analysis complete!\")" + ] + }, + { + "cell_type": "code", 
+ "execution_count": 5, + "id": "cell-5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Totals calculated!\n" + ] + } + ], + "source": [ + "# Calculate totals\n", + "change_mask_all = np.abs(tax_change) > 1\n", + "decrease_mask_all = tax_change < -1\n", + "increase_mask_all = tax_change > 1\n", + "no_change_mask_all = np.abs(tax_change) <= 1\n", + "zero_tax_mask_all = reform_tax <= 0\n", + "\n", + "total_old_avg = np.average(baseline_tax, weights=weight)\n", + "total_new_avg = np.average(reform_tax, weights=weight)\n", + "total_change_amount = (tax_change * weight).sum()\n", + "\n", + "returns_with_change_all = weight[change_mask_all].sum()\n", + "old_avg_changed_all = np.average(baseline_tax[change_mask_all], weights=weight[change_mask_all]) if returns_with_change_all > 0 else 0\n", + "new_avg_changed_all = np.average(reform_tax[change_mask_all], weights=weight[change_mask_all]) if returns_with_change_all > 0 else 0\n", + "avg_change_all = np.average(tax_change[change_mask_all], weights=weight[change_mask_all]) if returns_with_change_all > 0 else 0\n", + "\n", + "decrease_returns_all = weight[decrease_mask_all].sum()\n", + "total_decrease_all = (tax_change[decrease_mask_all] * weight[decrease_mask_all]).sum()\n", + "avg_decrease_all = np.average(tax_change[decrease_mask_all], weights=weight[decrease_mask_all]) if decrease_returns_all > 0 else 0\n", + "\n", + "increase_returns_all = weight[increase_mask_all].sum()\n", + "total_increase_all = (tax_change[increase_mask_all] * weight[increase_mask_all]).sum()\n", + "avg_increase_all = np.average(tax_change[increase_mask_all], weights=weight[increase_mask_all]) if increase_returns_all > 0 else 0\n", + "\n", + "no_change_returns_all = weight[no_change_mask_all].sum()\n", + "zero_tax_returns_all = weight[zero_tax_mask_all].sum()\n", + "\n", + "results.append({\n", + " \"Federal AGI Range\": \"Total\",\n", + " \"Est # Returns\": int(round(total_weight)),\n", + " \"Est % 
Returns\": \"100.0%\",\n", + " \"Old Avg Tax Liability\": f\"${int(round(total_old_avg))}\",\n", + " \"New Avg Tax Liability\": f\"${int(round(total_new_avg))}\",\n", + " \"Returns with Tax Change\": int(round(returns_with_change_all)),\n", + " \"% Returns in Range with Change\": f\"{returns_with_change_all / total_weight * 100:.1f}%\",\n", + " \"Old Avg Tax (Changed)\": f\"${int(round(old_avg_changed_all))}\",\n", + " \"New Avg Tax (Changed)\": f\"${int(round(new_avg_changed_all))}\",\n", + " \"Avg Tax Change\": f\"${int(round(avg_change_all))}\",\n", + " \"Total Dollar Change\": f\"${int(round(total_change_amount))}\",\n", + " \"Tax Decrease # Returns\": int(round(decrease_returns_all)),\n", + " \"Tax Decrease % in Range\": f\"{decrease_returns_all / total_weight * 100:.1f}%\",\n", + " \"Total Decrease Amount\": f\"${int(round(total_decrease_all))}\",\n", + " \"Avg Decrease Amount\": f\"${int(round(avg_decrease_all))}\",\n", + " \"Tax Increase # Returns\": int(round(increase_returns_all)),\n", + " \"Tax Increase % in Range\": f\"{increase_returns_all / total_weight * 100:.1f}%\",\n", + " \"Total Increase Amount\": f\"${int(round(total_increase_all))}\",\n", + " \"Avg Increase Amount\": f\"${int(round(avg_increase_all))}\",\n", + " \"No Tax Change # Returns\": int(round(no_change_returns_all)),\n", + " \"No Change % Returns\": f\"{no_change_returns_all / total_weight * 100:.1f}%\",\n", + " \"Zero Tax # Returns\": int(round(zero_tax_returns_all)),\n", + " \"Zero Tax % Returns\": f\"{zero_tax_returns_all / total_weight * 100:.1f}%\"\n", + "})\n", + "\n", + "df_results = pd.DataFrame(results)\n", + "print(\"Totals calculated!\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "cell-6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "====================================================================================================\n", + "H.4216 - POLICYENGINE ANALYSIS (State Dataset, 5.21% Top 
Rate)\n", + "====================================================================================================\n", + "\n", + "Total Returns: 2,935,621\n", + "General Fund Impact: $-393,015,936\n", + "\n", + "RFA Estimate: $-308,700,000\n", + "Difference: $-84,315,936\n", + "Accuracy: 72.7%\n", + "====================================================================================================\n" + ] + } + ], + "source": [ + "# Display summary\n", + "print(\"=\"*100)\n", + "print(f\"H.4216 - POLICYENGINE ANALYSIS (State Dataset, {TOP_RATE*100:.2f}% Top Rate)\")\n", + "print(\"=\"*100)\n", + "print(f\"\\nTotal Returns: {int(total_weight):,}\")\n", + "print(f\"General Fund Impact: ${total_change_amount:,.0f}\")\n", + "print(f\"\\nRFA Estimate: ${RFA_ESTIMATE:,}\")\n", + "print(f\"Difference: ${total_change_amount - RFA_ESTIMATE:,.0f}\")\n", + "print(f\"Accuracy: {(1 - abs(total_change_amount - RFA_ESTIMATE) / abs(RFA_ESTIMATE)) * 100:.1f}%\")\n", + "print(\"=\"*100)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "cell-7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Exported to: pe_h4216_5.21_state_analysis.csv\n" + ] + } + ], + "source": [ + "# Export to CSV in RFA format\n", + "df_results.to_csv('pe_h4216_5.21_state_analysis.csv', index=False)\n", + "print(\"Exported to: pe_h4216_5.21_state_analysis.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "cell-8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "KEY METRICS:\n", + " Federal AGI Range Est # Returns Est % Returns Old Avg Tax Liability New Avg Tax Liability Total Dollar Change\n", + " $0* 619010 21.1% $0 $0 $0\n", + " $1 to $10000 502276 17.1% $0 $0 $0\n", + " $10001 to $20000 279412 9.5% $0 $10 $2672942\n", + " $20001 to $30000 252863 8.6% $64 $101 $9294693\n", + " $30001 to $40000 215980 7.4% $225 $200 $-5431497\n", + " $40001 to $50000 197525 
6.7% $547 $404 $-28145982\n", + " $50001 to $75000 300857 10.2% $822 $722 $-30064724\n", + " $75001 to $100000 177284 6.0% $1781 $1631 $-26475178\n", + " $100001 to $150000 187946 6.4% $3292 $3387 $17889888\n", + " $150001 to $200000 73396 2.5% $6049 $6413 $26678432\n", + " $200001 to $300000 52882 1.8% $9164 $9358 $10258680\n", + " $300001 to $500000 36977 1.3% $17163 $16717 $-16518335\n", + "$500001 to $1000000 16526 0.6% $26140 $24911 $-20314260\n", + " Over $1000000 22686 0.8% $139623 $124950 $-332860608\n", + " Total 2935621 100.0% $2220 $2086 $-393015936\n" + ] + } + ], + "source": [ + "# Display key columns for quick comparison\n", + "display_cols = [\n", + " \"Federal AGI Range\", \"Est # Returns\", \"Est % Returns\",\n", + " \"Old Avg Tax Liability\", \"New Avg Tax Liability\", \"Total Dollar Change\"\n", + "]\n", + "print(\"\\nKEY METRICS:\")\n", + "print(df_results[display_cols].to_string(index=False))" + ] + }, + { + "cell_type": "markdown", + "id": "cell-9", + "metadata": {}, + "source": [ + "## Side-by-Side Comparison with RFA" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "cell-10", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "====================================================================================================\n", + "POLICYENGINE (State) vs RFA COMPARISON (5.21% Rate)\n", + "====================================================================================================\n", + " AGI Range PE Returns RFA Returns PE Impact RFA Impact Diff\n", + " $0* 619,010 78,854 $0 $-671,000 $+671,000\n", + " $1 to $10000 502,276 286,253 $0 $1,653,000 $-1,653,000\n", + " $10001 to $20000 279,412 310,122 $2,672,942 $2,867,000 $-194,058\n", + " $20001 to $30000 252,863 275,560 $9,294,693 $762,000 $+8,532,693\n", + " $30001 to $40000 215,980 269,566 $-5,431,497 $-19,416,000 $+13,984,503\n", + " $40001 to $50000 197,525 234,386 $-28,145,982 $-42,568,000 
$+14,422,018\n", + " $50001 to $75000 300,857 407,593 $-30,064,724 $-89,935,000 $+59,870,276\n", + " $75001 to $100000 177,284 250,437 $-26,475,178 $-48,624,000 $+22,148,822\n", + " $100001 to $150000 187,946 298,343 $17,889,888 $-26,092,000 $+43,981,888\n", + " $150001 to $200000 73,396 143,398 $26,678,432 $23,766,000 $+2,912,432\n", + " $200001 to $300000 52,882 109,340 $10,258,680 $3,955,000 $+6,303,680\n", + " $300001 to $500000 36,977 56,123 $-16,518,335 $-32,054,000 $+15,535,665\n", + "$500001 to $1000000 16,526 25,664 $-20,314,260 $-37,381,000 $+17,066,740\n", + " Over $1000000 22,686 11,936 $-332,860,608 $-44,989,000 $-287,871,608\n", + " Total 2,935,621 2,757,573 $-393,015,936 $-308,700,000 $-84,315,936\n", + "====================================================================================================\n" + ] + } + ], + "source": [ + "# Load RFA data\n", + "rfa_df = pd.read_csv('../rfa_h4216_5.21_analysis.csv')\n", + "\n", + "def parse_dollar(val):\n", + " if isinstance(val, str):\n", + " return float(val.replace('$', '').replace(',', '').replace('%', ''))\n", + " return val\n", + "\n", + "# Create comparison\n", + "comparison = []\n", + "for idx, pe_row in df_results.iterrows():\n", + " agi_range = pe_row['Federal AGI Range']\n", + " rfa_match = rfa_df[rfa_df['Federal AGI Range'] == agi_range]\n", + " \n", + " pe_returns = pe_row['Est # Returns']\n", + " pe_impact = parse_dollar(pe_row['Total Dollar Change'])\n", + " \n", + " if len(rfa_match) > 0:\n", + " rfa_returns = rfa_match['Est # Returns'].values[0]\n", + " rfa_impact = parse_dollar(rfa_match['Total Dollar Change'].values[0])\n", + " else:\n", + " rfa_returns = 0\n", + " rfa_impact = 0\n", + " \n", + " comparison.append({\n", + " 'AGI Range': agi_range,\n", + " 'PE Returns': f\"{pe_returns:,}\",\n", + " 'RFA Returns': f\"{rfa_returns:,}\" if rfa_returns else \"N/A\",\n", + " 'PE Impact': f\"${pe_impact:,.0f}\",\n", + " 'RFA Impact': f\"${rfa_impact:,.0f}\" if rfa_impact else \"N/A\",\n", + " 
'Diff': f\"${pe_impact - rfa_impact:+,.0f}\" if rfa_impact else \"N/A\"\n", + " })\n", + "\n", + "comparison_df = pd.DataFrame(comparison)\n", + "print(\"\\n\" + \"=\"*100)\n", + "print(\"POLICYENGINE (State) vs RFA COMPARISON (5.21% Rate)\")\n", + "print(\"=\"*100)\n", + "print(comparison_df.to_string(index=False))\n", + "print(\"=\"*100)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "cell-11", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "========================================================================================================================\n", + "FULL POLICYENGINE ANALYSIS (RFA Format)\n", + "========================================================================================================================\n", + " Federal AGI Range Est # Returns Est % Returns Old Avg Tax Liability New Avg Tax Liability Returns with Tax Change % Returns in Range with Change Old Avg Tax (Changed) New Avg Tax (Changed) Avg Tax Change Total Dollar Change Tax Decrease # Returns Tax Decrease % in Range Total Decrease Amount Avg Decrease Amount Tax Increase # Returns Tax Increase % in Range Total Increase Amount Avg Increase Amount No Tax Change # Returns No Change % Returns Zero Tax # Returns Zero Tax % Returns\n", + " $0* 619010 21.1% $0 $0 0 0.0% $0 $0 $0 $0 0 0.0% $0 $0 0 0.0% $0 $0 619010 100.0% 619010 100.0%\n", + " $1 to $10000 502276 17.1% $0 $0 0 0.0% $0 $0 $0 $0 0 0.0% $0 $0 0 0.0% $0 $0 502276 100.0% 502276 100.0%\n", + " $10001 to $20000 279412 9.5% $0 $10 53961 19.3% $0 $50 $50 $2672942 0 0.0% $0 $0 53961 19.3% $2672922 $50 225451 80.7% 225413 80.7%\n", + " $20001 to $30000 252863 8.6% $64 $101 136052 53.8% $119 $188 $68 $9294693 5029 2.0% $-40734 $-8 131023 51.8% $9335378 $71 116811 46.2% 116751 46.2%\n", + " $30001 to $40000 215980 7.4% $225 $200 135926 62.9% $356 $316 $-40 $-5431497 88710 41.1% $-8472465 $-96 47216 21.9% $3040994 $64 80055 37.1% 79265 36.7%\n", + " 
$40001 to $50000 197525 6.7% $547 $404 152733 77.3% $706 $522 $-184 $-28145982 99989 50.6% $-34226980 $-342 52744 26.7% $6080948 $115 44792 22.7% 44131 22.3%\n", + " $50001 to $75000 300857 10.2% $822 $722 254734 84.7% $971 $853 $-118 $-30064724 164685 54.7% $-45636192 $-277 90049 29.9% $15571469 $173 46123 15.3% 46125 15.3%\n", + " $75001 to $100000 177284 6.0% $1781 $1631 168284 94.9% $1876 $1718 $-157 $-26475178 128443 72.5% $-39583444 $-308 39841 22.5% $13108268 $329 9000 5.1% 9124 5.1%\n", + " $100001 to $150000 187946 6.4% $3292 $3387 186839 99.4% $3311 $3407 $96 $17889888 111928 59.6% $-22415936 $-200 74911 39.9% $40305824 $538 1107 0.6% 1105 0.6%\n", + " $150001 to $200000 73396 2.5% $6049 $6413 73395 100.0% $6049 $6412 $363 $26678432 14400 19.6% $-3249580 $-226 58996 80.4% $29928012 $507 1 0.0% 0 0.0%\n", + " $200001 to $300000 52882 1.8% $9164 $9358 52878 100.0% $9164 $9358 $194 $10258680 21154 40.0% $-5374373 $-254 31724 60.0% $15633049 $493 4 0.0% 0 0.0%\n", + " $300001 to $500000 36977 1.3% $17163 $16717 36977 100.0% $17163 $16717 $-447 $-16518335 28313 76.6% $-27952982 $-987 8664 23.4% $11434646 $1320 0 0.0% 0 0.0%\n", + "$500001 to $1000000 16526 0.6% $26140 $24911 16526 100.0% $26140 $24911 $-1229 $-20314260 14769 89.4% $-25823908 $-1749 1757 10.6% $5509648 $3136 0 0.0% 0 0.0%\n", + " Over $1000000 22686 0.8% $139623 $124950 22686 100.0% $139623 $124950 $-14672 $-332860608 22658 99.9% $-333138432 $-14703 29 0.1% $277836 $9684 0 0.0% 0 0.0%\n", + " Total 2935621 100.0% $2220 $2086 1290992 44.0% $5048 $4744 $-304 $-393015936 700078 23.8% $-545915008 $-780 590915 20.1% $152898992 $259 1644629 56.0% 1643201 56.0%\n" + ] + } + ], + "source": [ + "# Full results table\n", + "print(\"\\n\" + \"=\"*120)\n", + "print(\"FULL POLICYENGINE ANALYSIS (RFA Format)\")\n", + "print(\"=\"*120)\n", + "pd.set_option('display.max_columns', None)\n", + "pd.set_option('display.width', None)\n", + "print(df_results.to_string(index=False))" + ] + } + ], + "metadata": { + 
"kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/us/states/sc/h4216_analysis/5.21_rate/test/pe_h4216_5.21_analysis.csv b/us/states/sc/h4216_analysis/5.21_rate/test/pe_h4216_5.21_analysis.csv new file mode 100644 index 0000000..99fdf7e --- /dev/null +++ b/us/states/sc/h4216_analysis/5.21_rate/test/pe_h4216_5.21_analysis.csv @@ -0,0 +1,16 @@ +Federal AGI Range,Est # Returns,Est % Returns,Old Avg Tax Liability,New Avg Tax Liability,Returns with Tax Change,% Returns in Range with Change,Old Avg Tax (Changed),New Avg Tax (Changed),Avg Tax Change,Total Dollar Change,Tax Decrease # Returns,Tax Decrease % in Range,Total Decrease Amount,Avg Decrease Amount,Tax Increase # Returns,Tax Increase % in Range,Total Increase Amount,Avg Increase Amount,No Tax Change # Returns,No Change % Returns,Zero Tax # Returns,Zero Tax % Returns +$0*,727881,26.9%,$0,$0,0,0.0%,$0,$0,$0,$0,0,0.0%,$0,$0,0,0.0%,$0,$0,727881,100.0%,727881,100.0% +$1 to $10000,498186,18.4%,$0,$0,0,0.0%,$0,$0,$0,$0,0,0.0%,$0,$0,0,0.0%,$0,$0,498186,100.0%,498186,100.0% +$10001 to $20000,233000,8.6%,$0,$4,16527,7.1%,$0,$51,$51,$847688,0,0.0%,$0,$0,16527,7.1%,$847510,$51,216473,92.9%,215471,92.5% +$20001 to $30000,171515,6.3%,$40,$56,48979,28.6%,$131,$187,$56,$2756262,2691,1.6%,$-22726,$-8,46288,27.0%,$2778858,$60,122536,71.4%,121168,70.6% +$30001 to $40000,157010,5.8%,$149,$135,70118,44.7%,$333,$302,$-31,$-2140517,45821,29.2%,$-3836658,$-84,24298,15.5%,$1696106,$70,86892,55.3%,86762,55.3% +$40001 to $50000,132402,4.9%,$399,$301,95777,72.3%,$548,$413,$-135,$-12939930,49802,37.6%,$-16742836,$-336,45975,34.7%,$3801853,$83,36624,27.7%,35193,26.6% +$50001 to 
$75000,245406,9.1%,$701,$576,205400,83.7%,$836,$687,$-149,$-30530120,139791,57.0%,$-40495060,$-290,65609,26.7%,$9964432,$152,40007,16.3%,39028,15.9% +$75001 to $100000,165713,6.1%,$1452,$1261,163885,98.9%,$1468,$1275,$-193,$-31638784,120428,72.7%,$-44416856,$-369,43457,26.2%,$12778093,$294,1828,1.1%,1759,1.1% +$100001 to $150000,225396,8.3%,$2929,$3055,220631,97.9%,$2992,$3121,$129,$28517960,121628,54.0%,$-24284616,$-200,99003,43.9%,$52802576,$533,4765,2.1%,4765,2.1% +$150001 to $200000,42792,1.6%,$5236,$5812,41448,96.9%,$5235,$5829,$595,$24642470,7562,17.7%,$-786251,$-104,33886,79.2%,$25429746,$750,1344,3.1%,0,0.0% +$200001 to $300000,55391,2.0%,$9952,$10126,55391,100.0%,$9952,$10126,$174,$9646339,25962,46.9%,$-7259498,$-280,29429,53.1%,$16905836,$574,0,0.0%,0,0.0% +$300001 to $500000,32748,1.2%,$16226,$15433,32748,100.0%,$16226,$15433,$-793,$-25961084,26670,81.4%,$-27887880,$-1046,6078,18.6%,$1926795,$317,0,0.0%,0,0.0% +$500001 to $1000000,11418,0.4%,$31912,$29003,11417,100.0%,$31913,$29004,$-2909,$-33216482,11403,99.9%,$-33299206,$-2920,15,0.1%,$82722,$5703,0,0.0%,0,0.0% +Over $1000000,6993,0.3%,$171527,$151221,6993,100.0%,$171530,$151223,$-20306,$-141996240,6961,99.5%,$-146692704,$-21074,32,0.5%,$4696465,$147081,0,0.0%,0,0.0% +Total,2705850,100.0%,$1488,$1410,969313,35.8%,$4145,$3926,$-219,$-212012432,558718,20.6%,$-345724288,$-619,410595,15.2%,$133710992,$326,1736536,64.2%,1730213,63.9% diff --git a/us/states/sc/h4216_analysis/5.21_rate/test/sc_h4216_5.21_analysis.ipynb b/us/states/sc/h4216_analysis/5.21_rate/test/sc_h4216_5.21_analysis.ipynb new file mode 100644 index 0000000..ae510a9 --- /dev/null +++ b/us/states/sc/h4216_analysis/5.21_rate/test/sc_h4216_5.21_analysis.ipynb @@ -0,0 +1,555 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "cell-0", + "metadata": {}, + "source": [ + "# SC H.4216 Tax Reform Analysis - 5.21% Top Rate\n", + "\n", + "This notebook produces analysis in the same format as the RFA fiscal note for direct comparison.\n", + 
"\n", + "**Dataset:** `hf://policyengine/test/mar/SC.h5`\n", + "\n", + "**Reform:** H.4216 with 5.21% top rate (bill default)\n", + "\n", + "**RFA Estimate:** -$308,700,000" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "cell-1", + "metadata": {}, + "outputs": [], + "source": [ + "from policyengine_us import Microsimulation\n", + "from policyengine_us.reforms.states.sc.h4216.sc_h4216 import create_sc_h4216\n", + "from policyengine_core.reforms import Reform\n", + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "SC_DATASET = \"hf://policyengine/test/mar/SC.h5\"\n", + "TAX_YEAR = 2026\n", + "TOP_RATE = 0.0521 # 5.21% top rate\n", + "RFA_ESTIMATE = -308700000" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "cell-2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loading simulations...\n", + "Done!\n" + ] + } + ], + "source": [ + "def create_h4216_reform(top_rate=0.0521):\n", + " \"\"\"\n", + " SC H.4216 Reform:\n", + " - 1.99% up to $30k\n", + " - top_rate over $30k (default 5.21% for bill version)\n", + " \"\"\"\n", + " param_reform = Reform.from_dict(\n", + " {\n", + " \"gov.contrib.states.sc.h4216.in_effect\": {\n", + " \"2026-01-01.2100-12-31\": True\n", + " },\n", + " \"gov.contrib.states.sc.h4216.rates[1].rate\": {\n", + " \"2026-01-01.2100-12-31\": top_rate\n", + " }\n", + " },\n", + " country_id=\"us\",\n", + " )\n", + " base_reform = create_sc_h4216()\n", + " return (base_reform, param_reform)\n", + "\n", + "print(\"Loading simulations...\")\n", + "baseline = Microsimulation(dataset=SC_DATASET)\n", + "reform_sim = Microsimulation(dataset=SC_DATASET, reform=create_h4216_reform(TOP_RATE))\n", + "print(\"Done!\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "cell-3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total tax units: 42,461\n", + "Weighted tax units: 2,705,850\n" + ] + 
} + ], + "source": [ + "# Get data - use .values to avoid double-weighting\n", + "baseline_tax = baseline.calculate(\"sc_income_tax\", period=TAX_YEAR, map_to=\"tax_unit\").values\n", + "reform_tax = reform_sim.calculate(\"sc_income_tax\", period=TAX_YEAR, map_to=\"tax_unit\").values\n", + "agi = baseline.calculate(\"adjusted_gross_income\", period=TAX_YEAR, map_to=\"tax_unit\").values\n", + "weight = baseline.calculate(\"tax_unit_weight\", period=TAX_YEAR).values\n", + "\n", + "tax_change = reform_tax - baseline_tax\n", + "\n", + "print(f\"Total tax units: {len(baseline_tax):,}\")\n", + "print(f\"Weighted tax units: {weight.sum():,.0f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "cell-4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Bracket analysis complete!\n" + ] + } + ], + "source": [ + "# Define income brackets matching RFA exactly\n", + "income_brackets = [\n", + " (float('-inf'), 0, \"$0*\"),\n", + " (0, 10000, \"$1 to $10000\"),\n", + " (10000, 20000, \"$10001 to $20000\"),\n", + " (20000, 30000, \"$20001 to $30000\"),\n", + " (30000, 40000, \"$30001 to $40000\"),\n", + " (40000, 50000, \"$40001 to $50000\"),\n", + " (50000, 75000, \"$50001 to $75000\"),\n", + " (75000, 100000, \"$75001 to $100000\"),\n", + " (100000, 150000, \"$100001 to $150000\"),\n", + " (150000, 200000, \"$150001 to $200000\"),\n", + " (200000, 300000, \"$200001 to $300000\"),\n", + " (300000, 500000, \"$300001 to $500000\"),\n", + " (500000, 1000000, \"$500001 to $1000000\"),\n", + " (1000000, float('inf'), \"Over $1000000\")\n", + "]\n", + "\n", + "total_weight = weight.sum()\n", + "results = []\n", + "\n", + "for lower, upper, label in income_brackets:\n", + " if lower == float('-inf'):\n", + " mask = agi <= upper\n", + " elif upper == float('inf'):\n", + " mask = agi > lower\n", + " else:\n", + " mask = (agi > lower) & (agi <= upper)\n", + " \n", + " if mask.sum() == 0:\n", + " continue\n", + " \n", 
+ " # Basic stats\n", + " est_returns = weight[mask].sum()\n", + " pct_returns = est_returns / total_weight * 100\n", + " \n", + " old_avg_tax = np.average(baseline_tax[mask], weights=weight[mask]) if est_returns > 0 else 0\n", + " new_avg_tax = np.average(reform_tax[mask], weights=weight[mask]) if est_returns > 0 else 0\n", + " \n", + " # Returns with tax change (threshold $1)\n", + " change_mask = mask & (np.abs(tax_change) > 1)\n", + " returns_with_change = weight[change_mask].sum()\n", + " pct_with_change = returns_with_change / est_returns * 100 if est_returns > 0 else 0\n", + " \n", + " if returns_with_change > 0:\n", + " old_avg_changed = np.average(baseline_tax[change_mask], weights=weight[change_mask])\n", + " new_avg_changed = np.average(reform_tax[change_mask], weights=weight[change_mask])\n", + " avg_change = np.average(tax_change[change_mask], weights=weight[change_mask])\n", + " else:\n", + " old_avg_changed = 0\n", + " new_avg_changed = 0\n", + " avg_change = 0\n", + " \n", + " total_change = (tax_change[mask] * weight[mask]).sum()\n", + " \n", + " # Tax decrease\n", + " decrease_mask = mask & (tax_change < -1)\n", + " decrease_returns = weight[decrease_mask].sum()\n", + " decrease_pct = decrease_returns / est_returns * 100 if est_returns > 0 else 0\n", + " total_decrease = (tax_change[decrease_mask] * weight[decrease_mask]).sum() if decrease_returns > 0 else 0\n", + " avg_decrease = np.average(tax_change[decrease_mask], weights=weight[decrease_mask]) if decrease_returns > 0 else 0\n", + " \n", + " # Tax increase\n", + " increase_mask = mask & (tax_change > 1)\n", + " increase_returns = weight[increase_mask].sum()\n", + " increase_pct = increase_returns / est_returns * 100 if est_returns > 0 else 0\n", + " total_increase = (tax_change[increase_mask] * weight[increase_mask]).sum() if increase_returns > 0 else 0\n", + " avg_increase = np.average(tax_change[increase_mask], weights=weight[increase_mask]) if increase_returns > 0 else 0\n", + " \n", + " # 
No change\n", + " no_change_mask = mask & (np.abs(tax_change) <= 1)\n", + " no_change_returns = weight[no_change_mask].sum()\n", + " no_change_pct = no_change_returns / est_returns * 100 if est_returns > 0 else 0\n", + " \n", + " # Zero tax\n", + " zero_tax_mask = mask & (reform_tax <= 0)\n", + " zero_tax_returns = weight[zero_tax_mask].sum()\n", + " zero_tax_pct = zero_tax_returns / est_returns * 100 if est_returns > 0 else 0\n", + " \n", + " results.append({\n", + " \"Federal AGI Range\": label,\n", + " \"Est # Returns\": int(round(est_returns)),\n", + " \"Est % Returns\": f\"{pct_returns:.1f}%\",\n", + " \"Old Avg Tax Liability\": f\"${int(round(old_avg_tax))}\",\n", + " \"New Avg Tax Liability\": f\"${int(round(new_avg_tax))}\",\n", + " \"Returns with Tax Change\": int(round(returns_with_change)),\n", + " \"% Returns in Range with Change\": f\"{pct_with_change:.1f}%\",\n", + " \"Old Avg Tax (Changed)\": f\"${int(round(old_avg_changed))}\",\n", + " \"New Avg Tax (Changed)\": f\"${int(round(new_avg_changed))}\",\n", + " \"Avg Tax Change\": f\"${int(round(avg_change))}\",\n", + " \"Total Dollar Change\": f\"${int(round(total_change))}\",\n", + " \"Tax Decrease # Returns\": int(round(decrease_returns)),\n", + " \"Tax Decrease % in Range\": f\"{decrease_pct:.1f}%\",\n", + " \"Total Decrease Amount\": f\"${int(round(total_decrease))}\",\n", + " \"Avg Decrease Amount\": f\"${int(round(avg_decrease))}\",\n", + " \"Tax Increase # Returns\": int(round(increase_returns)),\n", + " \"Tax Increase % in Range\": f\"{increase_pct:.1f}%\",\n", + " \"Total Increase Amount\": f\"${int(round(total_increase))}\",\n", + " \"Avg Increase Amount\": f\"${int(round(avg_increase))}\",\n", + " \"No Tax Change # Returns\": int(round(no_change_returns)),\n", + " \"No Change % Returns\": f\"{no_change_pct:.1f}%\",\n", + " \"Zero Tax # Returns\": int(round(zero_tax_returns)),\n", + " \"Zero Tax % Returns\": f\"{zero_tax_pct:.1f}%\"\n", + " })\n", + "\n", + "print(\"Bracket analysis 
complete!\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "cell-5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Totals calculated!\n" + ] + } + ], + "source": [ + "# Calculate totals\n", + "change_mask_all = np.abs(tax_change) > 1\n", + "decrease_mask_all = tax_change < -1\n", + "increase_mask_all = tax_change > 1\n", + "no_change_mask_all = np.abs(tax_change) <= 1\n", + "zero_tax_mask_all = reform_tax <= 0\n", + "\n", + "total_old_avg = np.average(baseline_tax, weights=weight)\n", + "total_new_avg = np.average(reform_tax, weights=weight)\n", + "total_change_amount = (tax_change * weight).sum()\n", + "\n", + "returns_with_change_all = weight[change_mask_all].sum()\n", + "old_avg_changed_all = np.average(baseline_tax[change_mask_all], weights=weight[change_mask_all]) if returns_with_change_all > 0 else 0\n", + "new_avg_changed_all = np.average(reform_tax[change_mask_all], weights=weight[change_mask_all]) if returns_with_change_all > 0 else 0\n", + "avg_change_all = np.average(tax_change[change_mask_all], weights=weight[change_mask_all]) if returns_with_change_all > 0 else 0\n", + "\n", + "decrease_returns_all = weight[decrease_mask_all].sum()\n", + "total_decrease_all = (tax_change[decrease_mask_all] * weight[decrease_mask_all]).sum()\n", + "avg_decrease_all = np.average(tax_change[decrease_mask_all], weights=weight[decrease_mask_all]) if decrease_returns_all > 0 else 0\n", + "\n", + "increase_returns_all = weight[increase_mask_all].sum()\n", + "total_increase_all = (tax_change[increase_mask_all] * weight[increase_mask_all]).sum()\n", + "avg_increase_all = np.average(tax_change[increase_mask_all], weights=weight[increase_mask_all]) if increase_returns_all > 0 else 0\n", + "\n", + "no_change_returns_all = weight[no_change_mask_all].sum()\n", + "zero_tax_returns_all = weight[zero_tax_mask_all].sum()\n", + "\n", + "results.append({\n", + " \"Federal AGI Range\": \"Total\",\n", + " \"Est # 
Returns\": int(round(total_weight)),\n", + " \"Est % Returns\": \"100.0%\",\n", + " \"Old Avg Tax Liability\": f\"${int(round(total_old_avg))}\",\n", + " \"New Avg Tax Liability\": f\"${int(round(total_new_avg))}\",\n", + " \"Returns with Tax Change\": int(round(returns_with_change_all)),\n", + " \"% Returns in Range with Change\": f\"{returns_with_change_all / total_weight * 100:.1f}%\",\n", + " \"Old Avg Tax (Changed)\": f\"${int(round(old_avg_changed_all))}\",\n", + " \"New Avg Tax (Changed)\": f\"${int(round(new_avg_changed_all))}\",\n", + " \"Avg Tax Change\": f\"${int(round(avg_change_all))}\",\n", + " \"Total Dollar Change\": f\"${int(round(total_change_amount))}\",\n", + " \"Tax Decrease # Returns\": int(round(decrease_returns_all)),\n", + " \"Tax Decrease % in Range\": f\"{decrease_returns_all / total_weight * 100:.1f}%\",\n", + " \"Total Decrease Amount\": f\"${int(round(total_decrease_all))}\",\n", + " \"Avg Decrease Amount\": f\"${int(round(avg_decrease_all))}\",\n", + " \"Tax Increase # Returns\": int(round(increase_returns_all)),\n", + " \"Tax Increase % in Range\": f\"{increase_returns_all / total_weight * 100:.1f}%\",\n", + " \"Total Increase Amount\": f\"${int(round(total_increase_all))}\",\n", + " \"Avg Increase Amount\": f\"${int(round(avg_increase_all))}\",\n", + " \"No Tax Change # Returns\": int(round(no_change_returns_all)),\n", + " \"No Change % Returns\": f\"{no_change_returns_all / total_weight * 100:.1f}%\",\n", + " \"Zero Tax # Returns\": int(round(zero_tax_returns_all)),\n", + " \"Zero Tax % Returns\": f\"{zero_tax_returns_all / total_weight * 100:.1f}%\"\n", + "})\n", + "\n", + "df_results = pd.DataFrame(results)\n", + "print(\"Totals calculated!\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "cell-6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "====================================================================================================\n", + 
"H.4216 - POLICYENGINE ANALYSIS (Test Dataset, 5.21% Top Rate)\n", + "====================================================================================================\n", + "\n", + "Total Returns: 2,705,849\n", + "General Fund Impact: $-212,012,432\n", + "\n", + "RFA Estimate: $-308,700,000\n", + "Difference: $96,687,568\n", + "Accuracy: 68.7%\n", + "====================================================================================================\n" + ] + } + ], + "source": [ + "# Display summary\n", + "print(\"=\"*100)\n", + "print(f\"H.4216 - POLICYENGINE ANALYSIS (Test Dataset, {TOP_RATE*100:.2f}% Top Rate)\")\n", + "print(\"=\"*100)\n", + "print(f\"\\nTotal Returns: {int(total_weight):,}\")\n", + "print(f\"General Fund Impact: ${total_change_amount:,.0f}\")\n", + "print(f\"\\nRFA Estimate: ${RFA_ESTIMATE:,}\")\n", + "print(f\"Difference: ${total_change_amount - RFA_ESTIMATE:,.0f}\")\n", + "print(f\"Accuracy: {(1 - abs(total_change_amount - RFA_ESTIMATE) / abs(RFA_ESTIMATE)) * 100:.1f}%\")\n", + "print(\"=\"*100)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "cell-7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Exported to: pe_h4216_5.21_analysis.csv\n" + ] + } + ], + "source": [ + "# Export to CSV in RFA format\n", + "df_results.to_csv('pe_h4216_5.21_analysis.csv', index=False)\n", + "print(\"Exported to: pe_h4216_5.21_analysis.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "cell-8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "KEY METRICS:\n", + " Federal AGI Range Est # Returns Est % Returns Old Avg Tax Liability New Avg Tax Liability Total Dollar Change\n", + " $0* 727881 26.9% $0 $0 $0\n", + " $1 to $10000 498186 18.4% $0 $0 $0\n", + " $10001 to $20000 233000 8.6% $0 $4 $847688\n", + " $20001 to $30000 171515 6.3% $40 $56 $2756262\n", + " $30001 to $40000 157010 5.8% $149 $135 
$-2140517\n", + " $40001 to $50000 132402 4.9% $399 $301 $-12939930\n", + " $50001 to $75000 245406 9.1% $701 $576 $-30530120\n", + " $75001 to $100000 165713 6.1% $1452 $1261 $-31638784\n", + " $100001 to $150000 225396 8.3% $2929 $3055 $28517960\n", + " $150001 to $200000 42792 1.6% $5236 $5812 $24642470\n", + " $200001 to $300000 55391 2.0% $9952 $10126 $9646339\n", + " $300001 to $500000 32748 1.2% $16226 $15433 $-25961084\n", + "$500001 to $1000000 11418 0.4% $31912 $29003 $-33216482\n", + " Over $1000000 6993 0.3% $171527 $151221 $-141996240\n", + " Total 2705850 100.0% $1488 $1410 $-212012432\n" + ] + } + ], + "source": [ + "# Display key columns for quick comparison\n", + "display_cols = [\n", + " \"Federal AGI Range\", \"Est # Returns\", \"Est % Returns\",\n", + " \"Old Avg Tax Liability\", \"New Avg Tax Liability\", \"Total Dollar Change\"\n", + "]\n", + "print(\"\\nKEY METRICS:\")\n", + "print(df_results[display_cols].to_string(index=False))" + ] + }, + { + "cell_type": "markdown", + "id": "cell-9", + "metadata": {}, + "source": [ + "## Side-by-Side Comparison with RFA" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "cell-10", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "====================================================================================================\n", + "POLICYENGINE (Test) vs RFA COMPARISON (5.21% Rate)\n", + "====================================================================================================\n", + " AGI Range PE Returns RFA Returns PE Impact RFA Impact Diff\n", + " $0* 727,881 78,854 $0 $-671,000 $+671,000\n", + " $1 to $10000 498,186 286,253 $0 $1,653,000 $-1,653,000\n", + " $10001 to $20000 233,000 310,122 $847,688 $2,867,000 $-2,019,312\n", + " $20001 to $30000 171,515 275,560 $2,756,262 $762,000 $+1,994,262\n", + " $30001 to $40000 157,010 269,566 $-2,140,517 $-19,416,000 $+17,275,483\n", + " $40001 to $50000 132,402 234,386 
$-12,939,930 $-42,568,000 $+29,628,070\n", + " $50001 to $75000 245,406 407,593 $-30,530,120 $-89,935,000 $+59,404,880\n", + " $75001 to $100000 165,713 250,437 $-31,638,784 $-48,624,000 $+16,985,216\n", + " $100001 to $150000 225,396 298,343 $28,517,960 $-26,092,000 $+54,609,960\n", + " $150001 to $200000 42,792 143,398 $24,642,470 $23,766,000 $+876,470\n", + " $200001 to $300000 55,391 109,340 $9,646,339 $3,955,000 $+5,691,339\n", + " $300001 to $500000 32,748 56,123 $-25,961,084 $-32,054,000 $+6,092,916\n", + "$500001 to $1000000 11,418 25,664 $-33,216,482 $-37,381,000 $+4,164,518\n", + " Over $1000000 6,993 11,936 $-141,996,240 $-44,989,000 $-97,007,240\n", + " Total 2,705,850 2,757,573 $-212,012,432 $-308,700,000 $+96,687,568\n", + "====================================================================================================\n" + ] + } + ], + "source": [ + "# Load RFA data\n", + "rfa_df = pd.read_csv('../rfa_h4216_5.21_analysis.csv')\n", + "\n", + "def parse_dollar(val):\n", + " if isinstance(val, str):\n", + " return float(val.replace('$', '').replace(',', '').replace('%', ''))\n", + " return val\n", + "\n", + "def parse_pct(val):\n", + " if isinstance(val, str):\n", + " return float(val.replace('%', ''))\n", + " return val\n", + "\n", + "# Create comparison\n", + "comparison = []\n", + "for idx, pe_row in df_results.iterrows():\n", + " agi_range = pe_row['Federal AGI Range']\n", + " rfa_match = rfa_df[rfa_df['Federal AGI Range'] == agi_range]\n", + " \n", + " pe_returns = pe_row['Est # Returns']\n", + " pe_impact = parse_dollar(pe_row['Total Dollar Change'])\n", + " \n", + " if len(rfa_match) > 0:\n", + " rfa_returns = rfa_match['Est # Returns'].values[0]\n", + " rfa_impact = parse_dollar(rfa_match['Total Dollar Change'].values[0])\n", + " else:\n", + " rfa_returns = 0\n", + " rfa_impact = 0\n", + " \n", + " comparison.append({\n", + " 'AGI Range': agi_range,\n", + " 'PE Returns': f\"{pe_returns:,}\",\n", + " 'RFA Returns': f\"{rfa_returns:,}\" if 
rfa_returns else \"N/A\",\n", + " 'PE Impact': f\"${pe_impact:,.0f}\",\n", + " 'RFA Impact': f\"${rfa_impact:,.0f}\" if rfa_impact else \"N/A\",\n", + " 'Diff': f\"${pe_impact - rfa_impact:+,.0f}\" if rfa_impact else \"N/A\"\n", + " })\n", + "\n", + "comparison_df = pd.DataFrame(comparison)\n", + "print(\"\\n\" + \"=\"*100)\n", + "print(\"POLICYENGINE (Test) vs RFA COMPARISON (5.21% Rate)\")\n", + "print(\"=\"*100)\n", + "print(comparison_df.to_string(index=False))\n", + "print(\"=\"*100)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "cell-11", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "========================================================================================================================\n", + "FULL POLICYENGINE ANALYSIS (RFA Format)\n", + "========================================================================================================================\n", + " Federal AGI Range Est # Returns Est % Returns Old Avg Tax Liability New Avg Tax Liability Returns with Tax Change % Returns in Range with Change Old Avg Tax (Changed) New Avg Tax (Changed) Avg Tax Change Total Dollar Change Tax Decrease # Returns Tax Decrease % in Range Total Decrease Amount Avg Decrease Amount Tax Increase # Returns Tax Increase % in Range Total Increase Amount Avg Increase Amount No Tax Change # Returns No Change % Returns Zero Tax # Returns Zero Tax % Returns\n", + " $0* 727881 26.9% $0 $0 0 0.0% $0 $0 $0 $0 0 0.0% $0 $0 0 0.0% $0 $0 727881 100.0% 727881 100.0%\n", + " $1 to $10000 498186 18.4% $0 $0 0 0.0% $0 $0 $0 $0 0 0.0% $0 $0 0 0.0% $0 $0 498186 100.0% 498186 100.0%\n", + " $10001 to $20000 233000 8.6% $0 $4 16527 7.1% $0 $51 $51 $847688 0 0.0% $0 $0 16527 7.1% $847510 $51 216473 92.9% 215471 92.5%\n", + " $20001 to $30000 171515 6.3% $40 $56 48979 28.6% $131 $187 $56 $2756262 2691 1.6% $-22726 $-8 46288 27.0% $2778858 $60 122536 71.4% 121168 70.6%\n", + " $30001 to $40000 
157010 5.8% $149 $135 70118 44.7% $333 $302 $-31 $-2140517 45821 29.2% $-3836658 $-84 24298 15.5% $1696106 $70 86892 55.3% 86762 55.3%\n", + " $40001 to $50000 132402 4.9% $399 $301 95777 72.3% $548 $413 $-135 $-12939930 49802 37.6% $-16742836 $-336 45975 34.7% $3801853 $83 36624 27.7% 35193 26.6%\n", + " $50001 to $75000 245406 9.1% $701 $576 205400 83.7% $836 $687 $-149 $-30530120 139791 57.0% $-40495060 $-290 65609 26.7% $9964432 $152 40007 16.3% 39028 15.9%\n", + " $75001 to $100000 165713 6.1% $1452 $1261 163885 98.9% $1468 $1275 $-193 $-31638784 120428 72.7% $-44416856 $-369 43457 26.2% $12778093 $294 1828 1.1% 1759 1.1%\n", + " $100001 to $150000 225396 8.3% $2929 $3055 220631 97.9% $2992 $3121 $129 $28517960 121628 54.0% $-24284616 $-200 99003 43.9% $52802576 $533 4765 2.1% 4765 2.1%\n", + " $150001 to $200000 42792 1.6% $5236 $5812 41448 96.9% $5235 $5829 $595 $24642470 7562 17.7% $-786251 $-104 33886 79.2% $25429746 $750 1344 3.1% 0 0.0%\n", + " $200001 to $300000 55391 2.0% $9952 $10126 55391 100.0% $9952 $10126 $174 $9646339 25962 46.9% $-7259498 $-280 29429 53.1% $16905836 $574 0 0.0% 0 0.0%\n", + " $300001 to $500000 32748 1.2% $16226 $15433 32748 100.0% $16226 $15433 $-793 $-25961084 26670 81.4% $-27887880 $-1046 6078 18.6% $1926795 $317 0 0.0% 0 0.0%\n", + "$500001 to $1000000 11418 0.4% $31912 $29003 11417 100.0% $31913 $29004 $-2909 $-33216482 11403 99.9% $-33299206 $-2920 15 0.1% $82722 $5703 0 0.0% 0 0.0%\n", + " Over $1000000 6993 0.3% $171527 $151221 6993 100.0% $171530 $151223 $-20306 $-141996240 6961 99.5% $-146692704 $-21074 32 0.5% $4696465 $147081 0 0.0% 0 0.0%\n", + " Total 2705850 100.0% $1488 $1410 969313 35.8% $4145 $3926 $-219 $-212012432 558718 20.6% $-345724288 $-619 410595 15.2% $133710992 $326 1736536 64.2% 1730213 63.9%\n" + ] + } + ], + "source": [ + "# Full results table\n", + "print(\"\\n\" + \"=\"*120)\n", + "print(\"FULL POLICYENGINE ANALYSIS (RFA Format)\")\n", + "print(\"=\"*120)\n", + 
"pd.set_option('display.max_columns', None)\n", + "pd.set_option('display.width', None)\n", + "print(df_results.to_string(index=False))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/us/states/sc/h4216_analysis/5.39_rate/rfa_h4216_analysis.csv b/us/states/sc/h4216_analysis/5.39_rate/rfa_h4216_analysis.csv new file mode 100644 index 0000000..43991c5 --- /dev/null +++ b/us/states/sc/h4216_analysis/5.39_rate/rfa_h4216_analysis.csv @@ -0,0 +1,16 @@ +Federal AGI Range,Est # Returns,Est % Returns,Old Avg Tax Liability,New Avg Tax Liability,Returns with Tax Change,% Returns in Range with Change,Old Avg Tax (Changed),New Avg Tax (Changed),Avg Tax Change,Total Dollar Change,Tax Decrease # Returns,Tax Decrease % in Range,Total Decrease Amount,Avg Decrease Amount,Tax Increase # Returns,Tax Increase % in Range,Total Increase Amount,Avg Increase Amount,No Tax Change # Returns,No Change % Returns,Zero Tax # Returns,Zero Tax % Returns +$0*,78854,2.9%,$50,$43,1080,1.4%,$3683,$3154,-$529,-$571000,575,0.7%,-$606000,-$1054,505,0.6%,$35000,$69,77774,98.6%,77824,98.7% +$1 to $10000,286253,10.4%,$3,$9,43699,15.3%,$20,$58,$38,$1655000,834,0.3%,-$76000,-$91,42865,15.0%,$1731000,$40,242554,84.7%,243249,85.0% +$10001 to $20000,310122,11.2%,$16,$26,75652,24.4%,$67,$105,$38,$2872000,5591,1.8%,-$360000,-$64,70060,22.6%,$3232000,$46,234471,75.6%,235107,75.8% +$20001 to $30000,275560,10.0%,$107,$110,140713,51.1%,$210,$216,$5,$769000,51548,18.7%,-$2676000,-$52,89165,32.4%,$3445000,$39,134847,48.9%,134332,48.7% +$30001 to 
$40000,269566,9.8%,$288,$216,160474,59.5%,$483,$362,-$121,-$19360000,131750,48.9%,-$21067000,-$160,28724,10.7%,$1707000,$59,109091,40.5%,110638,41.0% +$40001 to $50000,234386,8.5%,$569,$390,174112,74.3%,$767,$526,-$241,-$41986000,127503,54.4%,-$46301000,-$363,46609,19.9%,$4315000,$93,60274,25.7%,61884,26.4% +$50001 to $75000,407593,14.8%,$1192,$990,351715,86.3%,$1381,$1148,-$234,-$82146000,286705,70.3%,-$93552000,-$326,65010,15.9%,$11406000,$175,55877,13.7%,61644,15.1% +$75001 to $100000,250437,9.1%,$2020,$1874,225176,89.9%,$2247,$2085,-$162,-$36461000,173939,69.5%,-$51076000,-$294,51237,20.5%,$14615000,$285,25261,10.1%,27341,10.9% +$100001 to $150000,298343,10.8%,$3258,$3269,289966,97.2%,$3352,$3363,$11,$3115000,175398,58.8%,-$35022000,-$200,114568,38.4%,$38137000,$333,8377,2.8%,8450,2.8% +$150001 to $200000,143398,5.2%,$5518,$5873,141749,98.9%,$5582,$5942,$359,$50933000,19752,13.8%,-$6653000,-$337,121997,85.1%,$57586000,$472,1649,1.1%,1210,0.8% +$200001 to $300000,109340,4.0%,$8741,$9077,108086,98.9%,$8842,$9182,$340,$36718000,29527,27.0%,-$10562000,-$358,78560,71.8%,$47280000,$602,1253,1.1%,791,0.7% +$300001 to $500000,56123,2.0%,$14926,$14844,55098,98.2%,$15204,$15120,-$84,-$4627000,36199,64.5%,-$25411000,-$702,18898,33.7%,$20784000,$1100,1025,1.8%,688,1.2% +$500001 to $1000000,25664,0.9%,$25969,$25338,24764,96.5%,$26912,$26258,-$654,-$16195000,18325,71.4%,-$32991000,-$1800,6439,25.1%,$16796000,$2608,900,3.5%,649,2.5% +Over $1000000,11936,0.4%,$78228,$77074,11163,93.5%,$83646,$82413,-$1233,-$13767000,8187,68.6%,-$62365000,-$7617,2975,24.9%,$48598000,$16334,773,6.5%,666,5.6% +Total,2757573,100.0%,$2321,$2277,1803447,65.4%,$3548,$3482,-$66,-$119100000,1065834,38.7%,-$388700000,-$365,737613,26.7%,$269600000,$366,954126,34.6%,964473,35.0% diff --git a/us/states/sc/h4216_analysis/5.39_rate/state/pe_h4216_5.39_state_analysis.csv b/us/states/sc/h4216_analysis/5.39_rate/state/pe_h4216_5.39_state_analysis.csv new file mode 100644 index 0000000..6ed079c --- /dev/null +++ 
b/us/states/sc/h4216_analysis/5.39_rate/state/pe_h4216_5.39_state_analysis.csv @@ -0,0 +1,16 @@ +Federal AGI Range,Est # Returns,Est % Returns,Old Avg Tax Liability,New Avg Tax Liability,Returns with Tax Change,% Returns in Range with Change,Old Avg Tax (Changed),New Avg Tax (Changed),Avg Tax Change,Total Dollar Change,Tax Decrease # Returns,Tax Decrease % in Range,Total Decrease Amount,Avg Decrease Amount,Tax Increase # Returns,Tax Increase % in Range,Total Increase Amount,Avg Increase Amount,No Tax Change # Returns,No Change % Returns,Zero Tax # Returns,Zero Tax % Returns +$0*,619010,21.1%,$0,$0,0,0.0%,$0,$0,$0,$0,0,0.0%,$0,$0,0,0.0%,$0,$0,619010,100.0%,619010,100.0% +$1 to $10000,502276,17.1%,$0,$0,0,0.0%,$0,$0,$0,$0,0,0.0%,$0,$0,0,0.0%,$0,$0,502276,100.0%,502276,100.0% +$10001 to $20000,279412,9.5%,$0,$10,53961,19.3%,$0,$50,$50,$2672942,0,0.0%,$0,$0,53961,19.3%,$2672922,$50,225451,80.7%,225413,80.7% +$20001 to $30000,252863,8.6%,$64,$101,136052,53.8%,$119,$188,$68,$9294693,5029,2.0%,$-40734,$-8,131023,51.8%,$9335378,$71,116811,46.2%,116751,46.2% +$30001 to $40000,215980,7.4%,$225,$200,135926,62.9%,$356,$316,$-40,$-5431497,88710,41.1%,$-8472465,$-96,47216,21.9%,$3040994,$64,80055,37.1%,79265,36.7% +$40001 to $50000,197525,6.7%,$547,$406,152733,77.3%,$706,$524,$-182,$-27750680,99989,50.6%,$-33876124,$-339,52744,26.7%,$6125390,$116,44792,22.7%,44131,22.3% +$50001 to $75000,300857,10.2%,$822,$734,254734,84.7%,$971,$867,$-104,$-26516064,163843,54.5%,$-42432168,$-259,90891,30.2%,$15916106,$175,46123,15.3%,46125,15.3% +$75001 to $100000,177284,6.0%,$1781,$1673,168196,94.9%,$1875,$1761,$-114,$-19091546,123948,69.9%,$-33622528,$-271,44248,25.0%,$14531019,$328,9088,5.1%,9124,5.1% +$100001 to $150000,187946,6.4%,$3292,$3490,186777,99.4%,$3310,$3510,$199,$37224792,80645,42.9%,$-11527897,$-143,106132,56.5%,$48752732,$459,1169,0.6%,1105,0.6% +$150001 to 
$200000,73396,2.5%,$6049,$6621,73396,100.0%,$6049,$6621,$572,$41990104,5919,8.1%,$-1521388,$-257,67477,91.9%,$43511488,$645,0,0.0%,0,0.0% +$200001 to $300000,52882,1.8%,$9164,$9669,52844,99.9%,$9161,$9666,$504,$26656968,6389,12.1%,$-1749618,$-274,46455,87.8%,$28406596,$611,38,0.1%,0,0.0% +$300001 to $500000,36977,1.3%,$17163,$17280,36977,100.0%,$17163,$17280,$116,$4306962,20370,55.1%,$-12902031,$-633,16607,44.9%,$17208994,$1036,0,0.0%,0,0.0% +$500001 to $1000000,16526,0.6%,$26140,$25753,16526,100.0%,$26140,$25753,$-387,$-6389232,14096,85.3%,$-14481286,$-1027,2430,14.7%,$8092054,$3331,0,0.0%,0,0.0% +Over $1000000,22686,0.8%,$139623,$129256,22686,100.0%,$139623,$129256,$-10367,$-235194960,22658,99.9%,$-235526416,$-10395,29,0.1%,$331456,$11553,0,0.0%,0,0.0% +Total,2935621,100.0%,$2220,$2153,1290809,44.0%,$5048,$4894,$-154,$-198227520,631596,21.5%,$-396152640,$-627,659213,22.5%,$197925120,$300,1644813,56.0%,1643201,56.0% diff --git a/us/states/sc/h4216_analysis/5.39_rate/state/sc_h4216_5.39_state_analysis.ipynb b/us/states/sc/h4216_analysis/5.39_rate/state/sc_h4216_5.39_state_analysis.ipynb new file mode 100644 index 0000000..a95395d --- /dev/null +++ b/us/states/sc/h4216_analysis/5.39_rate/state/sc_h4216_5.39_state_analysis.ipynb @@ -0,0 +1,550 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "cell-0", + "metadata": {}, + "source": [ + "# SC H.4216 Tax Reform Analysis - 5.39% Top Rate (State Dataset)\n", + "\n", + "This notebook produces analysis in the same format as the RFA fiscal note for direct comparison.\n", + "\n", + "**Dataset:** `hf://policyengine/policyengine-us-data/states/SC.h5` (Production)\n", + "\n", + "**Reform:** H.4216 with 5.39% top rate (RFA version)\n", + "\n", + "**RFA Estimate:** -$119,100,000" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "cell-1", + "metadata": {}, + "outputs": [], + "source": [ + "from policyengine_us import Microsimulation\n", + "from policyengine_us.reforms.states.sc.h4216.sc_h4216 import 
create_sc_h4216\n", + "from policyengine_core.reforms import Reform\n", + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "SC_DATASET = \"hf://policyengine/policyengine-us-data/states/SC.h5\"\n", + "TAX_YEAR = 2026\n", + "TOP_RATE = 0.0539 # 5.39% top rate\n", + "RFA_ESTIMATE = -119100000" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "cell-2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loading simulations...\n", + "Done!\n" + ] + } + ], + "source": [ + "def create_h4216_reform(top_rate=0.0539):\n", + " \"\"\"\n", + " SC H.4216 Reform:\n", + " - 1.99% up to $30k\n", + " - top_rate over $30k (default 5.39% for RFA version)\n", + " \"\"\"\n", + " param_reform = Reform.from_dict(\n", + " {\n", + " \"gov.contrib.states.sc.h4216.in_effect\": {\n", + " \"2026-01-01.2100-12-31\": True\n", + " },\n", + " \"gov.contrib.states.sc.h4216.rates[1].rate\": {\n", + " \"2026-01-01.2100-12-31\": top_rate\n", + " }\n", + " },\n", + " country_id=\"us\",\n", + " )\n", + " base_reform = create_sc_h4216()\n", + " return (base_reform, param_reform)\n", + "\n", + "print(\"Loading simulations...\")\n", + "baseline = Microsimulation(dataset=SC_DATASET)\n", + "reform_sim = Microsimulation(dataset=SC_DATASET, reform=create_h4216_reform(TOP_RATE))\n", + "print(\"Done!\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "cell-3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total tax units: 49,486\n", + "Weighted tax units: 2,935,621\n" + ] + } + ], + "source": [ + "# Get data - use .values to avoid double-weighting\n", + "baseline_tax = baseline.calculate(\"sc_income_tax\", period=TAX_YEAR, map_to=\"tax_unit\").values\n", + "reform_tax = reform_sim.calculate(\"sc_income_tax\", period=TAX_YEAR, map_to=\"tax_unit\").values\n", + "agi = baseline.calculate(\"adjusted_gross_income\", period=TAX_YEAR, map_to=\"tax_unit\").values\n", + 
"weight = baseline.calculate(\"tax_unit_weight\", period=TAX_YEAR).values\n", + "\n", + "tax_change = reform_tax - baseline_tax\n", + "\n", + "print(f\"Total tax units: {len(baseline_tax):,}\")\n", + "print(f\"Weighted tax units: {weight.sum():,.0f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "cell-4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Bracket analysis complete!\n" + ] + } + ], + "source": [ + "# Define income brackets matching RFA exactly\n", + "income_brackets = [\n", + " (float('-inf'), 0, \"$0*\"),\n", + " (0, 10000, \"$1 to $10000\"),\n", + " (10000, 20000, \"$10001 to $20000\"),\n", + " (20000, 30000, \"$20001 to $30000\"),\n", + " (30000, 40000, \"$30001 to $40000\"),\n", + " (40000, 50000, \"$40001 to $50000\"),\n", + " (50000, 75000, \"$50001 to $75000\"),\n", + " (75000, 100000, \"$75001 to $100000\"),\n", + " (100000, 150000, \"$100001 to $150000\"),\n", + " (150000, 200000, \"$150001 to $200000\"),\n", + " (200000, 300000, \"$200001 to $300000\"),\n", + " (300000, 500000, \"$300001 to $500000\"),\n", + " (500000, 1000000, \"$500001 to $1000000\"),\n", + " (1000000, float('inf'), \"Over $1000000\")\n", + "]\n", + "\n", + "total_weight = weight.sum()\n", + "results = []\n", + "\n", + "for lower, upper, label in income_brackets:\n", + " if lower == float('-inf'):\n", + " mask = agi <= upper\n", + " elif upper == float('inf'):\n", + " mask = agi > lower\n", + " else:\n", + " mask = (agi > lower) & (agi <= upper)\n", + " \n", + " if mask.sum() == 0:\n", + " continue\n", + " \n", + " # Basic stats\n", + " est_returns = weight[mask].sum()\n", + " pct_returns = est_returns / total_weight * 100\n", + " \n", + " old_avg_tax = np.average(baseline_tax[mask], weights=weight[mask]) if est_returns > 0 else 0\n", + " new_avg_tax = np.average(reform_tax[mask], weights=weight[mask]) if est_returns > 0 else 0\n", + " \n", + " # Returns with tax change (threshold $1)\n", + " 
change_mask = mask & (np.abs(tax_change) > 1)\n", + " returns_with_change = weight[change_mask].sum()\n", + " pct_with_change = returns_with_change / est_returns * 100 if est_returns > 0 else 0\n", + " \n", + " if returns_with_change > 0:\n", + " old_avg_changed = np.average(baseline_tax[change_mask], weights=weight[change_mask])\n", + " new_avg_changed = np.average(reform_tax[change_mask], weights=weight[change_mask])\n", + " avg_change = np.average(tax_change[change_mask], weights=weight[change_mask])\n", + " else:\n", + " old_avg_changed = 0\n", + " new_avg_changed = 0\n", + " avg_change = 0\n", + " \n", + " total_change = (tax_change[mask] * weight[mask]).sum()\n", + " \n", + " # Tax decrease\n", + " decrease_mask = mask & (tax_change < -1)\n", + " decrease_returns = weight[decrease_mask].sum()\n", + " decrease_pct = decrease_returns / est_returns * 100 if est_returns > 0 else 0\n", + " total_decrease = (tax_change[decrease_mask] * weight[decrease_mask]).sum() if decrease_returns > 0 else 0\n", + " avg_decrease = np.average(tax_change[decrease_mask], weights=weight[decrease_mask]) if decrease_returns > 0 else 0\n", + " \n", + " # Tax increase\n", + " increase_mask = mask & (tax_change > 1)\n", + " increase_returns = weight[increase_mask].sum()\n", + " increase_pct = increase_returns / est_returns * 100 if est_returns > 0 else 0\n", + " total_increase = (tax_change[increase_mask] * weight[increase_mask]).sum() if increase_returns > 0 else 0\n", + " avg_increase = np.average(tax_change[increase_mask], weights=weight[increase_mask]) if increase_returns > 0 else 0\n", + " \n", + " # No change\n", + " no_change_mask = mask & (np.abs(tax_change) <= 1)\n", + " no_change_returns = weight[no_change_mask].sum()\n", + " no_change_pct = no_change_returns / est_returns * 100 if est_returns > 0 else 0\n", + " \n", + " # Zero tax\n", + " zero_tax_mask = mask & (reform_tax <= 0)\n", + " zero_tax_returns = weight[zero_tax_mask].sum()\n", + " zero_tax_pct = zero_tax_returns / 
est_returns * 100 if est_returns > 0 else 0\n", + " \n", + " results.append({\n", + " \"Federal AGI Range\": label,\n", + " \"Est # Returns\": int(round(est_returns)),\n", + " \"Est % Returns\": f\"{pct_returns:.1f}%\",\n", + " \"Old Avg Tax Liability\": f\"${int(round(old_avg_tax))}\",\n", + " \"New Avg Tax Liability\": f\"${int(round(new_avg_tax))}\",\n", + " \"Returns with Tax Change\": int(round(returns_with_change)),\n", + " \"% Returns in Range with Change\": f\"{pct_with_change:.1f}%\",\n", + " \"Old Avg Tax (Changed)\": f\"${int(round(old_avg_changed))}\",\n", + " \"New Avg Tax (Changed)\": f\"${int(round(new_avg_changed))}\",\n", + " \"Avg Tax Change\": f\"${int(round(avg_change))}\",\n", + " \"Total Dollar Change\": f\"${int(round(total_change))}\",\n", + " \"Tax Decrease # Returns\": int(round(decrease_returns)),\n", + " \"Tax Decrease % in Range\": f\"{decrease_pct:.1f}%\",\n", + " \"Total Decrease Amount\": f\"${int(round(total_decrease))}\",\n", + " \"Avg Decrease Amount\": f\"${int(round(avg_decrease))}\",\n", + " \"Tax Increase # Returns\": int(round(increase_returns)),\n", + " \"Tax Increase % in Range\": f\"{increase_pct:.1f}%\",\n", + " \"Total Increase Amount\": f\"${int(round(total_increase))}\",\n", + " \"Avg Increase Amount\": f\"${int(round(avg_increase))}\",\n", + " \"No Tax Change # Returns\": int(round(no_change_returns)),\n", + " \"No Change % Returns\": f\"{no_change_pct:.1f}%\",\n", + " \"Zero Tax # Returns\": int(round(zero_tax_returns)),\n", + " \"Zero Tax % Returns\": f\"{zero_tax_pct:.1f}%\"\n", + " })\n", + "\n", + "print(\"Bracket analysis complete!\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "cell-5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Totals calculated!\n" + ] + } + ], + "source": [ + "# Calculate totals\n", + "change_mask_all = np.abs(tax_change) > 1\n", + "decrease_mask_all = tax_change < -1\n", + "increase_mask_all = tax_change > 
1\n", + "no_change_mask_all = np.abs(tax_change) <= 1\n", + "zero_tax_mask_all = reform_tax <= 0\n", + "\n", + "total_old_avg = np.average(baseline_tax, weights=weight)\n", + "total_new_avg = np.average(reform_tax, weights=weight)\n", + "total_change_amount = (tax_change * weight).sum()\n", + "\n", + "returns_with_change_all = weight[change_mask_all].sum()\n", + "old_avg_changed_all = np.average(baseline_tax[change_mask_all], weights=weight[change_mask_all]) if returns_with_change_all > 0 else 0\n", + "new_avg_changed_all = np.average(reform_tax[change_mask_all], weights=weight[change_mask_all]) if returns_with_change_all > 0 else 0\n", + "avg_change_all = np.average(tax_change[change_mask_all], weights=weight[change_mask_all]) if returns_with_change_all > 0 else 0\n", + "\n", + "decrease_returns_all = weight[decrease_mask_all].sum()\n", + "total_decrease_all = (tax_change[decrease_mask_all] * weight[decrease_mask_all]).sum()\n", + "avg_decrease_all = np.average(tax_change[decrease_mask_all], weights=weight[decrease_mask_all]) if decrease_returns_all > 0 else 0\n", + "\n", + "increase_returns_all = weight[increase_mask_all].sum()\n", + "total_increase_all = (tax_change[increase_mask_all] * weight[increase_mask_all]).sum()\n", + "avg_increase_all = np.average(tax_change[increase_mask_all], weights=weight[increase_mask_all]) if increase_returns_all > 0 else 0\n", + "\n", + "no_change_returns_all = weight[no_change_mask_all].sum()\n", + "zero_tax_returns_all = weight[zero_tax_mask_all].sum()\n", + "\n", + "results.append({\n", + " \"Federal AGI Range\": \"Total\",\n", + " \"Est # Returns\": int(round(total_weight)),\n", + " \"Est % Returns\": \"100.0%\",\n", + " \"Old Avg Tax Liability\": f\"${int(round(total_old_avg))}\",\n", + " \"New Avg Tax Liability\": f\"${int(round(total_new_avg))}\",\n", + " \"Returns with Tax Change\": int(round(returns_with_change_all)),\n", + " \"% Returns in Range with Change\": f\"{returns_with_change_all / total_weight * 100:.1f}%\",\n", 
+ " \"Old Avg Tax (Changed)\": f\"${int(round(old_avg_changed_all))}\",\n", + " \"New Avg Tax (Changed)\": f\"${int(round(new_avg_changed_all))}\",\n", + " \"Avg Tax Change\": f\"${int(round(avg_change_all))}\",\n", + " \"Total Dollar Change\": f\"${int(round(total_change_amount))}\",\n", + " \"Tax Decrease # Returns\": int(round(decrease_returns_all)),\n", + " \"Tax Decrease % in Range\": f\"{decrease_returns_all / total_weight * 100:.1f}%\",\n", + " \"Total Decrease Amount\": f\"${int(round(total_decrease_all))}\",\n", + " \"Avg Decrease Amount\": f\"${int(round(avg_decrease_all))}\",\n", + " \"Tax Increase # Returns\": int(round(increase_returns_all)),\n", + " \"Tax Increase % in Range\": f\"{increase_returns_all / total_weight * 100:.1f}%\",\n", + " \"Total Increase Amount\": f\"${int(round(total_increase_all))}\",\n", + " \"Avg Increase Amount\": f\"${int(round(avg_increase_all))}\",\n", + " \"No Tax Change # Returns\": int(round(no_change_returns_all)),\n", + " \"No Change % Returns\": f\"{no_change_returns_all / total_weight * 100:.1f}%\",\n", + " \"Zero Tax # Returns\": int(round(zero_tax_returns_all)),\n", + " \"Zero Tax % Returns\": f\"{zero_tax_returns_all / total_weight * 100:.1f}%\"\n", + "})\n", + "\n", + "df_results = pd.DataFrame(results)\n", + "print(\"Totals calculated!\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "cell-6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "====================================================================================================\n", + "H.4216 - POLICYENGINE ANALYSIS (State Dataset, 5.39% Top Rate)\n", + "====================================================================================================\n", + "\n", + "Total Returns: 2,935,621\n", + "General Fund Impact: $-198,227,520\n", + "\n", + "RFA Estimate: $-119,100,000\n", + "Difference: $-79,127,520\n", + "Accuracy: 33.6%\n", + 
"====================================================================================================\n" + ] + } + ], + "source": [ + "# Display summary\n", + "print(\"=\"*100)\n", + "print(f\"H.4216 - POLICYENGINE ANALYSIS (State Dataset, {TOP_RATE*100:.2f}% Top Rate)\")\n", + "print(\"=\"*100)\n", + "print(f\"\\nTotal Returns: {int(total_weight):,}\")\n", + "print(f\"General Fund Impact: ${total_change_amount:,.0f}\")\n", + "print(f\"\\nRFA Estimate: ${RFA_ESTIMATE:,}\")\n", + "print(f\"Difference: ${total_change_amount - RFA_ESTIMATE:,.0f}\")\n", + "print(f\"Accuracy: {(1 - abs(total_change_amount - RFA_ESTIMATE) / abs(RFA_ESTIMATE)) * 100:.1f}%\")\n", + "print(\"=\"*100)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "cell-7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Exported to: pe_h4216_5.39_state_analysis.csv\n" + ] + } + ], + "source": [ + "# Export to CSV in RFA format\n", + "df_results.to_csv('pe_h4216_5.39_state_analysis.csv', index=False)\n", + "print(\"Exported to: pe_h4216_5.39_state_analysis.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "cell-8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "KEY METRICS:\n", + " Federal AGI Range Est # Returns Est % Returns Old Avg Tax Liability New Avg Tax Liability Total Dollar Change\n", + " $0* 619010 21.1% $0 $0 $0\n", + " $1 to $10000 502276 17.1% $0 $0 $0\n", + " $10001 to $20000 279412 9.5% $0 $10 $2672942\n", + " $20001 to $30000 252863 8.6% $64 $101 $9294693\n", + " $30001 to $40000 215980 7.4% $225 $200 $-5431497\n", + " $40001 to $50000 197525 6.7% $547 $406 $-27750680\n", + " $50001 to $75000 300857 10.2% $822 $734 $-26516064\n", + " $75001 to $100000 177284 6.0% $1781 $1673 $-19091546\n", + " $100001 to $150000 187946 6.4% $3292 $3490 $37224792\n", + " $150001 to $200000 73396 2.5% $6049 $6621 $41990104\n", + " $200001 to $300000 52882 
1.8% $9164 $9669 $26656968\n", + " $300001 to $500000 36977 1.3% $17163 $17280 $4306962\n", + "$500001 to $1000000 16526 0.6% $26140 $25753 $-6389232\n", + " Over $1000000 22686 0.8% $139623 $129256 $-235194960\n", + " Total 2935621 100.0% $2220 $2153 $-198227520\n" + ] + } + ], + "source": [ + "# Display key columns for quick comparison\n", + "display_cols = [\n", + " \"Federal AGI Range\", \"Est # Returns\", \"Est % Returns\",\n", + " \"Old Avg Tax Liability\", \"New Avg Tax Liability\", \"Total Dollar Change\"\n", + "]\n", + "print(\"\\nKEY METRICS:\")\n", + "print(df_results[display_cols].to_string(index=False))" + ] + }, + { + "cell_type": "markdown", + "id": "cell-9", + "metadata": {}, + "source": [ + "## Side-by-Side Comparison with RFA" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "cell-10", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "====================================================================================================\n", + "POLICYENGINE (State) vs RFA COMPARISON (5.39% Rate)\n", + "====================================================================================================\n", + " AGI Range PE Returns RFA Returns PE Impact RFA Impact Diff\n", + " $0* 619,010 78,854 $0 $-571,000 $+571,000\n", + " $1 to $10000 502,276 286,253 $0 $1,655,000 $-1,655,000\n", + " $10001 to $20000 279,412 310,122 $2,672,942 $2,872,000 $-199,058\n", + " $20001 to $30000 252,863 275,560 $9,294,693 $769,000 $+8,525,693\n", + " $30001 to $40000 215,980 269,566 $-5,431,497 $-19,360,000 $+13,928,503\n", + " $40001 to $50000 197,525 234,386 $-27,750,680 $-41,986,000 $+14,235,320\n", + " $50001 to $75000 300,857 407,593 $-26,516,064 $-82,146,000 $+55,629,936\n", + " $75001 to $100000 177,284 250,437 $-19,091,546 $-36,461,000 $+17,369,454\n", + " $100001 to $150000 187,946 298,343 $37,224,792 $3,115,000 $+34,109,792\n", + " $150001 to $200000 73,396 143,398 $41,990,104 $50,933,000 
$-8,942,896\n", + " $200001 to $300000 52,882 109,340 $26,656,968 $36,718,000 $-10,061,032\n", + " $300001 to $500000 36,977 56,123 $4,306,962 $-4,627,000 $+8,933,962\n", + "$500001 to $1000000 16,526 25,664 $-6,389,232 $-16,195,000 $+9,805,768\n", + " Over $1000000 22,686 11,936 $-235,194,960 $-13,767,000 $-221,427,960\n", + " Total 2,935,621 2,757,573 $-198,227,520 $-119,100,000 $-79,127,520\n", + "====================================================================================================\n" + ] + } + ], + "source": [ + "# Load RFA data\n", + "rfa_df = pd.read_csv('../rfa_h4216_analysis.csv')\n", + "\n", + "def parse_dollar(val):\n", + " if isinstance(val, str):\n", + " return float(val.replace('$', '').replace(',', '').replace('%', ''))\n", + " return val\n", + "\n", + "# Create comparison\n", + "comparison = []\n", + "for idx, pe_row in df_results.iterrows():\n", + " agi_range = pe_row['Federal AGI Range']\n", + " rfa_match = rfa_df[rfa_df['Federal AGI Range'] == agi_range]\n", + " \n", + " pe_returns = pe_row['Est # Returns']\n", + " pe_impact = parse_dollar(pe_row['Total Dollar Change'])\n", + " \n", + " if len(rfa_match) > 0:\n", + " rfa_returns = rfa_match['Est # Returns'].values[0]\n", + " rfa_impact = parse_dollar(rfa_match['Total Dollar Change'].values[0])\n", + " else:\n", + " rfa_returns = 0\n", + " rfa_impact = 0\n", + " \n", + " comparison.append({\n", + " 'AGI Range': agi_range,\n", + " 'PE Returns': f\"{pe_returns:,}\",\n", + " 'RFA Returns': f\"{rfa_returns:,}\" if rfa_returns else \"N/A\",\n", + " 'PE Impact': f\"${pe_impact:,.0f}\",\n", + " 'RFA Impact': f\"${rfa_impact:,.0f}\" if rfa_impact else \"N/A\",\n", + " 'Diff': f\"${pe_impact - rfa_impact:+,.0f}\" if rfa_impact else \"N/A\"\n", + " })\n", + "\n", + "comparison_df = pd.DataFrame(comparison)\n", + "print(\"\\n\" + \"=\"*100)\n", + "print(\"POLICYENGINE (State) vs RFA COMPARISON (5.39% Rate)\")\n", + "print(\"=\"*100)\n", + "print(comparison_df.to_string(index=False))\n", + 
"print(\"=\"*100)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "cell-11", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "========================================================================================================================\n", + "FULL POLICYENGINE ANALYSIS (RFA Format)\n", + "========================================================================================================================\n", + " Federal AGI Range Est # Returns Est % Returns Old Avg Tax Liability New Avg Tax Liability Returns with Tax Change % Returns in Range with Change Old Avg Tax (Changed) New Avg Tax (Changed) Avg Tax Change Total Dollar Change Tax Decrease # Returns Tax Decrease % in Range Total Decrease Amount Avg Decrease Amount Tax Increase # Returns Tax Increase % in Range Total Increase Amount Avg Increase Amount No Tax Change # Returns No Change % Returns Zero Tax # Returns Zero Tax % Returns\n", + " $0* 619010 21.1% $0 $0 0 0.0% $0 $0 $0 $0 0 0.0% $0 $0 0 0.0% $0 $0 619010 100.0% 619010 100.0%\n", + " $1 to $10000 502276 17.1% $0 $0 0 0.0% $0 $0 $0 $0 0 0.0% $0 $0 0 0.0% $0 $0 502276 100.0% 502276 100.0%\n", + " $10001 to $20000 279412 9.5% $0 $10 53961 19.3% $0 $50 $50 $2672942 0 0.0% $0 $0 53961 19.3% $2672922 $50 225451 80.7% 225413 80.7%\n", + " $20001 to $30000 252863 8.6% $64 $101 136052 53.8% $119 $188 $68 $9294693 5029 2.0% $-40734 $-8 131023 51.8% $9335378 $71 116811 46.2% 116751 46.2%\n", + " $30001 to $40000 215980 7.4% $225 $200 135926 62.9% $356 $316 $-40 $-5431497 88710 41.1% $-8472465 $-96 47216 21.9% $3040994 $64 80055 37.1% 79265 36.7%\n", + " $40001 to $50000 197525 6.7% $547 $406 152733 77.3% $706 $524 $-182 $-27750680 99989 50.6% $-33876124 $-339 52744 26.7% $6125390 $116 44792 22.7% 44131 22.3%\n", + " $50001 to $75000 300857 10.2% $822 $734 254734 84.7% $971 $867 $-104 $-26516064 163843 54.5% $-42432168 $-259 90891 30.2% $15916106 $175 46123 15.3% 46125 
15.3%\n", + " $75001 to $100000 177284 6.0% $1781 $1673 168196 94.9% $1875 $1761 $-114 $-19091546 123948 69.9% $-33622528 $-271 44248 25.0% $14531019 $328 9088 5.1% 9124 5.1%\n", + " $100001 to $150000 187946 6.4% $3292 $3490 186777 99.4% $3310 $3510 $199 $37224792 80645 42.9% $-11527897 $-143 106132 56.5% $48752732 $459 1169 0.6% 1105 0.6%\n", + " $150001 to $200000 73396 2.5% $6049 $6621 73396 100.0% $6049 $6621 $572 $41990104 5919 8.1% $-1521388 $-257 67477 91.9% $43511488 $645 0 0.0% 0 0.0%\n", + " $200001 to $300000 52882 1.8% $9164 $9669 52844 99.9% $9161 $9666 $504 $26656968 6389 12.1% $-1749618 $-274 46455 87.8% $28406596 $611 38 0.1% 0 0.0%\n", + " $300001 to $500000 36977 1.3% $17163 $17280 36977 100.0% $17163 $17280 $116 $4306962 20370 55.1% $-12902031 $-633 16607 44.9% $17208994 $1036 0 0.0% 0 0.0%\n", + "$500001 to $1000000 16526 0.6% $26140 $25753 16526 100.0% $26140 $25753 $-387 $-6389232 14096 85.3% $-14481286 $-1027 2430 14.7% $8092054 $3331 0 0.0% 0 0.0%\n", + " Over $1000000 22686 0.8% $139623 $129256 22686 100.0% $139623 $129256 $-10367 $-235194960 22658 99.9% $-235526416 $-10395 29 0.1% $331456 $11553 0 0.0% 0 0.0%\n", + " Total 2935621 100.0% $2220 $2153 1290809 44.0% $5048 $4894 $-154 $-198227520 631596 21.5% $-396152640 $-627 659213 22.5% $197925120 $300 1644813 56.0% 1643201 56.0%\n" + ] + } + ], + "source": [ + "# Full results table\n", + "print(\"\\n\" + \"=\"*120)\n", + "print(\"FULL POLICYENGINE ANALYSIS (RFA Format)\")\n", + "print(\"=\"*120)\n", + "pd.set_option('display.max_columns', None)\n", + "pd.set_option('display.width', None)\n", + "print(df_results.to_string(index=False))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + 
"version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/us/states/sc/h4216_analysis/5.39_rate/test/pe_h4216_test_analysis.csv b/us/states/sc/h4216_analysis/5.39_rate/test/pe_h4216_test_analysis.csv new file mode 100644 index 0000000..30e7d02 --- /dev/null +++ b/us/states/sc/h4216_analysis/5.39_rate/test/pe_h4216_test_analysis.csv @@ -0,0 +1,16 @@ +Federal AGI Range,Est # Returns,Est % Returns,Old Avg Tax Liability,New Avg Tax Liability,Returns with Tax Change,% Returns in Range with Change,Old Avg Tax (Changed),New Avg Tax (Changed),Avg Tax Change,Total Dollar Change,Tax Decrease # Returns,Tax Decrease % in Range,Total Decrease Amount,Avg Decrease Amount,Tax Increase # Returns,Tax Increase % in Range,Total Increase Amount,Avg Increase Amount,No Tax Change # Returns,No Change % Returns,Zero Tax # Returns,Zero Tax % Returns +$0*,727881,26.9%,$0,$0,0,0.0%,$0,$0,$0,$0,0,0.0%,$0,$0,0,0.0%,$0,$0,727881,100.0%,727881,100.0% +$1 to $10000,498186,18.4%,$0,$0,0,0.0%,$0,$0,$0,$0,0,0.0%,$0,$0,0,0.0%,$0,$0,498186,100.0%,498186,100.0% +$10001 to $20000,233000,8.6%,$0,$4,16527,7.1%,$0,$51,$51,$847688,0,0.0%,$0,$0,16527,7.1%,$847510,$51,216473,92.9%,215471,92.5% +$20001 to $30000,171515,6.3%,$40,$56,48979,28.6%,$131,$187,$56,$2756262,2691,1.6%,$-22726,$-8,46288,27.0%,$2778858,$60,122536,71.4%,121168,70.6% +$30001 to $40000,157010,5.8%,$149,$135,70118,44.7%,$333,$302,$-31,$-2140517,45821,29.2%,$-3836658,$-84,24298,15.5%,$1696106,$70,86892,55.3%,86762,55.3% +$40001 to $50000,132402,4.9%,$399,$302,95777,72.3%,$548,$414,$-134,$-12807614,49802,37.6%,$-16610524,$-334,45975,34.7%,$3801858,$83,36624,27.7%,35193,26.6% +$50001 to $75000,245406,9.1%,$701,$584,205399,83.7%,$836,$697,$-139,$-28577564,139789,57.0%,$-38644192,$-276,65610,26.7%,$10066116,$153,40008,16.3%,39028,15.9% +$75001 to $100000,165713,6.1%,$1452,$1290,163884,98.9%,$1468,$1305,$-163,$-26753744,118394,71.4%,$-40121352,$-339,45491,27.5%,$13367631,$294,1829,1.1%,1759,1.1% +$100001 to 
$150000,225396,8.3%,$2929,$3149,220578,97.9%,$2991,$3216,$225,$49609656,92356,41.0%,$-12660524,$-137,128222,56.9%,$62270140,$486,4818,2.1%,4765,2.1% +$150001 to $200000,42792,1.6%,$5236,$5998,42792,100.0%,$5236,$5998,$762,$32593342,879,2.1%,$-318522,$-362,41913,97.9%,$32911862,$785,0,0.0%,0,0.0% +$200001 to $300000,55391,2.0%,$9952,$10461,55391,100.0%,$9952,$10461,$509,$28205350,3638,6.6%,$-353202,$-97,51753,93.4%,$28558554,$552,0,0.0%,0,0.0% +$300001 to $500000,32748,1.2%,$16226,$15952,32748,100.0%,$16226,$15952,$-273,$-8944396,23765,72.6%,$-13389474,$-563,8983,27.4%,$4445078,$495,0,0.0%,0,0.0% +$500001 to $1000000,11418,0.4%,$31912,$29991,11417,100.0%,$31913,$29992,$-1921,$-21936012,11383,99.7%,$-22036886,$-1936,35,0.3%,$100874,$2922,0,0.0%,0,0.0% +Over $1000000,6993,0.3%,$171527,$156431,6993,100.0%,$171530,$156433,$-15096,$-105563360,6959,99.5%,$-111170816,$-15974,33,0.5%,$5607453,$167720,0,0.0%,0,0.0% +Total,2705850,100.0%,$1488,$1454,970603,35.9%,$4147,$4051,$-96,$-92710912,495476,18.3%,$-259164880,$-523,475127,17.6%,$166452032,$350,1735247,64.1%,1730213,63.9% diff --git a/us/states/sc/h4216_analysis/5.39_rate/test/sc_h4216_test_analysis.ipynb b/us/states/sc/h4216_analysis/5.39_rate/test/sc_h4216_test_analysis.ipynb new file mode 100644 index 0000000..2f0f470 --- /dev/null +++ b/us/states/sc/h4216_analysis/5.39_rate/test/sc_h4216_test_analysis.ipynb @@ -0,0 +1,478 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "cell-0", + "metadata": {}, + "source": [ + "# SC H.4216 Tax Reform Analysis - Test Dataset\n", + "\n", + "This notebook produces analysis in the same format as the RFA fiscal note for direct comparison.\n", + "\n", + "**Dataset:** `hf://policyengine/test/mar/SC.h5`\n", + "\n", + "**Reform:** H.4216 with 5.39% top rate (RFA version)" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "cell-1", + "metadata": {}, + "outputs": [], + "source": [ + "from policyengine_us import Microsimulation\n", + "from 
policyengine_us.reforms.states.sc.h4216.sc_h4216 import create_sc_h4216\n", + "from policyengine_core.reforms import Reform\n", + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "SC_DATASET = \"hf://policyengine/test/mar/SC.h5\"\n", + "TAX_YEAR = 2026" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "cell-2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loading simulations...\n", + "Done!\n" + ] + } + ], + "source": [ + "def create_h4216_reform(top_rate=0.0539):\n", + " \"\"\"\n", + " SC H.4216 Reform:\n", + " - 1.99% up to $30k\n", + " - top_rate over $30k (default 5.39% for RFA comparison)\n", + " \"\"\"\n", + " param_reform = Reform.from_dict(\n", + " {\n", + " \"gov.contrib.states.sc.h4216.in_effect\": {\n", + " \"2026-01-01.2100-12-31\": True\n", + " },\n", + " \"gov.contrib.states.sc.h4216.rates[1].rate\": {\n", + " \"2026-01-01.2100-12-31\": top_rate\n", + " }\n", + " },\n", + " country_id=\"us\",\n", + " )\n", + " base_reform = create_sc_h4216()\n", + " return (base_reform, param_reform)\n", + "\n", + "print(\"Loading simulations...\")\n", + "baseline = Microsimulation(dataset=SC_DATASET)\n", + "reform_sim = Microsimulation(dataset=SC_DATASET, reform=create_h4216_reform(0.0539))\n", + "print(\"Done!\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "cell-3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total tax units: 42,461\n", + "Weighted tax units: 2,705,850\n" + ] + } + ], + "source": [ + "# Get data - use .values to avoid double-weighting\n", + "baseline_tax = baseline.calculate(\"sc_income_tax\", period=TAX_YEAR, map_to=\"tax_unit\").values\n", + "reform_tax = reform_sim.calculate(\"sc_income_tax\", period=TAX_YEAR, map_to=\"tax_unit\").values\n", + "agi = baseline.calculate(\"adjusted_gross_income\", period=TAX_YEAR, map_to=\"tax_unit\").values\n", + "weight = 
baseline.calculate(\"tax_unit_weight\", period=TAX_YEAR).values\n", + "\n", + "tax_change = reform_tax - baseline_tax\n", + "\n", + "print(f\"Total tax units: {len(baseline_tax):,}\")\n", + "print(f\"Weighted tax units: {weight.sum():,.0f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "cell-4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Bracket analysis complete!\n" + ] + } + ], + "source": [ + "# Define income brackets matching RFA exactly\n", + "income_brackets = [\n", + " (float('-inf'), 0, \"$0*\"),\n", + " (0, 10000, \"$1 to $10000\"),\n", + " (10000, 20000, \"$10001 to $20000\"),\n", + " (20000, 30000, \"$20001 to $30000\"),\n", + " (30000, 40000, \"$30001 to $40000\"),\n", + " (40000, 50000, \"$40001 to $50000\"),\n", + " (50000, 75000, \"$50001 to $75000\"),\n", + " (75000, 100000, \"$75001 to $100000\"),\n", + " (100000, 150000, \"$100001 to $150000\"),\n", + " (150000, 200000, \"$150001 to $200000\"),\n", + " (200000, 300000, \"$200001 to $300000\"),\n", + " (300000, 500000, \"$300001 to $500000\"),\n", + " (500000, 1000000, \"$500001 to $1000000\"),\n", + " (1000000, float('inf'), \"Over $1000000\")\n", + "]\n", + "\n", + "total_weight = weight.sum()\n", + "results = []\n", + "\n", + "for lower, upper, label in income_brackets:\n", + " if lower == float('-inf'):\n", + " mask = agi <= upper\n", + " elif upper == float('inf'):\n", + " mask = agi > lower\n", + " else:\n", + " mask = (agi > lower) & (agi <= upper)\n", + " \n", + " if mask.sum() == 0:\n", + " continue\n", + " \n", + " # Basic stats\n", + " est_returns = weight[mask].sum()\n", + " pct_returns = est_returns / total_weight * 100\n", + " \n", + " old_avg_tax = np.average(baseline_tax[mask], weights=weight[mask]) if est_returns > 0 else 0\n", + " new_avg_tax = np.average(reform_tax[mask], weights=weight[mask]) if est_returns > 0 else 0\n", + " \n", + " # Returns with tax change (threshold $1)\n", + " change_mask = 
mask & (np.abs(tax_change) > 1)\n", + " returns_with_change = weight[change_mask].sum()\n", + " pct_with_change = returns_with_change / est_returns * 100 if est_returns > 0 else 0\n", + " \n", + " if returns_with_change > 0:\n", + " old_avg_changed = np.average(baseline_tax[change_mask], weights=weight[change_mask])\n", + " new_avg_changed = np.average(reform_tax[change_mask], weights=weight[change_mask])\n", + " avg_change = np.average(tax_change[change_mask], weights=weight[change_mask])\n", + " else:\n", + " old_avg_changed = 0\n", + " new_avg_changed = 0\n", + " avg_change = 0\n", + " \n", + " total_change = (tax_change[mask] * weight[mask]).sum()\n", + " \n", + " # Tax decrease\n", + " decrease_mask = mask & (tax_change < -1)\n", + " decrease_returns = weight[decrease_mask].sum()\n", + " decrease_pct = decrease_returns / est_returns * 100 if est_returns > 0 else 0\n", + " total_decrease = (tax_change[decrease_mask] * weight[decrease_mask]).sum() if decrease_returns > 0 else 0\n", + " avg_decrease = np.average(tax_change[decrease_mask], weights=weight[decrease_mask]) if decrease_returns > 0 else 0\n", + " \n", + " # Tax increase\n", + " increase_mask = mask & (tax_change > 1)\n", + " increase_returns = weight[increase_mask].sum()\n", + " increase_pct = increase_returns / est_returns * 100 if est_returns > 0 else 0\n", + " total_increase = (tax_change[increase_mask] * weight[increase_mask]).sum() if increase_returns > 0 else 0\n", + " avg_increase = np.average(tax_change[increase_mask], weights=weight[increase_mask]) if increase_returns > 0 else 0\n", + " \n", + " # No change\n", + " no_change_mask = mask & (np.abs(tax_change) <= 1)\n", + " no_change_returns = weight[no_change_mask].sum()\n", + " no_change_pct = no_change_returns / est_returns * 100 if est_returns > 0 else 0\n", + " \n", + " # Zero tax\n", + " zero_tax_mask = mask & (reform_tax <= 0)\n", + " zero_tax_returns = weight[zero_tax_mask].sum()\n", + " zero_tax_pct = zero_tax_returns / est_returns * 
100 if est_returns > 0 else 0\n", + " \n", + " results.append({\n", + " \"Federal AGI Range\": label,\n", + " \"Est # Returns\": int(round(est_returns)),\n", + " \"Est % Returns\": f\"{pct_returns:.1f}%\",\n", + " \"Old Avg Tax Liability\": f\"${int(round(old_avg_tax))}\",\n", + " \"New Avg Tax Liability\": f\"${int(round(new_avg_tax))}\",\n", + " \"Returns with Tax Change\": int(round(returns_with_change)),\n", + " \"% Returns in Range with Change\": f\"{pct_with_change:.1f}%\",\n", + " \"Old Avg Tax (Changed)\": f\"${int(round(old_avg_changed))}\",\n", + " \"New Avg Tax (Changed)\": f\"${int(round(new_avg_changed))}\",\n", + " \"Avg Tax Change\": f\"${int(round(avg_change))}\",\n", + " \"Total Dollar Change\": f\"${int(round(total_change))}\",\n", + " \"Tax Decrease # Returns\": int(round(decrease_returns)),\n", + " \"Tax Decrease % in Range\": f\"{decrease_pct:.1f}%\",\n", + " \"Total Decrease Amount\": f\"${int(round(total_decrease))}\",\n", + " \"Avg Decrease Amount\": f\"${int(round(avg_decrease))}\",\n", + " \"Tax Increase # Returns\": int(round(increase_returns)),\n", + " \"Tax Increase % in Range\": f\"{increase_pct:.1f}%\",\n", + " \"Total Increase Amount\": f\"${int(round(total_increase))}\",\n", + " \"Avg Increase Amount\": f\"${int(round(avg_increase))}\",\n", + " \"No Tax Change # Returns\": int(round(no_change_returns)),\n", + " \"No Change % Returns\": f\"{no_change_pct:.1f}%\",\n", + " \"Zero Tax # Returns\": int(round(zero_tax_returns)),\n", + " \"Zero Tax % Returns\": f\"{zero_tax_pct:.1f}%\"\n", + " })\n", + "\n", + "print(\"Bracket analysis complete!\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "cell-5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Totals calculated!\n" + ] + } + ], + "source": [ + "# Calculate totals\n", + "change_mask_all = np.abs(tax_change) > 1\n", + "decrease_mask_all = tax_change < -1\n", + "increase_mask_all = tax_change > 1\n", + 
"no_change_mask_all = np.abs(tax_change) <= 1\n", + "zero_tax_mask_all = reform_tax <= 0\n", + "\n", + "total_old_avg = np.average(baseline_tax, weights=weight)\n", + "total_new_avg = np.average(reform_tax, weights=weight)\n", + "total_change_amount = (tax_change * weight).sum()\n", + "\n", + "returns_with_change_all = weight[change_mask_all].sum()\n", + "old_avg_changed_all = np.average(baseline_tax[change_mask_all], weights=weight[change_mask_all]) if returns_with_change_all > 0 else 0\n", + "new_avg_changed_all = np.average(reform_tax[change_mask_all], weights=weight[change_mask_all]) if returns_with_change_all > 0 else 0\n", + "avg_change_all = np.average(tax_change[change_mask_all], weights=weight[change_mask_all]) if returns_with_change_all > 0 else 0\n", + "\n", + "decrease_returns_all = weight[decrease_mask_all].sum()\n", + "total_decrease_all = (tax_change[decrease_mask_all] * weight[decrease_mask_all]).sum()\n", + "avg_decrease_all = np.average(tax_change[decrease_mask_all], weights=weight[decrease_mask_all]) if decrease_returns_all > 0 else 0\n", + "\n", + "increase_returns_all = weight[increase_mask_all].sum()\n", + "total_increase_all = (tax_change[increase_mask_all] * weight[increase_mask_all]).sum()\n", + "avg_increase_all = np.average(tax_change[increase_mask_all], weights=weight[increase_mask_all]) if increase_returns_all > 0 else 0\n", + "\n", + "no_change_returns_all = weight[no_change_mask_all].sum()\n", + "zero_tax_returns_all = weight[zero_tax_mask_all].sum()\n", + "\n", + "results.append({\n", + " \"Federal AGI Range\": \"Total\",\n", + " \"Est # Returns\": int(round(total_weight)),\n", + " \"Est % Returns\": \"100.0%\",\n", + " \"Old Avg Tax Liability\": f\"${int(round(total_old_avg))}\",\n", + " \"New Avg Tax Liability\": f\"${int(round(total_new_avg))}\",\n", + " \"Returns with Tax Change\": int(round(returns_with_change_all)),\n", + " \"% Returns in Range with Change\": f\"{returns_with_change_all / total_weight * 100:.1f}%\",\n", + " 
\"Old Avg Tax (Changed)\": f\"${int(round(old_avg_changed_all))}\",\n", + " \"New Avg Tax (Changed)\": f\"${int(round(new_avg_changed_all))}\",\n", + " \"Avg Tax Change\": f\"${int(round(avg_change_all))}\",\n", + " \"Total Dollar Change\": f\"${int(round(total_change_amount))}\",\n", + " \"Tax Decrease # Returns\": int(round(decrease_returns_all)),\n", + " \"Tax Decrease % in Range\": f\"{decrease_returns_all / total_weight * 100:.1f}%\",\n", + " \"Total Decrease Amount\": f\"${int(round(total_decrease_all))}\",\n", + " \"Avg Decrease Amount\": f\"${int(round(avg_decrease_all))}\",\n", + " \"Tax Increase # Returns\": int(round(increase_returns_all)),\n", + " \"Tax Increase % in Range\": f\"{increase_returns_all / total_weight * 100:.1f}%\",\n", + " \"Total Increase Amount\": f\"${int(round(total_increase_all))}\",\n", + " \"Avg Increase Amount\": f\"${int(round(avg_increase_all))}\",\n", + " \"No Tax Change # Returns\": int(round(no_change_returns_all)),\n", + " \"No Change % Returns\": f\"{no_change_returns_all / total_weight * 100:.1f}%\",\n", + " \"Zero Tax # Returns\": int(round(zero_tax_returns_all)),\n", + " \"Zero Tax % Returns\": f\"{zero_tax_returns_all / total_weight * 100:.1f}%\"\n", + "})\n", + "\n", + "df_results = pd.DataFrame(results)\n", + "print(\"Totals calculated!\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "cell-6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "====================================================================================================\n", + "H.4216 - POLICYENGINE ANALYSIS (Test Dataset, 5.39% Top Rate)\n", + "====================================================================================================\n", + "\n", + "Total Returns: 2,705,849\n", + "General Fund Impact: $-92,710,912\n", + "\n", + "RFA Estimate: -$119,100,000\n", + "Difference: $26,389,088\n", + "Accuracy: 77.8%\n", + 
"====================================================================================================\n" + ] + } + ], + "source": [ + "# Display summary\n", + "print(\"=\"*100)\n", + "print(\"H.4216 - POLICYENGINE ANALYSIS (Test Dataset, 5.39% Top Rate)\")\n", + "print(\"=\"*100)\n", + "print(f\"\\nTotal Returns: {int(total_weight):,}\")\n", + "print(f\"General Fund Impact: ${total_change_amount:,.0f}\")\n", + "print(f\"\\nRFA Estimate: -$119,100,000\")\n", + "print(f\"Difference: ${total_change_amount - (-119100000):,.0f}\")\n", + "print(f\"Accuracy: {(1 - abs(total_change_amount - (-119100000)) / 119100000) * 100:.1f}%\")\n", + "print(\"=\"*100)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "cell-7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Exported to: pe_h4216_test_analysis.csv\n" + ] + } + ], + "source": [ + "# Export to CSV in RFA format\n", + "df_results.to_csv('pe_h4216_test_analysis.csv', index=False)\n", + "print(\"Exported to: pe_h4216_test_analysis.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "cell-8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "KEY METRICS:\n", + " Federal AGI Range Est # Returns Est % Returns Old Avg Tax Liability New Avg Tax Liability Total Dollar Change\n", + " $0* 727881 26.9% $0 $0 $0\n", + " $1 to $10000 498186 18.4% $0 $0 $0\n", + " $10001 to $20000 233000 8.6% $0 $4 $847688\n", + " $20001 to $30000 171515 6.3% $40 $56 $2756262\n", + " $30001 to $40000 157010 5.8% $149 $135 $-2140517\n", + " $40001 to $50000 132402 4.9% $399 $302 $-12807614\n", + " $50001 to $75000 245406 9.1% $701 $584 $-28577564\n", + " $75001 to $100000 165713 6.1% $1452 $1290 $-26753744\n", + " $100001 to $150000 225396 8.3% $2929 $3149 $49609656\n", + " $150001 to $200000 42792 1.6% $5236 $5998 $32593342\n", + " $200001 to $300000 55391 2.0% $9952 $10461 $28205350\n", + " $300001 to 
$500000 32748 1.2% $16226 $15952 $-8944396\n", + "$500001 to $1000000 11418 0.4% $31912 $29991 $-21936012\n", + " Over $1000000 6993 0.3% $171527 $156431 $-105563360\n", + " Total 2705850 100.0% $1488 $1454 $-92710912\n" + ] + } + ], + "source": [ + "# Display key columns for quick comparison\n", + "display_cols = [\n", + " \"Federal AGI Range\", \"Est # Returns\", \"Est % Returns\",\n", + " \"Old Avg Tax Liability\", \"New Avg Tax Liability\", \"Total Dollar Change\"\n", + "]\n", + "print(\"\\nKEY METRICS:\")\n", + "print(df_results[display_cols].to_string(index=False))" + ] + }, + { + "cell_type": "markdown", + "id": "cell-9", + "metadata": {}, + "source": [ + "## Side-by-Side Comparison with RFA" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cell-10", + "metadata": {}, + "outputs": [], + "source": "# Load RFA data\nrfa_df = pd.read_csv('../rfa_h4216_analysis.csv')\n\ndef parse_dollar(val):\n if isinstance(val, str):\n return float(val.replace('$', '').replace(',', '').replace('%', ''))\n return val\n\ndef parse_pct(val):\n if isinstance(val, str):\n return float(val.replace('%', ''))\n return val\n\n# Create comparison\ncomparison = []\nfor idx, pe_row in df_results.iterrows():\n agi_range = pe_row['Federal AGI Range']\n rfa_match = rfa_df[rfa_df['Federal AGI Range'] == agi_range]\n \n pe_returns = pe_row['Est # Returns']\n pe_impact = parse_dollar(pe_row['Total Dollar Change'])\n \n if len(rfa_match) > 0:\n rfa_returns = rfa_match['Est # Returns'].values[0]\n rfa_impact = parse_dollar(rfa_match['Total Dollar Change'].values[0])\n else:\n rfa_returns = 0\n rfa_impact = 0\n \n comparison.append({\n 'AGI Range': agi_range,\n 'PE Returns': f\"{pe_returns:,}\",\n 'RFA Returns': f\"{rfa_returns:,}\" if rfa_returns else \"N/A\",\n 'PE Impact': f\"${pe_impact:,.0f}\",\n 'RFA Impact': f\"${rfa_impact:,.0f}\" if rfa_impact else \"N/A\",\n 'Diff': f\"${pe_impact - rfa_impact:+,.0f}\" if rfa_impact else \"N/A\"\n })\n\ncomparison_df = 
pd.DataFrame(comparison)\nprint(\"\\n\" + \"=\"*100)\nprint(\"POLICYENGINE (Test) vs RFA COMPARISON (5.39% Rate)\")\nprint(\"=\"*100)\nprint(comparison_df.to_string(index=False))\nprint(\"=\"*100)" + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "cell-11", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "========================================================================================================================\n", + "FULL POLICYENGINE ANALYSIS (RFA Format)\n", + "========================================================================================================================\n", + " Federal AGI Range Est # Returns Est % Returns Old Avg Tax Liability New Avg Tax Liability Returns with Tax Change % Returns in Range with Change Old Avg Tax (Changed) New Avg Tax (Changed) Avg Tax Change Total Dollar Change Tax Decrease # Returns Tax Decrease % in Range Total Decrease Amount Avg Decrease Amount Tax Increase # Returns Tax Increase % in Range Total Increase Amount Avg Increase Amount No Tax Change # Returns No Change % Returns Zero Tax # Returns Zero Tax % Returns\n", + " $0* 727881 26.9% $0 $0 0 0.0% $0 $0 $0 $0 0 0.0% $0 $0 0 0.0% $0 $0 727881 100.0% 727881 100.0%\n", + " $1 to $10000 498186 18.4% $0 $0 0 0.0% $0 $0 $0 $0 0 0.0% $0 $0 0 0.0% $0 $0 498186 100.0% 498186 100.0%\n", + " $10001 to $20000 233000 8.6% $0 $4 16527 7.1% $0 $51 $51 $847688 0 0.0% $0 $0 16527 7.1% $847510 $51 216473 92.9% 215471 92.5%\n", + " $20001 to $30000 171515 6.3% $40 $56 48979 28.6% $131 $187 $56 $2756262 2691 1.6% $-22726 $-8 46288 27.0% $2778858 $60 122536 71.4% 121168 70.6%\n", + " $30001 to $40000 157010 5.8% $149 $135 70118 44.7% $333 $302 $-31 $-2140517 45821 29.2% $-3836658 $-84 24298 15.5% $1696106 $70 86892 55.3% 86762 55.3%\n", + " $40001 to $50000 132402 4.9% $399 $302 95777 72.3% $548 $414 $-134 $-12807614 49802 37.6% $-16610524 $-334 45975 34.7% $3801858 $83 36624 27.7% 35193 
26.6%\n", + " $50001 to $75000 245406 9.1% $701 $584 205399 83.7% $836 $697 $-139 $-28577564 139789 57.0% $-38644192 $-276 65610 26.7% $10066116 $153 40008 16.3% 39028 15.9%\n", + " $75001 to $100000 165713 6.1% $1452 $1290 163884 98.9% $1468 $1305 $-163 $-26753744 118394 71.4% $-40121352 $-339 45491 27.5% $13367631 $294 1829 1.1% 1759 1.1%\n", + " $100001 to $150000 225396 8.3% $2929 $3149 220578 97.9% $2991 $3216 $225 $49609656 92356 41.0% $-12660524 $-137 128222 56.9% $62270140 $486 4818 2.1% 4765 2.1%\n", + " $150001 to $200000 42792 1.6% $5236 $5998 42792 100.0% $5236 $5998 $762 $32593342 879 2.1% $-318522 $-362 41913 97.9% $32911862 $785 0 0.0% 0 0.0%\n", + " $200001 to $300000 55391 2.0% $9952 $10461 55391 100.0% $9952 $10461 $509 $28205350 3638 6.6% $-353202 $-97 51753 93.4% $28558554 $552 0 0.0% 0 0.0%\n", + " $300001 to $500000 32748 1.2% $16226 $15952 32748 100.0% $16226 $15952 $-273 $-8944396 23765 72.6% $-13389474 $-563 8983 27.4% $4445078 $495 0 0.0% 0 0.0%\n", + "$500001 to $1000000 11418 0.4% $31912 $29991 11417 100.0% $31913 $29992 $-1921 $-21936012 11383 99.7% $-22036886 $-1936 35 0.3% $100874 $2922 0 0.0% 0 0.0%\n", + " Over $1000000 6993 0.3% $171527 $156431 6993 100.0% $171530 $156433 $-15096 $-105563360 6959 99.5% $-111170816 $-15974 33 0.5% $5607453 $167720 0 0.0% 0 0.0%\n", + " Total 2705850 100.0% $1488 $1454 970603 35.9% $4147 $4051 $-96 $-92710912 495476 18.3% $-259164880 $-523 475127 17.6% $166452032 $350 1735247 64.1% 1730213 63.9%\n" + ] + } + ], + "source": [ + "# Full results table\n", + "print(\"\\n\" + \"=\"*120)\n", + "print(\"FULL POLICYENGINE ANALYSIS (RFA Format)\")\n", + "print(\"=\"*120)\n", + "pd.set_option('display.max_columns', None)\n", + "pd.set_option('display.width', None)\n", + "print(df_results.to_string(index=False))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + 
"version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file diff --git a/us/states/sc/h4216_analysis/h4216_analysis_comparison.md b/us/states/sc/h4216_analysis/h4216_analysis_comparison.md new file mode 100644 index 0000000..f274508 --- /dev/null +++ b/us/states/sc/h4216_analysis/h4216_analysis_comparison.md @@ -0,0 +1,286 @@ +# SC H.4216 Analysis: PolicyEngine vs RFA Comparison + +## Executive Summary + +This analysis compares PolicyEngine estimates against the SC Revenue and Fiscal Affairs (RFA) fiscal notes for H.4216, a tax reform bill that restructures South Carolina's income tax system. + +### Budget Impact Summary + +| Rate Option | RFA Estimate | State Dataset | Test Dataset | State Accuracy | Test Accuracy | +|-------------|--------------|---------------|--------------|----------------|---------------| +| **5.21%** | **-$308.7M** | -$393.0M | -$212.0M | 73% (27% over) | 69% (31% under) | +| **5.39%** | **-$119.1M** | -$198.2M | -$92.7M | 34% (66% over) | 78% (22% under) | + +**Key Finding:** The policy encoding is correct. All discrepancies stem from dataset characteristics, primarily the distribution of millionaire tax filers. 
+ +--- + +## H.4216 Reform Structure + +### Current SC Tax System (Baseline) +- 0% on income up to $3,640 +- 3% on income $3,640 to $18,230 +- 6% on income over $18,230 +- Taxable income = Federal Taxable Income + SC Additions - SC Subtractions + +### H.4216 Reform +- 1.99% on income up to $30,000 +- 5.21% (bill default) or 5.39% (RFA version) on income over $30,000 +- Taxable income = AGI - SC Subtractions - SCIAD (new deduction) +- No federal standard/itemized deductions in base + +### SCIAD (SC Individual Adjustment Deduction) + +| Filing Status | Deduction | Phase-out Start | Phase-out End | +|---------------|-----------|-----------------|---------------| +| Single | $15,000 | $40,000 AGI | $95,000 AGI | +| MFJ | $30,000 | $80,000 AGI | $190,000 AGI | +| HoH | $22,500 | $60,000 AGI | $142,500 AGI | + +--- + +## Dataset Comparison + +### Overview + +| Metric | RFA | State (Production) | Test | +|--------|-----|-------------------|------| +| **Total Returns** | 2,757,573 | 2,935,621 (+6.5%) | 2,705,850 (-1.9%) | +| **Millionaire Returns** | 11,936 | 22,686 (+90%) | 6,993 (-41%) | +| **Baseline Revenue** | ~$6.4B | ~$6.5B | ~$4.0B | +| **Median HH AGI** | N/A | $43,222 | $34,927 | +| **Avg HH AGI** | N/A | $103,858 | $74,061 | +| **Max AGI** | N/A | $6.4M | $418.7M | + +### Dataset Paths +- **State (Production):** `hf://policyengine/policyengine-us-data/states/SC.h5` +- **Test:** `hf://policyengine/test/mar/SC.h5` + +--- + +## 5.21% Rate Analysis + +### Budget Impact by Income Bracket + +| AGI Range | RFA | State | Test | State vs RFA | Test vs RFA | +|-----------|-----|-------|------|--------------|-------------| +| $0* | -$671K | $0 | $0 | +$671K | +$671K | +| $1-$10K | +$1.7M | $0 | $0 | -$1.7M | -$1.7M | +| $10K-$20K | +$2.9M | +$2.7M | +$0.8M | -$0.2M | -$2.1M | +| $20K-$30K | +$0.8M | +$9.3M | +$2.8M | +$8.5M | +$2.0M | +| $30K-$40K | -$19.4M | -$5.4M | -$2.1M | +$14.0M | +$17.3M | +| $40K-$50K | -$42.6M | -$28.1M | -$12.9M | +$14.5M | +$29.7M | +| $50K-$75K 
| -$89.9M | -$30.1M | -$30.5M | +$59.8M | +$59.4M | +| $75K-$100K | -$48.6M | -$26.5M | -$31.6M | +$22.1M | +$17.0M | +| $100K-$150K | -$26.1M | +$17.9M | +$28.5M | +$44.0M | +$54.6M | +| $150K-$200K | +$23.8M | +$26.7M | +$24.6M | +$2.9M | +$0.8M | +| $200K-$300K | +$4.0M | +$10.3M | +$9.6M | +$6.3M | +$5.6M | +| $300K-$500K | -$32.1M | -$16.5M | -$26.0M | +$15.6M | +$6.1M | +| $500K-$1M | -$37.4M | -$20.3M | -$33.2M | +$17.1M | +$4.2M | +| **Over $1M** | **-$45.0M** | **-$332.9M** | **-$142.0M** | **-$287.9M** | **-$97.0M** | +| **TOTAL** | **-$308.7M** | **-$393.0M** | **-$212.0M** | **-$84.3M** | **+$96.7M** | + +### Winner/Loser Distribution (5.21%) + +| Metric | RFA | State | Test | +|--------|-----|-------|------| +| **Tax Decrease** | 42.8% | 23.8% | 20.6% | +| **Tax Increase** | 22.6% | 20.1% | 15.2% | +| **No Change** | 34.6% | 56.0% | 64.2% | +| **Total Decrease $** | -$522.1M | -$545.9M | -$345.7M | +| **Total Increase $** | +$213.4M | +$152.9M | +$133.7M | + +--- + +## 5.39% Rate Analysis + +### Budget Impact by Income Bracket + +| AGI Range | RFA | State | Test | State vs RFA | Test vs RFA | +|-----------|-----|-------|------|--------------|-------------| +| $0* | -$571K | $0 | $0 | +$571K | +$571K | +| $1-$10K | +$1.7M | $0 | $0 | -$1.7M | -$1.7M | +| $10K-$20K | +$2.9M | +$2.7M | +$0.8M | -$0.2M | -$2.1M | +| $20K-$30K | +$0.8M | +$9.3M | +$2.8M | +$8.5M | +$2.0M | +| $30K-$40K | -$19.4M | -$5.4M | -$2.1M | +$14.0M | +$17.3M | +| $40K-$50K | -$42.0M | -$27.8M | -$12.8M | +$14.2M | +$29.2M | +| $50K-$75K | -$82.1M | -$26.5M | -$28.6M | +$55.6M | +$53.5M | +| $75K-$100K | -$36.5M | -$19.1M | -$26.8M | +$17.4M | +$9.7M | +| $100K-$150K | +$3.1M | +$37.2M | +$49.6M | +$34.1M | +$46.5M | +| $150K-$200K | +$50.9M | +$42.0M | +$32.6M | -$8.9M | -$18.3M | +| $200K-$300K | +$36.7M | +$26.7M | +$28.2M | -$10.0M | -$8.5M | +| $300K-$500K | -$4.6M | +$4.3M | -$8.9M | +$8.9M | -$4.3M | +| $500K-$1M | -$16.2M | -$6.4M | -$21.9M | +$9.8M | -$5.7M | +| **Over $1M** 
| **-$13.8M** | **-$235.2M** | **-$105.6M** | **-$221.4M** | **-$91.8M** | +| **TOTAL** | **-$119.1M** | **-$198.2M** | **-$92.7M** | **-$79.1M** | **+$26.4M** | + +### Winner/Loser Distribution (5.39%) + +| Metric | RFA | State | Test | +|--------|-----|-------|------| +| **Tax Decrease** | 38.7% | 21.5% | 18.3% | +| **Tax Increase** | 26.7% | 22.5% | 17.6% | +| **No Change** | 34.6% | 56.0% | 64.1% | +| **Total Decrease $** | -$388.7M | -$396.2M | -$259.2M | +| **Total Increase $** | +$269.6M | +$197.9M | +$166.5M | + +--- + +## Root Cause Analysis + +### 1. Millionaire Distribution (Primary Driver) + +The millionaire bracket (>$1M AGI) is the dominant driver of discrepancies: + +| Metric | RFA | State | Test | +|--------|-----|-------|------| +| **Millionaire Count** | 11,936 | 22,686 (+90%) | 6,993 (-41%) | +| **5.21% Impact** | -$45.0M | -$332.9M | -$142.0M | +| **5.39% Impact** | -$13.8M | -$235.2M | -$105.6M | +| **Avg Change (5.21%)** | -$4,031 | -$14,672 | -$20,306 | + +**State Dataset:** Has nearly **double** the millionaires RFA reports. At 5.21%, this bracket alone shows ~$288M more in tax cuts than RFA, more than three times the $84M net overestimate; the excess is partly offset by smaller tax cuts in the other brackets. + +**Test Dataset:** Has 41% fewer millionaires but an extreme outlier ($418.7M AGI) that skews averages significantly. + +### 2. Middle-Income Brackets ($30K-$100K) + +RFA shows much larger tax cuts in middle-income brackets: + +| Bracket Range | RFA Impact | State Impact | Test Impact | +|---------------|------------|--------------|-------------| +| $30K-$100K combined (5.21%) | -$200.5M | -$90.1M | -$77.1M | +| Difference vs RFA | - | +$110.4M | +$123.4M | + +Both PE datasets underweight middle-income filers relative to RFA. + +### 3.
Upper-Middle Income ($100K-$300K) + +PE shows tax **increases** where RFA shows mixed results: + +| Bracket Range | RFA Impact | State Impact | Test Impact | +|---------------|------------|--------------|-------------| +| $100K-$300K (5.21%) | +$1.7M | +$54.9M | +$62.7M | +| $100K-$300K (5.39%) | +$90.7M | +$105.9M | +$110.4M | + +This suggests SCIAD phase-out behavior may differ or income distributions within brackets vary. + +### 4. Low-Income Brackets ($0-$30K) + +| Bracket | RFA Returns | State Returns | Test Returns | +|---------|-------------|---------------|--------------| +| $0* | 78,854 (2.9%) | 619,010 (21.1%) | 727,881 (26.9%) | +| $1-$10K | 286,253 (10.4%) | 502,276 (17.1%) | 498,186 (18.4%) | + +PE datasets have significantly more zero/low-income tax units. These units have zero tax liability, so they don't affect budget impact but dilute the "% with tax change" statistics. + +--- + +## Summary of Dataset Characteristics + +### State (Production) Dataset +- **Overestimates** tax cuts at both rates +- Has 90% more millionaires than RFA +- Higher average incomes ($104K vs $74K Test) +- Baseline revenue matches RFA (~$6.5B) +- More total returns than RFA (+6.5%) + +### Test Dataset +- **Underestimates** tax cuts at both rates +- Has 41% fewer millionaires than RFA +- Lower average incomes ($74K) +- Baseline revenue 37% below RFA ($4.0B vs $6.4B) +- Return count close to RFA (-1.9%) +- Has extreme outlier ($418.7M AGI) + +### Ideal Dataset Would Have +- RFA's millionaire count (~11,936) +- RFA's return count (~2.76M) +- RFA's baseline revenue (~$6.4B) +- Middle-income weighting matching SC tax filer data + +--- + +## Recommendations + +### For Data Team +1. Investigate millionaire overcount in State dataset (22,686 vs 11,936 RFA) +2. Investigate baseline revenue undercount in Test dataset ($4.0B vs $6.4B) +3. Recalibrate weights to match SC DOR filer distribution by income bracket +4. Validate against IRS SOI data for SC + +### For Analysis +1.
Report range of estimates from both datasets +2. Use State for directional analysis (correct baseline revenue magnitude) +3. Use Test for return count validation (closer to RFA) +4. Note millionaire bracket as primary source of uncertainty + +### For Reporting +| Rate | Conservative | Central | Aggressive | +|------|--------------|---------|------------| +| 5.21% | -$212M (Test) | -$309M (RFA) | -$393M (State) | +| 5.39% | -$93M (Test) | -$119M (RFA) | -$198M (State) | + +--- + +## File Structure + +``` +sc/ +├── data_exploration.ipynb # State dataset exploration +├── data_exploration_test.ipynb # Test dataset exploration +├── sc_dataset_summary_weighted.csv # State dataset summary stats +├── sc_test_dataset_summary_weighted.csv # Test dataset summary stats +└── h4216_analysis/ + ├── h4216_analysis_comparison.md # This file + ├── 5.21_rate/ + │ ├── rfa_h4216_5.21_analysis.csv # RFA fiscal note data + │ ├── state/ + │ │ ├── pe_h4216_5.21_state_analysis.csv + │ │ └── sc_h4216_5.21_state_analysis.ipynb + │ └── test/ + │ ├── pe_h4216_5.21_analysis.csv + │ └── sc_h4216_5.21_analysis.ipynb + └── 5.39_rate/ + ├── rfa_h4216_analysis.csv # RFA fiscal note data + ├── state/ + │ ├── pe_h4216_5.39_state_analysis.csv + │ └── sc_h4216_5.39_state_analysis.ipynb + └── test/ + ├── pe_h4216_test_analysis.csv + └── sc_h4216_test_analysis.ipynb +``` + +--- + +## Technical Notes + +### PR #7514 Fix (February 2025) + +Fixed bug where `sc_additions` (QBI and SALT addbacks) were incorrectly applied under H.4216. Since H.4216 starts from AGI (before federal deductions), addbacks are inappropriate. 
+ +- **Before fix:** +$39.8M (wrong direction - showed revenue increase) +- **After fix:** -$93M to -$393M depending on dataset and rate + +### Policy Parameters Location +``` +policyengine-us/policyengine_us/parameters/gov/contrib/states/sc/h4216/ +``` + +### Microsimulation Usage +```python +from policyengine_us import Microsimulation +from policyengine_us.reforms.states.sc.h4216.sc_h4216 import create_sc_h4216 +from policyengine_core.reforms import Reform + +# Create reform with specific top rate +param_reform = Reform.from_dict({ + "gov.contrib.states.sc.h4216.in_effect": {"2026-01-01.2100-12-31": True}, + "gov.contrib.states.sc.h4216.rates[1].rate": {"2026-01-01.2100-12-31": 0.0521} +}, country_id="us") + +base_reform = create_sc_h4216() +reform = (base_reform, param_reform) + +sim = Microsimulation(dataset="hf://policyengine/test/mar/SC.h5", reform=reform) +``` diff --git a/us/states/sc/sc_dataset_summary_weighted.csv b/us/states/sc/sc_dataset_summary_weighted.csv new file mode 100644 index 0000000..6ff9465 --- /dev/null +++ b/us/states/sc/sc_dataset_summary_weighted.csv @@ -0,0 +1,22 @@ +Metric,Value +Household count (weighted),"1,887,388" +Person count (weighted),"5,451,832" +Average household size,2.9 +Weighted median household AGI,"$43,222" +Weighted average household AGI,"$103,858" +Weighted median person AGI,"$38,962" +Weighted average person AGI,"$93,926" +Unweighted median household AGI,"$41,884" +Unweighted median person AGI,"$40,216" +25th percentile household AGI,"$9,425" +75th percentile household AGI,"$91,877" +90th percentile household AGI,"$167,068" +95th percentile household AGI,"$268,311" +Max household AGI,"$6,430,892" +Total households with children,"598,564" +Households with 1 child,"247,956" +Households with 2 children,"190,545" +Households with 3+ children,"160,063" +Total children under 18,"1,198,147" +Children under 6,"349,101" +Children under 3,"169,412" diff --git a/us/states/sc/sc_test_dataset_summary_weighted.csv 
b/us/states/sc/sc_test_dataset_summary_weighted.csv new file mode 100644 index 0000000..e5cc13e --- /dev/null +++ b/us/states/sc/sc_test_dataset_summary_weighted.csv @@ -0,0 +1,22 @@ +Metric,Value +Household count (weighted),"1,844,111" +Person count (weighted),"5,389,226" +Average household size,2.9 +Weighted median household AGI,"$34,927" +Weighted average household AGI,"$74,061" +Weighted median person AGI,"$34,911" +Weighted average person AGI,"$78,962" +Unweighted median household AGI,"$57,308" +Unweighted median person AGI,"$58,750" +25th percentile household AGI,"$2,489" +75th percentile household AGI,"$86,301" +90th percentile household AGI,"$140,239" +95th percentile household AGI,"$236,759" +Max household AGI,"$418,650,960" +Total households with children,"663,513" +Households with 1 child,"303,647" +Households with 2 children,"209,804" +Households with 3+ children,"150,062" +Total children under 18,"1,247,050" +Children under 6,"361,890" +Children under 3,"177,869"