PolicyEngine · baogorek · Feb 17, 2026 · Feb 17, 2026 · Feb 18, 2026 · Feb 18, 2026
diff --git a/.github/workflows/local_area_publish.yaml b/.github/workflows/local_area_publish.yaml
@@ -5,7 +5,7 @@ on:
   # push:
   #   branches: [main]
   #   paths:
-  #     - 'policyengine_us_data/datasets/cps/local_area_calibration/**'
+  #     - 'policyengine_us_data/calibration/**'
   #     - '.github/workflows/local_area_publish.yaml'
   #     - 'modal_app/**'
   # repository_dispatch:
@@ -24,7 +24,7 @@ on:
         type: boolean
 
 # Trigger strategy:
-# 1. Automatic: Code changes to local_area_calibration/ pushed to main
+# 1. Automatic: Code changes to calibration/ pushed to main
 # 2. repository_dispatch: Calibration workflow triggers after uploading new weights
 # 3. workflow_dispatch: Manual trigger with optional parameters
 
@@ -72,5 +72,60 @@ jobs:
           echo "" >> $GITHUB_STEP_SUMMARY
           echo "Files have been uploaded to GCS and staged on HuggingFace." >> $GITHUB_STEP_SUMMARY
           echo "" >> $GITHUB_STEP_SUMMARY
-          echo "### Next step: Promote to production" >> $GITHUB_STEP_SUMMARY
-          echo "Trigger the **Promote Local Area H5 Files** workflow with the version from the build output." >> $GITHUB_STEP_SUMMARY
+          echo "### Next step: Validation runs automatically" >> $GITHUB_STEP_SUMMARY
+          echo "The validate-staging job will now check all staged H5s." >> $GITHUB_STEP_SUMMARY
+
+  validate-staging:
+    needs: publish-local-area
+    runs-on: ubuntu-latest
+    env:
+      HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
+    steps:
+      - name: Checkout repo
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.13'
+
+      - name: Set up uv
+        uses: astral-sh/setup-uv@v5
+
+      - name: Install dependencies
+        run: uv sync
+
+      - name: Validate staged H5s
+        run: |
+          uv run python -m policyengine_us_data.calibration.validate_staging \
+            --area-type states --output validation_results.csv
+
+      - name: Upload validation results to HF
+        run: |
+          uv run python -c "
+          from policyengine_us_data.utils.huggingface import upload
+          upload('validation_results.csv',
+                 'policyengine/policyengine-us-data',
+                 'calibration/logs/validation_results.csv')
+          "
+
+      - name: Post validation summary  # appends row and FAIL counts from the CSV to the job summary
+        if: always()  # still runs when an earlier step in this job failed
+        run: |
+          echo "## Validation Results" >> $GITHUB_STEP_SUMMARY
+          if [ -f validation_results.csv ]; then
+            TOTAL=$(tail -n +2 validation_results.csv | wc -l)
+            FAILS=$(grep -c ',FAIL,' validation_results.csv || true)
+            echo "- **${TOTAL}** targets validated" >> $GITHUB_STEP_SUMMARY
+            echo "- **${FAILS}** sanity failures" >> $GITHUB_STEP_SUMMARY
+            echo "" >> $GITHUB_STEP_SUMMARY
+            echo "Review in dashboard, then trigger **Promote** workflow." >> $GITHUB_STEP_SUMMARY
+          else
+            echo "Validation did not produce output." >> $GITHUB_STEP_SUMMARY
+          fi
+
+      # Keep the CSV as an artifact even when an earlier step in this job failed.
+      - name: Upload validation artifact
+        if: always()
+        uses: actions/upload-artifact@v4
+        with: { name: validation-results, path: validation_results.csv }
diff --git a/.gitignore b/.gitignore
@@ -30,12 +30,12 @@ docs/.ipynb_checkpoints/
 ## ACA PTC state-level uprating factors
 !policyengine_us_data/storage/aca_ptc_multipliers_2022_2024.csv
 
-## Raw input cache for database pipeline
-policyengine_us_data/storage/calibration/raw_inputs/
+## Calibration run outputs (weights, diagnostics, packages, config)
+policyengine_us_data/storage/calibration/
 
 ## Batch processing checkpoints
 completed_*.txt
 
 ## Test fixtures
-!policyengine_us_data/tests/test_local_area_calibration/test_fixture_50hh.h5
+!policyengine_us_data/tests/test_calibration/test_fixture_50hh.h5
 oregon_ctc_analysis.py
diff --git a/Makefile b/Makefile
@@ -1,4 +1,12 @@
-.PHONY: all format test install download upload docker documentation data validate-data calibrate publish-local-area clean build paper clean-paper presentations database database-refresh promote-database promote-dataset
+.PHONY: all format test install download upload docker documentation data data-legacy validate-data validate-package calibrate calibrate-build publish-local-area upload-calibration upload-dataset upload-database push-to-modal build-matrices calibrate-modal calibrate-modal-national calibrate-both stage-h5s stage-national-h5 stage-all-h5s pipeline validate-staging validate-staging-full upload-validation check-staging check-sanity clean build paper clean-paper presentations database database-refresh promote-database promote-dataset promote build-h5s validate-local # command-style targets; add new ones here
+
+GPU ?= A100-80GB
+EPOCHS ?= 200
+NATIONAL_GPU ?= T4
+NATIONAL_EPOCHS ?= 200
+BRANCH ?= $(shell git rev-parse --abbrev-ref HEAD)
+NUM_WORKERS ?= 8
+VERSION ?=
 
 HF_CLONE_DIR ?= $(HOME)/huggingface/policyengine-us-data
 
@@ -79,8 +87,8 @@ promote-database:
 	@echo "Copied DB and raw_inputs to HF clone. Now cd to HF repo, commit, and push."
 
 promote-dataset:
-	cp policyengine_us_data/storage/stratified_extended_cps_2024.h5 \
-		$(HF_CLONE_DIR)/calibration/stratified_extended_cps.h5
+	cp policyengine_us_data/storage/source_imputed_stratified_extended_cps_2024.h5 \
+		$(HF_CLONE_DIR)/calibration/source_imputed_stratified_extended_cps.h5
 	@echo "Copied dataset to HF clone. Now cd to HF repo, commit, and push."
 
 data: download
@@ -90,20 +98,146 @@ data: download
 	python policyengine_us_data/datasets/puf/irs_puf.py
 	python policyengine_us_data/datasets/puf/puf.py
 	python policyengine_us_data/datasets/cps/extended_cps.py
+	python policyengine_us_data/calibration/create_stratified_cps.py
+	python policyengine_us_data/calibration/create_source_imputed_cps.py
+
+data-legacy: data  # Runs the enhanced_cps.py and small_enhanced_cps.py scripts after the data target
 	python policyengine_us_data/datasets/cps/enhanced_cps.py
 	python policyengine_us_data/datasets/cps/small_enhanced_cps.py
-	python policyengine_us_data/datasets/cps/local_area_calibration/create_stratified_cps.py
 
 calibrate: data
 	python -m policyengine_us_data.calibration.unified_calibration \
-		--puf-dataset policyengine_us_data/storage/puf_2024.h5
+		--target-config policyengine_us_data/calibration/target_config.yaml
+
+calibrate-build: data  # unified_calibration with target_config.yaml, stopping at --build-only
+	python -m policyengine_us_data.calibration.unified_calibration \
+		--target-config policyengine_us_data/calibration/target_config.yaml \
+		--build-only
+
+validate-package:  # Runs the calibration.validate_package module with no arguments
+	python -m policyengine_us_data.calibration.validate_package
 
 publish-local-area:
-	python policyengine_us_data/datasets/cps/local_area_calibration/publish_local_area.py
+	python policyengine_us_data/calibration/publish_local_area.py --upload
+
+build-h5s:  # publish_local_area with --states-only on the local weights and dataset; no --upload flag is passed
+	python -m policyengine_us_data.calibration.publish_local_area \
+		--weights-path policyengine_us_data/storage/calibration/calibration_weights.npy \
+		--dataset-path policyengine_us_data/storage/source_imputed_stratified_extended_cps_2024.h5 \
+		--n-clones 430 \
+		--seed 42 \
+		--states-only
+
+validate-local:  # validate_staging for states with --hf-prefix local_area_build; output path validation_results.csv
+	python -m policyengine_us_data.calibration.validate_staging \
+		--hf-prefix local_area_build \
+		--area-type states --output validation_results.csv
 
 validate-data:
 	python -c "from policyengine_us_data.storage.upload_completed_datasets import validate_all_datasets; validate_all_datasets()"
 
+upload-calibration:  # Calls upload_calibration_artifacts() from utils.huggingface with no arguments
+	python -c "from policyengine_us_data.utils.huggingface import upload_calibration_artifacts; \
+		upload_calibration_artifacts()"
+
+upload-dataset:  # Sends the source-imputed stratified extended CPS H5 through utils.huggingface.upload
+	python -c "from policyengine_us_data.utils.huggingface import upload; \
+		upload('policyengine_us_data/storage/source_imputed_stratified_extended_cps_2024.h5', \
+		'policyengine/policyengine-us-data', \
+		'calibration/source_imputed_stratified_extended_cps.h5')"
+	@echo "Dataset uploaded to HF."
+
+upload-database:  # Sends storage/calibration/policy_data.db through utils.huggingface.upload
+	python -c "from policyengine_us_data.utils.huggingface import upload; \
+		upload('policyengine_us_data/storage/calibration/policy_data.db', \
+		'policyengine/policyengine-us-data', \
+		'calibration/policy_data.db')"
+	@echo "Database uploaded to HF."
+
+push-to-modal:  # Puts five calibration files plus the source-imputed CPS H5 on the local-area-staging Modal volume
+	modal volume put local-area-staging \
+		policyengine_us_data/storage/calibration/calibration_weights.npy \
+		calibration_inputs/calibration/calibration_weights.npy --force
+	modal volume put local-area-staging \
+		policyengine_us_data/storage/calibration/stacked_blocks.npy \
+		calibration_inputs/calibration/stacked_blocks.npy --force
+	modal volume put local-area-staging \
+		policyengine_us_data/storage/calibration/stacked_takeup.npz \
+		calibration_inputs/calibration/stacked_takeup.npz --force
+	modal volume put local-area-staging \
+		policyengine_us_data/storage/calibration/policy_data.db \
+		calibration_inputs/calibration/policy_data.db --force
+	modal volume put local-area-staging \
+		policyengine_us_data/storage/calibration/geo_labels.json \
+		calibration_inputs/calibration/geo_labels.json --force
+	modal volume put local-area-staging \
+		policyengine_us_data/storage/source_imputed_stratified_extended_cps_2024.h5 \
+		calibration_inputs/calibration/source_imputed_stratified_extended_cps.h5 --force
+	@echo "All calibration inputs pushed to Modal volume."
+
+build-matrices:  # Runs build_package in the Modal calibration runner for BRANCH
+	modal run modal_app/remote_calibration_runner.py::build_package \
+		--branch $(BRANCH)
+
+calibrate-modal:  # Modal calibration run parameterised by BRANCH, GPU and EPOCHS; passes --push-results
+	modal run modal_app/remote_calibration_runner.py::main \
+		--branch $(BRANCH) --gpu $(GPU) --epochs $(EPOCHS) \
+		--push-results
+
+calibrate-modal-national:  # Modal calibration run with --national, using NATIONAL_GPU and NATIONAL_EPOCHS
+	modal run modal_app/remote_calibration_runner.py::main \
+		--branch $(BRANCH) --gpu $(NATIONAL_GPU) \
+		--epochs $(NATIONAL_EPOCHS) \
+		--push-results --national
+
+calibrate-both:  # Runs both Modal calibrations concurrently; waits on each PID so a failure in either fails this target
+	$(MAKE) calibrate-modal & a=$$!; $(MAKE) calibrate-modal-national & b=$$!; rc=0; wait $$a || rc=$$?; wait $$b || rc=$$?; exit $$rc
+
+stage-h5s:  # --skip-download is appended only when SKIP_DOWNLOAD is non-empty
+	modal run modal_app/local_area.py::main \
+		--branch $(BRANCH) --num-workers $(NUM_WORKERS) \
+		$(if $(SKIP_DOWNLOAD),--skip-download)
+
+stage-national-h5:  # Runs main_national from modal_app/local_area.py for BRANCH
+	modal run modal_app/local_area.py::main_national \
+		--branch $(BRANCH)
+
+stage-all-h5s:  # Runs both staging jobs concurrently; waits on each PID so a failure in either fails this target
+	$(MAKE) stage-h5s & a=$$!; $(MAKE) stage-national-h5 & b=$$!; rc=0; wait $$a || rc=$$?; wait $$b || rc=$$?; exit $$rc
+
+promote:  # VERSION falls back to project.version read from pyproject.toml when it is empty
+	$(eval VERSION := $(or $(VERSION),$(shell python -c "import tomllib; print(tomllib.load(open('pyproject.toml','rb'))['project']['version'])")))
+	modal run modal_app/local_area.py::main_promote \
+		--branch $(BRANCH) --version $(VERSION)
+
+validate-staging:  # validate_staging for states only; output path validation_results.csv
+	python -m policyengine_us_data.calibration.validate_staging \
+		--area-type states --output validation_results.csv
+
+validate-staging-full:  # validate_staging with --area-type states,districts; output path validation_results.csv
+	python -m policyengine_us_data.calibration.validate_staging \
+		--area-type states,districts --output validation_results.csv
+
+upload-validation:  # Sends validation_results.csv to calibration/logs/ through utils.huggingface.upload
+	python -c "from policyengine_us_data.utils.huggingface import upload; \
+		upload('validation_results.csv', \
+		'policyengine/policyengine-us-data', \
+		'calibration/logs/validation_results.csv')"
+
+check-staging:  # Runs the calibration.check_staging_sums module with no arguments
+	python -m policyengine_us_data.calibration.check_staging_sums
+
+check-sanity:  # validate_staging with --sanity-only, restricted to area type states and area NC
+	python -m policyengine_us_data.calibration.validate_staging \
+		--sanity-only --area-type states --areas NC
+
+pipeline:  # Stages run as sequential sub-makes so that make -j cannot overlap or reorder them
+	$(MAKE) data && $(MAKE) upload-dataset && $(MAKE) build-matrices && $(MAKE) calibrate-both && $(MAKE) stage-all-h5s
+	@echo ""; echo "========================================"
+	@echo "Pipeline complete. H5s are in HF staging."
+	@echo "Run 'Promote Local Area H5 Files' workflow in GitHub to publish."
+	@echo "========================================"
+
 clean:
 	rm -f policyengine_us_data/storage/*.h5
 	rm -f policyengine_us_data/storage/*.db

diff --git a/changelog.d/add-database-build-test.added.md b/changelog.d/add-database-build-test.added.md
@@ -0,0 +1 @@
+Add end-to-end test for calibration database build pipeline.
diff --git a/changelog.d/calibration-pipeline-improvements.added.md b/changelog.d/calibration-pipeline-improvements.added.md
@@ -0,0 +1,8 @@
+Unified calibration pipeline with GPU-accelerated L1/L0 solver, target config YAML, and CLI package validator.
+Per-state and per-county precomputation replacing per-clone Microsimulation (51 sims instead of 436).
+Parallel state, county, and clone loop processing via ProcessPoolExecutor.
+Block-level takeup re-randomization with deterministic seeded draws.
+Hierarchical uprating with ACA PTC state-level CSV factors and CD reconciliation.
+Modal remote runner with Volume support, CUDA OOM fixes, and checkpointing.
+Stacked dataset builder with sparse CD subsets and calibration block propagation.
+Staging validation script (validate_staging.py) with sim.calculate() comparison and sanity checks.
diff --git a/changelog.d/calibration-pipeline-improvements.changed.md b/changelog.d/calibration-pipeline-improvements.changed.md
@@ -0,0 +1,3 @@
+Geography assignment now prevents clone-to-CD collisions.
+County-dependent vars (aca_ptc) selectively precomputed per county; other vars use state-only path.
+Target config switched to finest-grain include mode (~18K targets).
diff --git a/changelog.d/calibration-pipeline-improvements.fixed.md b/changelog.d/calibration-pipeline-improvements.fixed.md
@@ -0,0 +1,3 @@
+Cross-state cache pollution in matrix builder precomputation.
+Takeup draw ordering mismatch between matrix builder and stacked builder.
+At-large district geoid mismatch (7 districts had 0 estimates).
diff --git a/changelog.d/migrate-to-towncrier.changed.md b/changelog.d/migrate-to-towncrier.changed.md
@@ -0,0 +1 @@
+Migrated from changelog_entry.yaml to towncrier fragments to eliminate merge conflicts.
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Add end-to-end test for calibration database build pipeline.
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Migrated from changelog_entry.yaml to towncrier fragments to eliminate merge conflicts.