From a474c07b9a0293b9a4dd90099d8e24bbf62a35bd Mon Sep 17 00:00:00 2001
From: D-Pankey <30415217+D-Pankey@users.noreply.github.com>
Date: Tue, 3 Feb 2026 17:49:11 -0500
Subject: [PATCH 01/16] updated parse command to take multiple files
---
scripts/parse_cohort_files.py | 22 ++++++++++++++--------
1 file changed, 14 insertions(+), 8 deletions(-)
diff --git a/scripts/parse_cohort_files.py b/scripts/parse_cohort_files.py
index 46f83c0..5640b15 100644
--- a/scripts/parse_cohort_files.py
+++ b/scripts/parse_cohort_files.py
@@ -83,17 +83,23 @@ def ci_tags_to_primary_ids(samples, file_group):
return primary_ids
-def parse_cohort_file(input_file, output_file, file_group="b54d035d-f63c-4ea8-86fb-9dbc976bb7fe"):
- # Parse cohort file
- samples = get_list_of_samples_from_cohort_file(input_file)
- # Convert from ciTags to primaryIds
- primary_ids = ci_tags_to_primary_ids(samples, file_group)
+import os
+
+def parse_cohort_files(input_directory, output_file):
+ all_samples = []
+
+ for file in os.listdir(input_directory):
+ if file.endswith(".txt"):
+ file_path = os.path.join(input_directory, file)
+ samples = get_list_of_samples_from_cohort_file(file_path)
+ all_samples.extend(samples)
+
+ # Write all samples to the output file
with open(output_file, "w") as f:
- for sample in primary_ids:
+ for sample in all_samples:
f.write(f"{sample}\n")
- print(f"File {output_file} successfully generated. Number of samples to run {len(primary_ids)}")
-
+ print(f"File {output_file} successfully generated. Number of samples to run {len(all_samples)}")
HELP = """USAGE:
python3 parse_cohort_files.py parse