From dd80f909434fb864e1737dc2dcb7ec0f888cf140 Mon Sep 17 00:00:00 2001 From: Nikhil Kumar Date: Mon, 9 Jan 2023 11:41:26 -0500 Subject: [PATCH 01/11] Added pipeline update cache method --- beaglecli | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/beaglecli b/beaglecli index 8357061..9946edf 100755 --- a/beaglecli +++ b/beaglecli @@ -75,6 +75,7 @@ Usage: beaglecli files patch [--file-path=] [--file-type=] [--file-group=] [--metadata=]... [--size=] beaglecli files list [--page-size=] [--path=]... [--metadata=]... [--file-group=]... [--file-name=]... [--filename-regex=] [--file-type=]... [--all]... [--packaged]... [--force]... beaglecli files delete --file-id=... + beaglecli pipeline update-cache beaglecli sample create beaglecli sample list [--sample-id=] beaglecli sample redact [--value=] @@ -157,6 +158,11 @@ def files_commands(arguments, config): return _patch_file(arguments, config) +def pipeline_commands(arguments, config): + if arguments.get('update-cache'): + return _update_cache(arguments, config) + + def storage_commands(arguments, config): if arguments.get('list'): return _list_storage(arguments, config) @@ -221,6 +227,8 @@ def sample_commands(arguments, config): def command(arguments, config): if arguments.get('files'): return files_commands(arguments, config) + if arguments.get('pipeline'): + return pipeline_commands(arguments, config) if arguments.get('storage'): return storage_commands(arguments, config) if arguments.get('file-types'): @@ -351,6 +359,33 @@ def _get_latest_runs(run_dict): return run_list +def _get_cwl_apps(): + url = urljoin(BEAGLE_ENDPOINT, API['pipelines']) + params = dict() + params['page_size'] = 1000000 + response = requests.get(url, headers={ + 'Authorization': 'Bearer %s' % config.token, 'Content-Type': 'application/json'}, params=params) + cwl_set = set() + if response.ok: + response_json = response.json() + if "results" in response_json: + result_list = response_json["results"] + for single_pipeline in result_list: + pipeline_type = single_pipeline["pipeline_type"] + if pipeline_type == 0: + github = single_pipeline["github"] + version = single_pipeline["version"] + entrypoint = single_pipeline["entrypoint"] + cwl_set.add((github, version, entrypoint)) + return cwl_set + else: + print("Error: beagle returned an empty") + exit(1) + else: + print("ERROR: Could not retrieve app list") + exit(1) + + def _get_apps_dict(): url = urljoin(BEAGLE_ENDPOINT, API['pipelines']) params = dict() @@ -712,6 +747,14 @@ def _create_sample(arguments, config): response_json = json.dumps(response.json(), indent=4) return response_json +# Pipeline + + +def _update_cache(arguments, config): + cache_path = arguments.get('') + cwl_apps = _get_cwl_apps() + print(cwl_apps) + # Update From 9f7eec82f2cf85283db1f39966d8f781e4807773 Mon Sep 17 00:00:00 2001 From: Nikhil Kumar Date: Mon, 9 Jan 2023 11:41:49 -0500 Subject: [PATCH 02/11] Fixed formatting --- beaglecli | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/beaglecli b/beaglecli index 9946edf..edb15ba 100755 --- a/beaglecli +++ b/beaglecli @@ -630,9 +630,10 @@ def _list_files(arguments, config): params['filename_regex'] = filename_regex params['file_type'] = file_type if all_pages: - count_params = params - count_params['count'] = True - params['page_size'] = requests.get(urljoin(BEAGLE_ENDPOINT, API['files']), headers={'Authorization': 'Bearer %s' % config.token}, params=count_params).json()['count'] + count_params = params + count_params['count'] = True + params['page_size'] = requests.get(urljoin(BEAGLE_ENDPOINT, API['files']), headers={ + 'Authorization': 'Bearer %s' % config.token}, params=count_params).json()['count'] response = requests.get(urljoin(BEAGLE_ENDPOINT, API['files']), headers={ 'Authorization': 'Bearer %s' % config.token}, params=params) response_json = json.dumps(response.json(), indent=4) @@ -1002,6 +1003,7 @@ def _redact_sample(arguments, config): response_json = json.dumps(response.json(), indent=4) return response_json + if __name__ == '__main__': config = Config.load() authenticate_command(config) From 68aa2f8e7e64413e116cd30833fd10df056c5873 Mon Sep 17 00:00:00 2001 From: Nikhil Kumar Date: Mon, 9 Jan 2023 11:45:01 -0500 Subject: [PATCH 03/11] Updated requirements for updating cache --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 6cf06d1..67e10ff 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ docopt==0.6.2 requests==2.22.0 +git+https://github.com/mskcc/cwl-utils.git pandas \ No newline at end of file From 54dcbb0284d609ff1f7ed7796d46c8afb32de70b Mon Sep 17 00:00:00 2001 From: Nikhil Kumar Date: Mon, 9 Jan 2023 12:16:21 -0500 Subject: [PATCH 04/11] Added gitpython for updating pipeline cache --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 67e10ff..0ec9025 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ docopt==0.6.2 requests==2.22.0 git+https://github.com/mskcc/cwl-utils.git +GitPython==3.0.8 pandas \ No newline at end of file From 104c392a42ef8a9de1cd67236736fe12d2f8a1cd Mon Sep 17 00:00:00 2001 From: Nikhil Kumar Date: Mon, 9 Jan 2023 14:02:37 -0500 Subject: [PATCH 05/11] Added command to update the cwl cache --- beaglecli | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/beaglecli b/beaglecli index edb15ba..a7c8d75 100755 --- a/beaglecli +++ b/beaglecli @@ -27,6 +27,10 @@ from urllib.parse import urljoin from datetime import datetime import traceback import csv +import git +import subprocess +import re +import tempfile from apps.access import access_commands from apps.cmoch import cmoch_commands @@ -754,7 +758,20 @@ def _create_sample(arguments, config): def _update_cache(arguments, config): cache_path = arguments.get('') cwl_apps = _get_cwl_apps() - print(cwl_apps) + num_cwl_apps = len(cwl_apps) + current_cwl_app = 0 + find_pipeline_folder = re.compile("Cloning into '(\S+)'") + for repo, version, cwl in cwl_apps: + with tempfile.TemporaryDirectory() as tmpDir: + print("Working on cwl {} of {}".format( + current_cwl_app, num_cwl_apps)) + git_repo = git.Git(tmpDir).clone( + repo, "--branch", version, "--recurse-submodules") + pipeline_folder = find_pipeline_folder.match(git_repo).group(1) + cwl_path = os.path.joins(tmpDir, pipeline_folder, cwl) + cache_command = "module load singularity/3.7.1; docker_extract.py -s {} {}".format( + cache_path, cwl_path) + subprocess.check_output(cache_command, shell=True) # Update From a54cd759750ed92bcad5bfa4b40552f1ce3665d1 Mon Sep 17 00:00:00 2001 From: Nikhil Kumar Date: Mon, 9 Jan 2023 14:04:39 -0500 Subject: [PATCH 06/11] A few fixes on the update cache function --- beaglecli | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/beaglecli b/beaglecli index a7c8d75..569260e 100755 --- a/beaglecli +++ b/beaglecli @@ -765,10 +765,11 @@ def _update_cache(arguments, config): with tempfile.TemporaryDirectory() as tmpDir: print("Working on cwl {} of {}".format( current_cwl_app, num_cwl_apps)) + current_cwl_app += 1 git_repo = git.Git(tmpDir).clone( repo, "--branch", version, "--recurse-submodules") pipeline_folder = find_pipeline_folder.match(git_repo).group(1) - cwl_path = os.path.joins(tmpDir, pipeline_folder, cwl) + cwl_path = os.path.join(tmpDir, pipeline_folder, cwl) cache_command = "module load singularity/3.7.1; docker_extract.py -s {} {}".format( cache_path, cwl_path) subprocess.check_output(cache_command, shell=True) From 59d96319d07de6ae57c7e83461cdd30949c2286c Mon Sep 17 00:00:00 2001 From: Nikhil Kumar Date: Mon, 9 Jan 2023 14:35:22 -0500 Subject: [PATCH 07/11] Made update cache more robust --- beaglecli | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/beaglecli b/beaglecli index 569260e..81d47dd 100755 --- a/beaglecli +++ b/beaglecli @@ -770,10 +770,13 @@ def _update_cache(arguments, config): repo, "--branch", version, "--recurse-submodules") pipeline_folder = find_pipeline_folder.match(git_repo).group(1) cwl_path = os.path.join(tmpDir, pipeline_folder, cwl) - cache_command = "module load singularity/3.7.1; docker_extract.py -s {} {}".format( - cache_path, cwl_path) - subprocess.check_output(cache_command, shell=True) - + if os.path.exists(cwl_path): + cache_command = "module load singularity/3.7.1; docker_extract.py -s {} {}".format( + cache_path, cwl_path) + subprocess.check_output(cache_command, shell=True) + else: + print("Malformed pipeline: Could not find cwl: {} in {} version {}".format( + cwl, repo, version)) # Update From 0b7c2bb1047598489cc1245165ee4d57d2b1512c Mon Sep 17 00:00:00 2001 From: Nikhil Kumar Date: Mon, 9 Jan 2023 14:41:02 -0500 Subject: [PATCH 08/11] More robust handling of git repos --- beaglecli | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/beaglecli b/beaglecli index 81d47dd..3baaa46 100755 --- a/beaglecli +++ b/beaglecli @@ -766,8 +766,11 @@ def _update_cache(arguments, config): print("Working on cwl {} of {}".format( current_cwl_app, num_cwl_apps)) current_cwl_app += 1 - git_repo = git.Git(tmpDir).clone( - repo, "--branch", version, "--recurse-submodules") + try: + git_repo = git.Git(tmpDir).clone( + repo, "--branch", version, "--recurse-submodules") + except git.exc.GitCommandError: + print("Could not find repo {} version {}".format(repo, version)) pipeline_folder = find_pipeline_folder.match(git_repo).group(1) cwl_path = os.path.join(tmpDir, pipeline_folder, cwl) if os.path.exists(cwl_path): From 7981b08331280197d98ca7b803c4e0c8989640b8 Mon Sep 17 00:00:00 2001 From: Nikhil Kumar Date: Mon, 9 Jan 2023 15:04:46 -0500 Subject: [PATCH 09/11] Made docker extract step more robust --- beaglecli | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/beaglecli b/beaglecli index 3baaa46..15a5f57 100755 --- a/beaglecli +++ b/beaglecli @@ -776,7 +776,11 @@ def _update_cache(arguments, config): if os.path.exists(cwl_path): cache_command = "module load singularity/3.7.1; docker_extract.py -s {} {}".format( cache_path, cwl_path) - subprocess.check_output(cache_command, shell=True) + try: + subprocess.check_output(cache_command, shell=True) + except: + print("Malformed CWL: CWL might not be valid: {}".format( + current_cwl_app)) else: print("Malformed pipeline: Could not find cwl: {} in {} version {}".format( cwl, repo, version)) From 17cbe1522928f2a4d3216bdae7c34baa240028d3 Mon Sep 17 00:00:00 2001 From: Nikhil Kumar Date: Mon, 9 Jan 2023 15:07:56 -0500 Subject: [PATCH 10/11] Fixed typo --- beaglecli | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/beaglecli b/beaglecli index 15a5f57..649e93b 100755 --- a/beaglecli +++ b/beaglecli @@ -779,8 +779,8 @@ def _update_cache(arguments, config): try: subprocess.check_output(cache_command, shell=True) except: - print("Malformed CWL: CWL might not be valid: {}".format( - current_cwl_app)) + print("Malformed CWL: CWL {} for repo {} version {} might not be valid".format( + cwl, repo, version)) else: print("Malformed pipeline: Could not find cwl: {} in {} version {}".format( cwl, repo, version)) From 6e72057dcd5443c629ff8cc116b891acacb23006 Mon Sep 17 00:00:00 2001 From: Nikhil Kumar Date: Thu, 19 Jan 2023 13:43:24 -0500 Subject: [PATCH 11/11] Updated error message --- beaglecli | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/beaglecli b/beaglecli index 649e93b..8c52cdc 100755 --- a/beaglecli +++ b/beaglecli @@ -383,7 +383,7 @@ def _get_cwl_apps(): cwl_set.add((github, version, entrypoint)) return cwl_set else: - print("Error: beagle returned an empty") + print("Error: beagle returned an empty response") exit(1) else: print("ERROR: Could not retrieve app list")