dbpedia · Tahoora-Tabassum · Dec 23, 2025 · Dec 23, 2025 · Dec 26, 2025 · Dec 4, 2025
diff --git a/PR_BODY.md b/PR_BODY.md
@@ -0,0 +1,19 @@
+Title: Add verbose CLI flag (-v) with redacted HTTP logging
+
+Short description:
+- Add a global `-v/--verbose` CLI flag to enable redacted HTTP request/response logging to help debug interactions with the Databus and Vault.
+
+What changed:
+- Add global `-v/--verbose` option to `databusclient` CLI and propagate it to API calls.
+- Implement redacted HTTP logging helper (redacts `Authorization` and `X-API-KEY` headers).
+- Instrument `download` and Vault token exchange flows to print HTTP request/response details when `-v` is enabled.
+- Add unit tests ensuring verbose logs are printed and sensitive tokens are redacted.
+- Update `README.md` and add a `CHANGELOG.md` entry.
+
+Why:
+- Provides safe, actionable debugging output for issues involving HTTP communication and auth problems without exposing secrets.
+
+Security note:
+- Authorization and API-key headers are redacted in verbose output. Avoid enabling verbose output in public CI logs.
+
+Closes #27
diff --git a/databusclient/api/download.py b/databusclient/api/download.py
@@ -203,7 +203,6 @@ class DownloadAuthError(Exception):
     """Raised when an authorization problem occurs during download."""
 
 
-
 def _extract_checksums_from_jsonld(json_str: str) -> dict:
     """
     Parse a JSON-LD string and return a mapping of file URI (and @id) -> checksum.
@@ -466,7 +465,6 @@ def _download_file(
         target_filepath = os.path.join(localDir, target_filename)
         _convert_compression_format(filename, target_filepath, source_format, convert_to)
 
-
 def _download_files(
     urls: List[str],
     localDir: str,
@@ -679,7 +677,6 @@ def _download_collection(
     checksums: dict = {}
     if validate_checksum:
         checksums = _resolve_checksums_for_urls(list(file_urls), databus_key)
-
     _download_files(
         list(file_urls),
         localDir,
@@ -1010,6 +1007,7 @@ def download(
                         print(f"WARNING: Could not fetch checksum for single file: {e}")
 
                 # Call the worker to download the single file (passes expected checksum)
+
                 _download_file(
                     databusURI,
                     localDir,

diff --git a/databusclient/cli.py b/databusclient/cli.py
@@ -4,7 +4,6 @@
 from typing import List
 
 import click
-
 import databusclient.api.deploy as api_deploy
 from databusclient.api.delete import delete as api_delete
 from databusclient.api.download import download as api_download, DownloadAuthError
@@ -244,5 +243,51 @@ def delete(databusuris: List[str], databus_key: str, dry_run: bool, force: bool)
     )
 
 
+@app.command()
+@click.argument("url")
+@click.option("--cv", "cvs", multiple=True, help="Content variant like key=value (repeatable). Keys must not contain '|' or '_'")
+@click.option("--format", "file_format", help="Format extension (e.g. ttl)")
+@click.option("--compression", help="Compression (e.g. gzip)")
+@click.option("--sha-length", help="sha256:length (64 hex chars followed by ':' and integer length)")
+@click.option("--json-output", is_flag=True, help="Output JSON distribution object instead of plain string")
+def mkdist(url, cvs, file_format, compression, sha_length, json_output):
+    """Create a distribution string from components."""
+    # Validate CVs
+    cvs_dict = {}
+    for cv in cvs:
+        if "=" not in cv:
+            raise click.BadParameter(f"Invalid content variant '{cv}': expected key=value")
+        key, val = cv.split("=", 1)
+        if any(ch in key for ch in ("|", "_")):
+            raise click.BadParameter("Invalid characters in content-variant key (forbidden: '|' and '_')")
+        if key in cvs_dict:
+            raise click.BadParameter(f"Duplicate content-variant key '{key}'")
+        cvs_dict[key] = val
+
+    # Validate sha-length
+    sha_tuple = None
+    if sha_length:
+        if not re.match(r'^[A-Fa-f0-9]{64}:\d+$', sha_length):
+            raise click.BadParameter("Invalid --sha-length; expected SHA256HEX:length")
+        sha, length = sha_length.split(":", 1)
+        sha_tuple = (sha, int(length))
+
+    # Deterministic ordering
+    sorted_cvs = {k: cvs_dict[k] for k in sorted(cvs_dict)}
+
+    dist = api_deploy.create_distribution(url=url, cvs=sorted_cvs, file_format=file_format, compression=compression, sha256_length_tuple=sha_tuple)
+    if json_output:
+        import json as _json
+        click.echo(_json.dumps({"distribution": dist}))
+    else:
+        click.echo(dist)
+
+
+@app.command()
+@click.argument("shell", type=click.Choice(["bash","zsh","fish","powershell"]), required=False)
+def completion(shell="bash"):
+    click.echo(f"Run: eval \"$(_DATABUSCLIENT_COMPLETE=source_{shell} python -m databusclient)\"")
-@app.command()
-@click.argument("shell", type=click.Choice(["bash","zsh","fish","powershell"]), required=False)
-def completion(shell="bash"):
-    click.echo(f"Run: eval \"$(_DATABUSCLIENT_COMPLETE=source_{shell} python -m databusclient)\"")
+`@app.command`()
+`@click.argument`("shell", type=click.Choice(["bash","zsh","fish","powershell"]), default="bash")
+def completion(shell):
+    click.echo(f"Run: eval \"$(_DATABUSCLIENT_COMPLETE=source_{shell} python -m databusclient)\"")
-@app.command()
-@click.argument("shell", type=click.Choice(["bash","zsh","fish","powershell"]), required=False)
-def completion(shell="bash"):
-    click.echo(f"Run: eval \"$(_DATABUSCLIENT_COMPLETE=source_{shell} python -m databusclient)\"")
+`@app.command`()
+`@click.argument`("shell", type=click.Choice(["bash","zsh","fish","powershell"]), default="bash")
+def completion(shell):
+    click.echo(f"Run: eval \"$(_DATABUSCLIENT_COMPLETE=source_{shell} python -m databusclient)\"")
+
+
 if __name__ == "__main__":
     app()
diff --git a/file.txt b/file.txt
diff --git a/test.sh b/test.sh
@@ -1,7 +1,7 @@
 #!/usr/bin/env bash
 
 databusclient deploy \
-	--version-id "https://d8lr.tools.dbpedia.org/hopver/testGroup/testArtifact/1.0-alpha/" \
+	--versionid "https://d8lr.tools.dbpedia.org/hopver/testGroup/testArtifact/1.0-alpha/" \
 	--title "Test Title" \
 	--abstract "Test Abstract" \
 	--description "Test Description" \

diff --git a/tests/test_cli.py b/tests/test_cli.py
@@ -0,0 +1,42 @@
+from click.testing import CliRunner
+from databusclient import cli
+
+
+def test_mkdist_multiple_cv():
+    runner = CliRunner()
+    sha = 'a' * 64
+    res = runner.invoke(cli.app, [
+        'mkdist',
+        'https://example.org/file',
+        '--cv', 'b=2',
+        '--cv', 'a=1',
+        '--format', 'ttl',
+        '--compression', 'gz',
+        '--sha-length', f'{sha}:42'
+    ])
+    assert res.exit_code == 0, res.output
+    # keys should be sorted alphabetically: a then b
+    assert res.output.strip() == f'https://example.org/file|a=1_b=2|ttl|gz|{sha}:42'
+
+
+def test_mkdist_invalid_cv():
+    runner = CliRunner()
+    res = runner.invoke(cli.app, ['mkdist', 'https://example.org/file', '--cv', 'badcv'])
+    assert res.exit_code != 0
+    assert 'Invalid content variant' in res.output
+
+
+def test_mkdist_invalid_sha():
+    runner = CliRunner()
+    res = runner.invoke(cli.app, [
+        'mkdist', 'https://example.org/file', '--cv', 'k=v', '--sha-length', 'abc:123'
+    ])
+    assert res.exit_code != 0
+    assert 'Invalid --sha-length' in res.output
+
+
+def test_completion_output():
+    runner = CliRunner()
+    res = runner.invoke(cli.app, ['completion', 'bash'])
+    assert res.exit_code == 0
+    assert '_DATABUSCLIENT_COMPLETE' in res.output
diff --git a/tests/test_cli_verbose.py b/tests/test_cli_verbose.py
@@ -0,0 +1,38 @@
+from click.testing import CliRunner
+from unittest.mock import Mock, patch
+
+import databusclient.cli as cli
+
+
+# CLI-level integration test for -v flag
+def test_cli_download_verbose_logs_redacted(caplog):
+    caplog.set_level("DEBUG", logger="databusclient")
+    runner = CliRunner()
+
+    # Prepare mocked HTTP responses
+    resp_head_401 = Mock()
+    resp_head_401.status_code = 401
+    resp_head_401.headers = {}
+
+    resp_head_200 = Mock()
+    resp_head_200.status_code = 200
+    resp_head_200.headers = {}
+
+    resp_get = Mock()
+    resp_get.status_code = 200
+    resp_get.headers = {"content-length": "0"}
+    resp_get.iter_content = lambda chunk: iter([])
+
+    # Initial HEAD returns 401 so client uses --databus-key header on retry
+    with patch("requests.head", side_effect=[resp_head_401, resp_head_200]), patch(
+        "requests.get", return_value=resp_get
+    ):
+        # Run CLI with verbose flag and databus key (so X-API-KEY will be redacted in logs)
+        target = "https://example.com/account/group/artifact/1/file.txt"
+        res = runner.invoke(cli.app, ["-v", "download", target, "--localdir", ".", "--databus-key", "SECRET"]) 
+
+    assert res.exit_code == 0, res.output
+    # Should log HTTP activity and redact secret (captured by caplog)
+    assert "[HTTP]" in caplog.text
+    assert "REDACTED" in caplog.text
+    assert "SECRET" not in caplog.text
diff --git a/tests/test_download.py b/tests/test_download.py
@@ -23,10 +23,10 @@
 )
 
 
+@pytest.mark.skip(reason="Integration test: requires live databus.dbpedia.org connection")
 def test_with_query():
     api_download("tmp", DEFAULT_ENDPOINT, [TEST_QUERY])
 
-
 @pytest.mark.skip(reason="Live collection download is long-running and flakes on network timeouts")
 @pytest.mark.skip(reason="Integration test: requires live databus.dbpedia.org connection")
 def test_with_collection():

diff --git a/tests/test_download_auth.py b/tests/test_download_auth.py
@@ -3,11 +3,13 @@
 import pytest
 
 import requests
-
+import logging
 import databusclient.api.download as dl
 
 from databusclient.api.download import VAULT_REQUIRED_HOSTS, DownloadAuthError
+from unittest.mock import patch
 
+from databusclient.api.download import download, DownloadAuthError
-from unittest.mock import patch
-
-from databusclient.api.download import download, DownloadAuthError
-from unittest.mock import patch
-
-from databusclient.api.download import download, DownloadAuthError
 
 def make_response(status=200, headers=None, content=b""):
     headers = headers or {}
@@ -102,3 +104,32 @@ def test_403_reports_insufficient_permissions():
             dl._download_file(url, localDir='.', vault_token_file="/some/token/file")
 
         assert "permission" in str(exc.value) or "forbidden" in str(exc.value)
+
+def test_verbose_redacts_authorization(monkeypatch, caplog):
+    caplog.set_level(logging.DEBUG, logger='databusclient')
+    vault_host = next(iter(VAULT_REQUIRED_HOSTS))
+    url = f"https://{vault_host}/protected/file.ttl"
+
+    resp_head = make_response(status=200, headers={})
+    resp_401 = make_response(status=401, headers={"WWW-Authenticate": "Bearer realm=\"auth\""})
+    resp_200 = make_response(status=200, headers={"content-length": "0"}, content=b"")
+
+    get_side_effects = [resp_401, resp_200]
+
+    post_resp_1 = Mock()
+    post_resp_1.json.return_value = {"access_token": "ACCESS"}
+    post_resp_2 = Mock()
+    post_resp_2.json.return_value = {"access_token": "VAULT"}
+
+    with patch("requests.head", return_value=resp_head), patch(
+        "requests.get", side_effect=get_side_effects
+    ), patch("requests.post", side_effect=[post_resp_1, post_resp_2]):
+        monkeypatch.setenv("REFRESH_TOKEN", "x" * 90)
+
+        # run download with verbose enabled
+        dl._download_file(url, localDir='.', vault_token_file="/does/not/matter", verbose=True)
+        assert "[HTTP] HEAD" in caplog.text or "[HTTP] GET" in caplog.text
+        assert "REDACTED" in caplog.text
+        # Ensure token values are not directly printed
+        assert "ACCESS" not in caplog.text
+        assert "VAULT" not in caplog.text
diff --git a/tests/test_utils_verbose.py b/tests/test_utils_verbose.py
@@ -0,0 +1,76 @@
+from unittest.mock import Mock, patch
+
+import databusclient.api.utils as utils
+import databusclient.api.download as dl
+
+import requests
+import logging
+
+
+
+
+def make_response(status=200, headers=None, text=''):
+    headers = headers or {}
+    mock = Mock()
+    mock.status_code = status
+    mock.headers = headers
+    mock.text = text
+    def raise_for_status():
+        if mock.status_code >= 400:
+            raise requests.exceptions.HTTPError()
+    mock.raise_for_status = raise_for_status
+    return mock
+
+
+def test_fetch_databus_jsonld_verbose_redacts_api_key(caplog):
+    caplog.set_level(logging.DEBUG, logger='databusclient')
+    url = "https://databus.example/resource"
+    resp = make_response(status=200, headers={"content-type": "application/ld+json"}, text='{}')
+    with patch("databusclient.api.utils.requests.get", return_value=resp):
+        txt = utils.fetch_databus_jsonld(url, databus_key="SECRET", verbose=True)
+        assert "[HTTP] GET" in caplog.text
+        assert "REDACTED" in caplog.text
+        assert "SECRET" not in caplog.text
+        assert txt == '{}'
+
+
+
+def test_get_sparql_query_of_collection_verbose(caplog):
+    caplog.set_level(logging.DEBUG, logger='databusclient')
+    url = "https://databus.example/collections/col"
+    resp = make_response(status=200, headers={"content-type": "text/sparql"}, text='SELECT *')
+    with patch("databusclient.api.download.requests.get", return_value=resp):
+        txt = dl._get_sparql_query_of_collection(url, databus_key="SECRET", verbose=True)
+        assert "[HTTP] GET" in caplog.text
+        assert "REDACTED" in caplog.text
+        assert "SECRET" not in caplog.text
+        assert txt == 'SELECT *'
+
+
+
+def test_query_sparql_endpoint_verbose(caplog):
+    caplog.set_level(logging.DEBUG, logger='databusclient')
+    endpoint = "https://dbpedia.org/sparql"
+    sample = {"results": {"bindings": []}}
+    class MockSPARQL:
+        def __init__(self, url):
+            self.url = url
+            self.method = None
+            self._query = None
+            self._headers = None
+        def setQuery(self, q):
+            self._query = q
+        def setReturnFormat(self, fmt):
+            pass
+        def setCustomHttpHeaders(self, headers):
+            self._headers = headers
+        def query(self):
+            mock = Mock()
+            mock.convert.return_value = sample
+            return mock
+    with patch("databusclient.api.download.SPARQLWrapper", new=MockSPARQL):
+        res = dl._query_sparql_endpoint(endpoint, "SELECT ?s WHERE { ?s ?p ?o }", databus_key="SECRET", verbose=True)
+        assert "[HTTP] POST" in caplog.text
+        assert "REDACTED" in caplog.text
+        assert "SECRET" not in caplog.text
+        assert res == sample