Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
f32c12c
reimplement broken tests handling for integration jobs
strtgbb Feb 5, 2026
096dbed
fix docker push credentials
strtgbb Feb 5, 2026
62a8c89
add debugging
strtgbb Feb 5, 2026
4d09ecb
more debugging and fixes
strtgbb Feb 6, 2026
a4f6460
fixes
strtgbb Feb 6, 2026
92b89d2
remove flaky check jobs
strtgbb Feb 6, 2026
564978e
more debug
strtgbb Feb 6, 2026
60fa43a
integration fails crossout should be working now
strtgbb Feb 6, 2026
5ff6623
fix Unexpected result status [BROKEN]
strtgbb Feb 6, 2026
58ae3c2
cross out test_dirty_pages_force_purge fail
strtgbb Feb 9, 2026
2ed0970
Merge branch 'antalya-26.1' into rebase-cicd-v26.1.2.11-stable
strtgbb Feb 9, 2026
7d87a85
fix skipping stateless suites
strtgbb Feb 9, 2026
6ba48d4
disable crash reports in programs/server/embedded.xml
strtgbb Feb 9, 2026
2117ccc
fix FinishCIReport
strtgbb Feb 9, 2026
f7037f3
disable crash reports in programs/server/config.yaml.example
strtgbb Feb 9, 2026
622af95
fix integration test time query
strtgbb Feb 9, 2026
c4c283c
increase integration tests session_timeout
strtgbb Feb 9, 2026
3d0d5da
fix rare UnboundLocalError in report
strtgbb Feb 9, 2026
ea64cf7
fix docker image tag again?
strtgbb Feb 10, 2026
a14de4c
increase integration tests session_timeout some more
strtgbb Feb 10, 2026
d685e79
fix
strtgbb Feb 10, 2026
b910367
remove bugfix jobs and make stateless tsan non-blocking
strtgbb Feb 10, 2026
937c931
fix test_acme_tls
strtgbb Feb 10, 2026
28a978f
xfail test_move_after_processing[same_bucket-AzureQueue] on arm
strtgbb Feb 10, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -807,6 +807,7 @@ def create_workflow_report(
)
except Exception as e:
pr_info_html = e
pr_info = {}

fail_results["job_statuses"] = backfill_skipped_statuses(
fail_results["job_statuses"], pr_number, branch_name, commit_sha
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/master.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4650,7 +4650,7 @@ jobs:
if: ${{ !cancelled() }}
uses: ./.github/actions/create_workflow_report
with:
workflow_config: ${{ needs.config_workflow.outputs.data.workflow_config }}
workflow_config: ${{ toJson(needs) }}
final: true

SourceUpload:
Expand All @@ -4660,7 +4660,7 @@ jobs:
env:
COMMIT_SHA: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
PR_NUMBER: ${{ github.event.pull_request.number || 0 }}
VERSION: ${{ fromJson(needs.config_workflow.outputs.data).custom_data.version.string }}
VERSION: ${{ fromJson(needs.config_workflow.outputs.data).JOB_KV_DATA.version.string }}
steps:
- name: Check out repository code
uses: Altinity/checkout@19599efdf36c4f3f30eb55d5bb388896faea69f6
Expand Down
338 changes: 73 additions & 265 deletions .github/workflows/pull_request.yml

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions .github/workflows/release_builds.yml
Original file line number Diff line number Diff line change
Expand Up @@ -1278,7 +1278,7 @@ jobs:
if: ${{ !cancelled() }}
uses: ./.github/actions/create_workflow_report
with:
workflow_config: ${{ needs.config_workflow.outputs.data.workflow_config }}
workflow_config: ${{ toJson(needs) }}
final: true

SourceUpload:
Expand All @@ -1288,7 +1288,7 @@ jobs:
env:
COMMIT_SHA: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
PR_NUMBER: ${{ github.event.pull_request.number || 0 }}
VERSION: ${{ fromJson(needs.config_workflow.outputs.data).custom_data.version.string }}
VERSION: ${{ fromJson(needs.config_workflow.outputs.data).JOB_KV_DATA.version.string }}
steps:
- name: Check out repository code
uses: Altinity/checkout@19599efdf36c4f3f30eb55d5bb388896faea69f6
Expand Down
2 changes: 1 addition & 1 deletion ci/defs/job_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@
"./ci/jobs/scripts/docker_in_docker.sh",
],
),
run_in_docker=f"altinityinfra/integration-tests-runner+root+--memory={LIMITED_MEM}+--privileged+--dns-search='.'+--security-opt seccomp=unconfined+--cap-add=SYS_PTRACE+{docker_sock_mount}+--volume=clickhouse_integration_tests_volume:/var/lib/docker+--cgroupns=host",
run_in_docker=f"altinityinfra/integration-tests-runner+root+--memory={LIMITED_MEM}+--privileged+--dns-search='.'+--security-opt seccomp=unconfined+--cap-add=SYS_PTRACE+{docker_sock_mount}+--volume=clickhouse_integration_tests_volume:/var/lib/docker+--cgroupns=host+--env=CLICKHOUSE_TEST_STAT_URL=$CLICKHOUSE_TEST_STAT_URL+--env=CLICKHOUSE_TEST_STAT_LOGIN=$CLICKHOUSE_TEST_STAT_LOGIN+--env=CLICKHOUSE_TEST_STAT_PASSWORD=$CLICKHOUSE_TEST_STAT_PASSWORD",
)

BINARY_DOCKER_COMMAND = (
Expand Down
127 changes: 126 additions & 1 deletion ci/jobs/integration_test_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
from pathlib import Path
from typing import List, Tuple

import yaml # NOTE (strtgbb): Used for loading broken tests rules
import re

from ci.jobs.scripts.find_tests import Targeting
from ci.jobs.scripts.integration_tests_configs import IMAGES_ENV, get_optimal_test_batch
from ci.praktika.info import Info
Expand All @@ -22,6 +25,106 @@
MAX_MEM_PER_WORKER = 11


def get_broken_tests_rules(broken_tests_file_path: str) -> dict:
    """Load and compile the known-broken-test rules from a YAML file.

    The file is expected to contain a list of rule mappings.  Each rule has
    a mandatory ``name`` and ``reason``; optional keys are ``message``,
    ``not_message``, ``check_types`` and ``regex``.  When ``regex`` is
    exactly ``true``, the ``name``, ``message`` and ``not_message`` fields
    are compiled as regular expressions.

    Returns:
        dict with two buckets consumed by ``test_is_known_fail``:
          - ``"exact"``:   test name (str) -> rule dict
          - ``"pattern"``: compiled name regex -> rule dict

    Raises:
        ValueError: if the file is missing, empty, or does not parse to a
            list of rules.
        KeyError: if a rule is missing a mandatory ``name``/``reason`` key.
    """
    if (
        not os.path.isfile(broken_tests_file_path)
        or os.path.getsize(broken_tests_file_path) == 0
    ):
        raise ValueError(
            "There is something wrong with getting broken tests rules: "
            f"file '{broken_tests_file_path}' is empty or does not exist."
        )

    with open(broken_tests_file_path, "r", encoding="utf-8") as broken_tests_file:
        broken_tests = yaml.safe_load(broken_tests_file)

    # A comments-only file passes the size check above yet loads as None;
    # fail with a clear message instead of a TypeError on iteration below.
    if not isinstance(broken_tests, list):
        raise ValueError(
            "There is something wrong with getting broken tests rules: "
            f"file '{broken_tests_file_path}' does not contain a list of rules."
        )

    compiled_rules = {"exact": {}, "pattern": {}}

    for test in broken_tests:
        # Only a literal boolean true enables regex handling; truthy
        # strings like "yes" are intentionally not accepted.
        regex = test.get("regex") is True
        rule = {
            "reason": test["reason"],
        }

        # message must appear in the test logs for the rule to match.
        if test.get("message"):
            rule["message"] = re.compile(test["message"]) if regex else test["message"]

        # not_message must NOT appear in the test logs for the rule to match.
        if test.get("not_message"):
            rule["not_message"] = (
                re.compile(test["not_message"]) if regex else test["not_message"]
            )

        # check_types restricts the rule to specific job flavors (e.g. "arm").
        if test.get("check_types"):
            rule["check_types"] = test["check_types"]

        if regex:
            rule["regex"] = True
            compiled_rules["pattern"][re.compile(test["name"])] = rule
        else:
            compiled_rules["exact"][test["name"]] = rule

    return compiled_rules


def test_is_known_fail(broken_tests_rules, test_name, test_logs, job_flags):
    """Check whether a failed test matches a known-broken-test rule.

    Args:
        broken_tests_rules: compiled rules from ``get_broken_tests_rules``
            (dict with ``"exact"`` and ``"pattern"`` buckets).
        test_name: full test name, matched exactly and against name regexes.
        test_logs: captured test output, matched against each rule's
            ``message``/``not_message`` filters; may be None or empty.
        job_flags: job flavor string checked against rules' ``check_types``.

    Returns:
        The rule's ``reason`` string when a rule matches, otherwise False.

    Side effects:
        Appends a decision trace to ``{temp_path}/broken_tests_handler.log``
        (``temp_path`` is a module-level global defined elsewhere in this
        file) so rule matching can be debugged from job artifacts.
    """
    matching_rules = []

    def matches_substring(substring, log, is_regex):
        # Missing logs can never satisfy a message filter.
        if log is None:
            return False
        if is_regex:
            return bool(substring.search(log))
        return substring in log

    broken_tests_log = f"{temp_path}/broken_tests_handler.log"

    with open(broken_tests_log, "a") as log_file:

        log_file.write(f"Checking known broken tests for failed test: {test_name}\n")
        log_file.write("Potential matching rules:\n")
        exact_rule = broken_tests_rules["exact"].get(test_name)
        if exact_rule:
            log_file.write(f"{test_name} - {exact_rule}\n")
            matching_rules.append(exact_rule)

        for name_re, data in broken_tests_rules["pattern"].items():
            if name_re.fullmatch(test_name):
                log_file.write(f"{name_re} - {data}\n")
                matching_rules.append(data)

        if not matching_rules:
            return False

        # Guard against None/empty logs: indexing splitlines()[0]
        # unconditionally raised IndexError on "" and AttributeError on
        # None, even though matches_substring handles missing logs.
        first_log_line = test_logs.splitlines()[0] if test_logs else ""
        log_file.write(f"First line of test logs: {first_log_line}\n")

        for rule_data in matching_rules:
            if rule_data.get("check_types") and not any(
                ct in job_flags for ct in rule_data["check_types"]
            ):
                log_file.write(
                    f"Skip rule: Check types didn't match: '{rule_data['check_types']}' not in '{job_flags}'\n"
                )
                continue  # check_types didn't match → skip rule

            is_regex = rule_data.get("regex", False)
            not_message = rule_data.get("not_message")
            if not_message and matches_substring(not_message, test_logs, is_regex):
                log_file.write(
                    f"Skip rule: Not message matched: '{rule_data['not_message']}'\n"
                )
                continue  # not_message matched → skip rule
            message = rule_data.get("message")
            if message and not matches_substring(message, test_logs, is_regex):
                log_file.write(
                    f"Skip rule: Message didn't match: '{rule_data['message']}'\n"
                )
                continue

            log_file.write(f"Matched rule: {rule_data}\n")
            return rule_data["reason"]

    return False


def _start_docker_in_docker():
with open("./ci/tmp/docker-in-docker.log", "w") as log_file:
dockerd_proc = subprocess.Popen(
Expand Down Expand Up @@ -387,7 +490,7 @@ def main():

has_error = False
if not is_targeted_check:
session_timeout = 5400
session_timeout = 3600 * 2.5
else:
# For targeted jobs, use a shorter session timeout to keep feedback fast.
# If this timeout is exceeded but all completed tests have passed, the
Expand Down Expand Up @@ -533,6 +636,28 @@ def main():
)
attached_files.append("./ci/tmp/dmesg.log")

broken_tests_rules = get_broken_tests_rules("tests/broken_tests.yaml")
for result in test_results:
if result.status == Result.StatusExtended.FAIL:
try:
known_fail_reason = test_is_known_fail(
broken_tests_rules,
result.name,
result.info,
job_params,
)
except Exception as e:
print(f"Error getting known fail reason for result {result.name}: {e}")
continue
else:
if not known_fail_reason:
continue
result.status = Result.StatusExtended.BROKEN
result.info += f"\nMarked as broken: {known_fail_reason}"

if os.path.exists(f"{temp_path}/broken_tests_handler.log"):
attached_files.append(f"{temp_path}/broken_tests_handler.log")

R = Result.create_from(results=test_results, stopwatch=sw, files=attached_files)

if has_error:
Expand Down
9 changes: 7 additions & 2 deletions ci/jobs/scripts/integration_tests_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -925,6 +925,11 @@ def get_tests_execution_time(info: Info, job_options: str) -> dict[str, int]:
assert info.updated_at
start_time_filter = f"parseDateTimeBestEffort('{info.updated_at}')"

if info.pr_number == 0:
branch_filter = f"head_ref = '{info.git_branch}'"
else:
branch_filter = f"base_ref = '{info.base_branch}'"

build = job_options.split(",", 1)[0]

query = f"""
Expand All @@ -936,12 +941,12 @@ def get_tests_execution_time(info: Info, job_options: str) -> dict[str, int]:
SELECT
splitByString('::', test_name)[1] AS file,
median(test_duration_ms) AS test_duration_ms
FROM checks
FROM `gh-data`.checks
WHERE (check_name LIKE 'Integration tests%')
AND (check_name LIKE '%{build}%')
AND (check_start_time >= ({start_time_filter} - toIntervalDay(20)))
AND (check_start_time <= ({start_time_filter} - toIntervalHour(5)))
AND ((head_ref = 'master') AND startsWith(head_repo, 'ClickHouse/'))
AND ({branch_filter})
AND (file != '')
AND (test_status != 'SKIPPED')
AND (test_status != 'FAIL')
Expand Down
2 changes: 1 addition & 1 deletion ci/jobs/scripts/workflow_hooks/filter_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ def should_skip_job(job_name):

ci_exclude_tags = _info_cache.get_kv_data("ci_exclude_tags") or []
for tag in ci_exclude_tags:
if tag in job_name:
if tag in job_name.lower():
return True, f"Skipped, job name includes excluded tag '{tag}'"

# NOTE (strtgbb): disabled this feature for now
Expand Down
1 change: 1 addition & 0 deletions ci/praktika/result.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ def create_from(
Result.Status.SKIPPED,
Result.StatusExtended.OK,
Result.StatusExtended.SKIPPED,
Result.StatusExtended.BROKEN,
):
continue
elif result.status in (
Expand Down
4 changes: 2 additions & 2 deletions ci/praktika/yaml_additional_templates.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ class AltinityWorkflowTemplates:
if: ${{ !cancelled() }}
uses: ./.github/actions/create_workflow_report
with:
workflow_config: ${{ needs.config_workflow.outputs.data.workflow_config }}
workflow_config: ${{ toJson(needs) }}
final: true
""",
"SourceUpload": r"""
Expand All @@ -132,7 +132,7 @@ class AltinityWorkflowTemplates:
env:
COMMIT_SHA: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
PR_NUMBER: ${{ github.event.pull_request.number || 0 }}
VERSION: ${{ fromJson(needs.config_workflow.outputs.data).custom_data.version.string }}
VERSION: ${{ fromJson(needs.config_workflow.outputs.data).JOB_KV_DATA.version.string }}
steps:
- name: Check out repository code
uses: Altinity/checkout@19599efdf36c4f3f30eb55d5bb388896faea69f6
Expand Down
10 changes: 5 additions & 5 deletions ci/workflows/pull_request.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
"_debug, parallel",
"_binary, parallel",
"_asan, distributed plan, parallel",
"_tsan, parallel",
# "_tsan, parallel",
)
)
]
Expand Down Expand Up @@ -59,10 +59,10 @@
JobConfigs.lightweight_functional_tests_job,
# JobConfigs.stateless_tests_targeted_pr_jobs[0].set_allow_merge_on_failure(), # NOTE (strtgbb): Needs configuration
# JobConfigs.integration_test_targeted_pr_jobs[0].set_allow_merge_on_failure(),
*JobConfigs.stateless_tests_flaky_pr_jobs,
*JobConfigs.integration_test_asan_flaky_pr_jobs,
JobConfigs.bugfix_validation_ft_pr_job,
JobConfigs.bugfix_validation_it_job,
# *JobConfigs.stateless_tests_flaky_pr_jobs,
# *JobConfigs.integration_test_asan_flaky_pr_jobs,
# JobConfigs.bugfix_validation_ft_pr_job,
# JobConfigs.bugfix_validation_it_job,
*[
j.set_dependency(
FUNCTIONAL_TESTS_PARALLEL_BLOCKING_JOB_NAMES
Expand Down
4 changes: 2 additions & 2 deletions programs/server/config.yaml.example
Original file line number Diff line number Diff line change
Expand Up @@ -924,8 +924,8 @@ query_masking_rules:
# response_content: config://http_server_default_response

send_crash_reports:
enabled: true
endpoint: 'https://crash.clickhouse.com/'
enabled: false
endpoint: ''

# Uncomment to disable ClickHouse internal DNS caching.
# disable_internal_dns_cache: 1
6 changes: 3 additions & 3 deletions programs/server/embedded.xml
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@
<mlock_executable>true</mlock_executable>

<send_crash_reports>
<enabled>true</enabled>
<send_logical_errors>true</send_logical_errors>
<endpoint>https://crash.clickhouse.com/</endpoint>
<enabled>false</enabled>
<send_logical_errors>false</send_logical_errors>
<endpoint></endpoint>
</send_crash_reports>

<http_options_response>
Expand Down
7 changes: 7 additions & 0 deletions tests/broken_tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,13 @@
reason: 'INVESTIGATE: Out of Memory or startup instability'
- name: test_s3_cache_locality/test.py::test_cache_locality[0]
reason: 'INVESTIGATE: Timeout or AssertionError'
- name: test_dirty_pages_force_purge/test.py::test_dirty_pages_force_purge
reason: 'KNOWN: https://github.com/Altinity/ClickHouse/issues/1369'
message: 'RuntimeError: Failed to find peak memory counter'
- name: test_storage_s3_queue/test_0.py::test_move_after_processing[same_bucket-AzureQueue]
reason: 'INVESTIGATE: Fails on ARM'
check_types:
- arm

# Regex rules should be ordered from most specific to least specific.
# regex: true applies to name, message, and not_message fields, but not to reason or check_types fields.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
services:
pebble:
image: ghcr.io/letsencrypt/pebble:latest
image: ghcr.io/letsencrypt/pebble:2.9.0
command: -config test/config/pebble-config.json -strict -dnsserver 10.5.11.3:8053
environment:
- PEBBLE_WFE_NONCEREJECT=0
Expand All @@ -13,12 +13,12 @@ services:
ipv4_address: 10.5.11.2
cpus: 3
challtestsrv:
image: ghcr.io/letsencrypt/pebble-challtestsrv:latest
image: ghcr.io/letsencrypt/pebble-challtestsrv:2.9.0
command: -defaultIPv6 "" -defaultIPv4 10.5.11.3
ports:
- ${LE_PEBBLE_CHALSRV_EXTERNAL_API_PORT:-8055}:${LE_PEBBLE_CHALSRV_INTERNAL_API_PORT:-8055}
stop_grace_period: 5s
networks:
default:
ipv4_address: 10.5.11.3
cpus: 3
cpus: 3
2 changes: 1 addition & 1 deletion tests/integration/test_acme_tls/test_multi_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def test_coordinated_acme_authorization(started_multi_replica_cluster):
json={'host': 'multi.integration-tests.clickhouse.com', 'addresses': ['10.5.11.12', '10.5.11.13', '10.5.11.14']}
)

for _ in range(60):
for _ in range(120):
time.sleep(1)

checked_nodes = 0
Expand Down
2 changes: 1 addition & 1 deletion tests/integration/test_acme_tls/test_single_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def test_acme_authorization(started_single_replica_cluster):
json={'host': 'single.integration-tests.clickhouse.com', 'addresses': ['10.5.11.11']}
)

for _ in range(60):
for _ in range(120):
time.sleep(1)

curl_result = node.exec_in_container(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
)
backward = cluster.add_instance(
"backward",
image="clickhouse/clickhouse-server",
image="altinity/clickhouse-server",
tag=CLICKHOUSE_CI_MIN_TESTED_VERSION,
with_installed_binary=True,
)
Expand Down
Loading