From 80670a78ba3a7464b8dd4c3d0f63744e7fc5acd0 Mon Sep 17 00:00:00 2001 From: Tim Hurski Date: Mon, 23 Feb 2026 18:36:55 -0600 Subject: [PATCH 1/3] DX-103340: Update jarbuild --- .github/workflows/jarbuild.yml | 180 ++++++++++++++++++++++++++------- 1 file changed, 143 insertions(+), 37 deletions(-) diff --git a/.github/workflows/jarbuild.yml b/.github/workflows/jarbuild.yml index e93fc4d439..bb0509ebb4 100644 --- a/.github/workflows/jarbuild.yml +++ b/.github/workflows/jarbuild.yml @@ -16,7 +16,7 @@ # under the License. name: JarBuild -on: +on: workflow_dispatch: inputs: arrow_branch: @@ -44,16 +44,35 @@ jobs: runs-on: ubuntu-latest timeout-minutes: 5 steps: + - name: Trim workflow inputs + run: | + echo "ARROW_BRANCH=$(echo '${{github.event.inputs.arrow_branch}}' | xargs)" >> $GITHUB_ENV + echo "ARROW_REPO=$(echo '${{github.event.inputs.arrow_repo}}' | xargs)" >> $GITHUB_ENV + echo "RELEASE_TAG_NAME=$(echo '${{github.event.inputs.release_tag_name}}' | xargs)" >> $GITHUB_ENV + - name: Print workflow input parameters + run: | + echo "==========================================" + echo "Workflow Input Parameters" + echo "==========================================" + echo "arrow_branch: ${{env.ARROW_BRANCH}}" + echo "arrow_repo: ${{env.ARROW_REPO}}" + echo "release_tag_name: ${{env.RELEASE_TAG_NAME}}" + echo "arrow-java branch: ${{github.ref_name}}" + echo "" + echo "Direct Links:" + echo "----------------------------------------" + echo "Arrow C++ repo/branch: https://github.com/${{env.ARROW_REPO}}/tree/${{env.ARROW_BRANCH}}" + echo "Arrow Java repo/branch: https://github.com/${{github.repository}}/tree/${{github.ref_name}}" + echo "Release tag: https://github.com/${{github.repository}}/releases/tag/${{env.RELEASE_TAG_NAME}}" + echo "==========================================" - name: Checkout uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: submodules: recursive - - name: Set env - run: echo "release_tag_name=$(echo $release_tag_name)" >> $GITHUB_ENV - name: Prepare for tag run: | - echo "${{github.event.inputs.release_tag_name}}" - ver=$(echo ${{github.event.inputs.release_tag_name}}) + echo "${{env.RELEASE_TAG_NAME}}" + ver=$(echo ${{env.RELEASE_TAG_NAME}}) version=${ver%-rc*} version=${version#v} rc=${ver#*-rc} @@ -81,7 +100,7 @@ jobs: jni-linux: name: JNI ${{ matrix.platform.runs_on }} ${{ matrix.platform.arch }} runs-on: ${{ matrix.platform.runs_on }} - timeout-minutes: 120 + timeout-minutes: 240 needs: - source strategy: @@ -158,17 +177,16 @@ jobs: jni-macos: name: JNI ${{ matrix.platform.runs_on }} ${{ matrix.platform.arch }} runs-on: ${{ matrix.platform.runs_on }} - timeout-minutes: 45 + timeout-minutes: 445 needs: - source strategy: fail-fast: false matrix: platform: - - { runs_on: macos-13, arch: "x86_64"} - - { runs_on: macos-14, arch: "aarch_64" } + - { runs_on: macos-15, arch: "aarch_64" } env: - MACOSX_DEPLOYMENT_TARGET: "14.0" + MACOSX_DEPLOYMENT_TARGET: "15.0" steps: - name: Download source archive uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 @@ -205,20 +223,41 @@ jobs: python-version: 3.12 - name: Install Archery run: pip install -e arrow/dev/archery[all] + - name: Checkout vcpkg + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + repository: Microsoft/vcpkg + path: arrow/vcpkg + - name: Install vcpkg + run: | + cd arrow/vcpkg + ./bootstrap-vcpkg.sh + echo "VCPKG_ROOT=${PWD}/arrow/vcpkg" >> ${GITHUB_ENV} + echo "${PWD}/arrow/vcpkg" >> ${GITHUB_PATH} + - name: Clean up disk space + run: | + echo "=== Free disk space before cleanup ===" + df -h / + + echo "" + echo "=== Removing Xcode simulators ===" + sudo rm -rf /Library/Developer/CoreSimulator/Caches || : + echo "Removed /Library/Developer/CoreSimulator/Caches" + + echo "" + echo "=== Removing user simulator data ===" + rm -rf ~/Library/Developer/CoreSimulator || : + echo "Removed ~/Library/Developer/CoreSimulator" + + echo "" + echo "=== Free disk space after cleanup ===" + df -h / - name: Install dependencies run: | - # We want to use llvm@14 to avoid shared z3 - # dependency. llvm@14 doesn't depend on z3 and llvm depends - # on z3. And Homebrew's z3 provides only shared library. It - # doesn't provides static z3 because z3's CMake doesn't accept - # building both shared and static libraries at once. - # See also: Z3_BUILD_LIBZ3_SHARED in - # https://github.com/Z3Prover/z3/blob/master/README-CMake.md - # - # If llvm is installed, Apache Arrow C++ uses llvm rather than - # llvm@14 because llvm is newer than llvm@14. - brew uninstall llvm || : + echo "=== Free disk space at start of dependency installation ===" + df -h / + echo "" # Ensure updating python@XXX with the "--overwrite" option. # If python@XXX is updated without "--overwrite", it causes # a conflict error. Because Python 3 installed not by @@ -240,7 +279,13 @@ jobs: brew uninstall pkg-config@0.29.2 || : fi + # Install basic build tools via brew (vcpkg needs these) + brew install cmake ninja pkg-config brew bundle --file=arrow/cpp/Brewfile + + # Clean up any existing LLVM installations in favor of vcpkg. + brew uninstall llvm || : + # We want to link aws-sdk-cpp statically but Homebrew's # aws-sdk-cpp provides only shared library. If we have # Homebrew's aws-sdk-cpp, our build mix Homebrew's @@ -259,6 +304,24 @@ jobs: # bundled Protobuf. brew uninstall protobuf + echo "" + echo "=== Free disk space before LLVM build ===" + df -h / + + echo "" + # Use vcpkg to install LLVM. + vcpkg install \ + --clean-after-build \ + --x-install-root=${VCPKG_ROOT}/installed \ + --x-manifest-root=arrow/ci/vcpkg \ + --overlay-ports=arrow/ci/vcpkg/overlay/llvm/ \ + --x-feature=gandiva-llvm + + echo "" + echo "=== Free disk space after LLVM build ===" + df -h / + + echo "" brew bundle --file=Brewfile - name: Prepare ccache run: | @@ -271,10 +334,18 @@ jobs: restore-keys: jni-macos-${{ matrix.platform.arch }}- - name: Build run: | + echo "=== Free disk space at start of build ===" + df -h / + + echo "" set -e # make brew Java available to CMake export JAVA_HOME=$(brew --prefix openjdk@11)/libexec/openjdk.jdk/Contents/Home ci/scripts/jni_macos_build.sh . arrow build jni + + echo "" + echo "=== Free disk space at end of build ===" + df -h / - name: Compress into single artifact to keep directory structure run: tar -cvzf jni-macos-${{ matrix.platform.arch }}.tar.gz jni/ - name: Upload artifacts @@ -299,7 +370,6 @@ jobs: tar -xf apache-arrow-java-*.tar.gz --strip-components=1 tar -xvzf jni-linux-x86_64.tar.gz tar -xvzf jni-linux-aarch_64.tar.gz - tar -xvzf jni-macos-x86_64.tar.gz tar -xvzf jni-macos-aarch_64.tar.gz - name: Test that shared libraries exist run: | @@ -315,11 +385,6 @@ jobs: test -f jni/arrow_orc_jni/aarch_64/libarrow_orc_jni.so test -f jni/gandiva_jni/aarch_64/libgandiva_jni.so - test -f jni/arrow_cdata_jni/x86_64/libarrow_cdata_jni.dylib - test -f jni/arrow_dataset_jni/x86_64/libarrow_dataset_jni.dylib - test -f jni/arrow_orc_jni/x86_64/libarrow_orc_jni.dylib - test -f jni/gandiva_jni/x86_64/libgandiva_jni.dylib - test -f jni/arrow_cdata_jni/aarch_64/libarrow_cdata_jni.dylib test -f jni/arrow_dataset_jni/aarch_64/libarrow_dataset_jni.dylib test -f jni/arrow_orc_jni/aarch_64/libarrow_orc_jni.dylib @@ -437,30 +502,71 @@ jobs: permissions: contents: write steps: + - name: Checkout arrow-java repository + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + - name: Checkout Apache Arrow C++ repository + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + repository: ${{github.event.inputs.arrow_repo}} + ref: ${{github.event.inputs.arrow_branch}} + path: arrow + - name: Get commit IDs + id: commit_ids + run: | + # Get short commit ID for arrow-java + arrow_java_commit=$(git rev-parse --short HEAD) + echo "arrow_java_commit=${arrow_java_commit}" >> $GITHUB_OUTPUT + + # Get short commit ID for arrow + cd arrow + arrow_commit=$(git rev-parse --short HEAD) + echo "arrow_commit=${arrow_commit}" >> $GITHUB_OUTPUT + cd .. + + # Parse version from release tag + ver=$(echo ${{github.event.inputs.release_tag_name}}) + version=${ver%-rc*} + version=${version#v} + rc=${ver#*-rc} + + # Create release name with both commit IDs + release_name="${version}-${arrow_java_commit}-${arrow_commit}" + release_tag="v${release_name}" + echo "release_name=${release_name}" >> $GITHUB_OUTPUT + echo "release_tag=${release_tag}" >> $GITHUB_OUTPUT + echo "version=${version}" >> $GITHUB_OUTPUT + echo "rc=${rc}" >> $GITHUB_OUTPUT + + echo "Arrow Java commit: ${arrow_java_commit}" + echo "Arrow commit: ${arrow_commit}" + echo "Release tag: ${release_tag}" - name: Download release artifacts uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: pattern: release-* path: artifacts + - name: Create and push tag + run: | + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + git tag -a "${{ steps.commit_ids.outputs.release_tag }}" -m "Release ${{ steps.commit_ids.outputs.release_name }} RC${{ steps.commit_ids.outputs.rc }}" -m "Action URL: $GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" + git push origin "${{ steps.commit_ids.outputs.release_tag }}" + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - name: Upload run: | # GH-499: How to create release notes? - echo "${{github.event.inputs.release_tag_name}}" - ver=$(echo ${{github.event.inputs.release_tag_name}}) - version=${ver%-rc*} - version=${version#v} - rc=${ver#*-rc} - gh release create ${{github.event.inputs.release_tag_name}} \ - --generate-notes \ + echo "Creating release: ${{ steps.commit_ids.outputs.release_tag }}" + gh release create "${{ steps.commit_ids.outputs.release_tag }}" \ + -n "Action URL: $GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" \ --prerelease \ --repo ${GITHUB_REPOSITORY} \ - --title "Apache Arrow Java ${version} RC${rc}" \ - --verify-tag + --title "Apache Arrow Java ${{ steps.commit_ids.outputs.version }} RC${{ steps.commit_ids.outputs.rc }} (arrow-java: ${{ steps.commit_ids.outputs.arrow_java_commit }}, arrow: ${{ steps.commit_ids.outputs.arrow_commit }})" # GitHub CLI does not respect their own rate limits # https://github.com/cli/cli/issues/9586 for artifact in artifacts/*/*; do sleep 1 - gh release upload ${{github.event.inputs.release_tag_name}} \ + gh release upload "${{ steps.commit_ids.outputs.release_tag }}" \ --repo ${GITHUB_REPOSITORY} \ $artifact done From 091ba528c4227d175c16a9ce057203da2143b2da Mon Sep 17 00:00:00 2001 From: Tim Hurski Date: Mon, 23 Feb 2026 18:54:52 -0600 Subject: [PATCH 2/3] DX-114802: Support Jira tickets in PR titles --- .github/workflows/dev_pr.js | 69 +++++++++++++------ ci/scripts/jni_manylinux_build.sh | 16 +++++ .../codegen/templates/UnionListWriter.java | 14 ++-- 3 files changed, 72 insertions(+), 27 deletions(-) diff --git a/.github/workflows/dev_pr.js b/.github/workflows/dev_pr.js index 13acc946e1..05c3a0c8dc 100644 --- a/.github/workflows/dev_pr.js +++ b/.github/workflows/dev_pr.js @@ -122,15 +122,20 @@ module.exports = { const title = context.payload.pull_request.title; if (title.startsWith("MINOR: ")) { console.log("PR is a minor PR"); - return {"issue": null}; + return {"issue": null, "type": "minor"}; } - const match = title.match(/^GH-([0-9]+): .*$/); + const match = title.match(/^(GH|DX)-([0-9]+): .*$/); if (match === null) { - core.setFailed("Invalid PR title format. Must either be MINOR: or GH-NNN:"); - return {"issue": null}; + core.setFailed("Invalid PR title format. Must either be MINOR:, GH-NNN:, or DX-NNN:"); + return {"issue": null, "type": null}; } - return {"issue": parseInt(match[1], 10)}; + + const issueType = match[1]; // "GH" or "DX" + const issueNumber = parseInt(match[2], 10); + + console.log(`PR references ${issueType}-${issueNumber}`); + return {"issue": issueNumber, "type": issueType}; }, apply_labels: async function({core, github, context}) { @@ -203,9 +208,28 @@ See [CONTRIBUTING.md](https://github.com/apache/arrow-java/blob/main/CONTRIBUTIN console.log("This is a MINOR PR"); return; } - const expected = `https://github.com/apache/arrow-java/issues/${issue.issue}`; - const query = ` + // Handle Jira tickets (DX-NNN) + if (issue.type === "DX") { + const jiraUrl = `https://dremio.atlassian.net/browse/DX-${issue.issue}`; + console.log(`This PR references Jira ticket: ${jiraUrl}`); + + // Add a comment with the Jira link + const comment_tag = "jira_link_comment"; + const maybe_comment_id = await have_comment(github, context, context.payload.pull_request.number, comment_tag); + const body_text = ` +**Related Jira Ticket:** [DX-${issue.issue}](${jiraUrl})`; + + await upsert_comment(github, maybe_comment_id, body_text, true); + console.log("Added/updated Jira link comment"); + return; + } + + // Handle GitHub issues (GH-NNN) + if (issue.type === "GH") { + const expected = `https://github.com/apache/arrow-java/issues/${issue.issue}`; + + const query = ` query($owner: String!, $name: String!, $number: Int!) { repository(owner: $owner, name: $name) { pullRequest(number: $number) { @@ -220,22 +244,23 @@ query($owner: String!, $name: String!, $number: Int!) { } }`; - const result = await github.graphql(query, { - owner: context.repo.owner, - name: context.repo.repo, - number: context.payload.pull_request.number, - }); - const issues = result.repository.pullRequest.closingIssuesReferences.edges; - console.log(issues); - - for (const link of issues) { - console.log(`PR is linked to ${link.node.number}`); - if (link.node.number === issue.issue) { - console.log(`Found link to ${expected}`); - return; + const result = await github.graphql(query, { + owner: context.repo.owner, + name: context.repo.repo, + number: context.payload.pull_request.number, + }); + const issues = result.repository.pullRequest.closingIssuesReferences.edges; + console.log(issues); + + for (const link of issues) { + console.log(`PR is linked to ${link.node.number}`); + if (link.node.number === issue.issue) { + console.log(`Found link to ${expected}`); + return; + } } + console.log(`Did not find link to ${expected}`); + core.setFailed("Missing link to issue in title"); } - console.log(`Did not find link to ${expected}`); - core.setFailed("Missing link to issue in title"); }, }; diff --git a/ci/scripts/jni_manylinux_build.sh b/ci/scripts/jni_manylinux_build.sh index a34ec0f420..0c63fc3408 100755 --- a/ci/scripts/jni_manylinux_build.sh +++ b/ci/scripts/jni_manylinux_build.sh @@ -25,6 +25,22 @@ set -euo pipefail # shellcheck source=ci/scripts/util_log.sh . "$(dirname "${0}")/util_log.sh" +github_actions_group_begin "Update llvm" + vcpkg install \ + --debug \ + --clean-after-build \ + --x-install-root=${VCPKG_ROOT}/installed \ + --x-manifest-root=/arrow/ci/vcpkg \ + --overlay-ports=/arrow/ci/vcpkg/overlay/llvm/ \ + --x-feature=dev \ + --x-feature=flight \ + --x-feature=gcs \ + --x-feature=json \ + --x-feature=parquet \ + --x-feature=gandiva \ + --x-feature=s3 +github_actions_group_end + github_actions_group_begin "Prepare arguments" source_dir="$(cd "${1}" && pwd)" arrow_dir="$(cd "${2}" && pwd)" diff --git a/vector/src/main/codegen/templates/UnionListWriter.java b/vector/src/main/codegen/templates/UnionListWriter.java index 8844f27296..80383254f0 100644 --- a/vector/src/main/codegen/templates/UnionListWriter.java +++ b/vector/src/main/codegen/templates/UnionListWriter.java @@ -53,6 +53,7 @@ public class Union${listName}Writer extends AbstractFieldWriter { private boolean inStruct = false; private boolean listStarted = false; private String structName; + private ArrowType extensionType; <#if listName == "LargeList" || listName == "LargeListView"> private static final long OFFSET_WIDTH = 8; <#else> @@ -203,12 +204,13 @@ public MapWriter map(String name, boolean keysSorted) { @Override public ExtensionWriter extension(ArrowType arrowType) { + extensionType = arrowType; return this; } + @Override public ExtensionWriter extension(String name, ArrowType arrowType) { - ExtensionWriter extensionWriter = writer.extension(name, arrowType); - return extensionWriter; + return writer.extension(name, arrowType); } <#if listName == "LargeList"> @@ -335,13 +337,15 @@ public void writeNull() { @Override public void writeExtension(Object value) { - writer.writeExtension(value); + writer.writeExtension(value, extensionType); writer.setPosition(writer.idx() + 1); } + @Override - public void addExtensionTypeWriterFactory(ExtensionTypeWriterFactory var1) { - writer.addExtensionTypeWriterFactory(var1); + public void writeExtension(Object value, ArrowType type) { + writeExtension(value); } + public void write(ExtensionHolder var1) { writer.write(var1); writer.setPosition(writer.idx() + 1); From d00c65777739203bc009c020ccc09ed4e3886ce3 Mon Sep 17 00:00:00 2001 From: Tim Hurski Date: Mon, 23 Feb 2026 22:52:25 -0600 Subject: [PATCH 3/3] Fix pre-commit issues: trailing whitespace and shell formatting --- ci/scripts/jni_manylinux_build.sh | 26 +++++++++---------- .../codegen/templates/UnionListWriter.java | 4 +-- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/ci/scripts/jni_manylinux_build.sh b/ci/scripts/jni_manylinux_build.sh index 0c63fc3408..c56d1d5c87 100755 --- a/ci/scripts/jni_manylinux_build.sh +++ b/ci/scripts/jni_manylinux_build.sh @@ -26,19 +26,19 @@ set -euo pipefail . "$(dirname "${0}")/util_log.sh" github_actions_group_begin "Update llvm" - vcpkg install \ - --debug \ - --clean-after-build \ - --x-install-root=${VCPKG_ROOT}/installed \ - --x-manifest-root=/arrow/ci/vcpkg \ - --overlay-ports=/arrow/ci/vcpkg/overlay/llvm/ \ - --x-feature=dev \ - --x-feature=flight \ - --x-feature=gcs \ - --x-feature=json \ - --x-feature=parquet \ - --x-feature=gandiva \ - --x-feature=s3 +vcpkg install \ + --debug \ + --clean-after-build \ + --x-install-root=${VCPKG_ROOT}/installed \ + --x-manifest-root=/arrow/ci/vcpkg \ + --overlay-ports=/arrow/ci/vcpkg/overlay/llvm/ \ + --x-feature=dev \ + --x-feature=flight \ + --x-feature=gcs \ + --x-feature=json \ + --x-feature=parquet \ + --x-feature=gandiva \ + --x-feature=s3 github_actions_group_end github_actions_group_begin "Prepare arguments" diff --git a/vector/src/main/codegen/templates/UnionListWriter.java b/vector/src/main/codegen/templates/UnionListWriter.java index 80383254f0..4b54739230 100644 --- a/vector/src/main/codegen/templates/UnionListWriter.java +++ b/vector/src/main/codegen/templates/UnionListWriter.java @@ -340,12 +340,12 @@ public void writeExtension(Object value) { writer.writeExtension(value, extensionType); writer.setPosition(writer.idx() + 1); } - + @Override public void writeExtension(Object value, ArrowType type) { writeExtension(value); } - + public void write(ExtensionHolder var1) { writer.write(var1); writer.setPosition(writer.idx() + 1);