From 551075bc420baab15ce3f3df1f4445b4c334cab8 Mon Sep 17 00:00:00 2001 From: Raghav Aggarwal Date: Fri, 23 Jan 2026 02:14:34 +0530 Subject: [PATCH 1/3] TEZ-4682: [Cloud] Tez AM docker image --- .../org/apache/tez/dag/app/DAGAppMaster.java | 2 +- tez-dist/pom.xml | 32 ++++ tez-dist/src/docker/Dockerfile | 91 +++++++++++ tez-dist/src/docker/README.md | 65 ++++++++ tez-dist/src/docker/build-docker.sh | 128 +++++++++++++++ tez-dist/src/docker/conf/log4j2.properties | 25 +++ tez-dist/src/docker/conf/tez-site.xml | 61 +++++++ tez-dist/src/docker/entrypoint.sh | 153 ++++++++++++++++++ tez-dist/src/docker/tez.env | 31 ++++ 9 files changed, 587 insertions(+), 1 deletion(-) create mode 100644 tez-dist/src/docker/Dockerfile create mode 100644 tez-dist/src/docker/README.md create mode 100755 tez-dist/src/docker/build-docker.sh create mode 100644 tez-dist/src/docker/conf/log4j2.properties create mode 100644 tez-dist/src/docker/conf/tez-site.xml create mode 100644 tez-dist/src/docker/entrypoint.sh create mode 100644 tez-dist/src/docker/tez.env diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java b/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java index a8b76204bd..aff76220e5 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java @@ -2429,7 +2429,7 @@ public static void main(String[] args) { Objects.requireNonNull(appSubmitTimeStr, ApplicationConstants.APP_SUBMIT_TIME_ENV + " is null"); - Configuration conf = new Configuration(); + Configuration conf = new TezConfiguration(); AMExtensions amExtensions = getFrameworkService(conf).getAMExtensions(); DAGProtos.ConfigurationProto confProto = amExtensions.loadConfigurationProto(); diff --git a/tez-dist/pom.xml b/tez-dist/pom.xml index 9777d0c0b9..31dae3a28e 100644 --- a/tez-dist/pom.xml +++ b/tez-dist/pom.xml @@ -118,6 +118,38 @@ + + docker + + + + org.codehaus.mojo + exec-maven-plugin + + + build-docker-image 
+ package + + exec + + + /bin/bash + + ${project.basedir}/src/docker/build-docker.sh + -hadoop + ${hadoop.version} + -tez + ${project.version} + -repo + apache + + + + + + + + diff --git a/tez-dist/src/docker/Dockerfile b/tez-dist/src/docker/Dockerfile new file mode 100644 index 0000000000..680da464ff --- /dev/null +++ b/tez-dist/src/docker/Dockerfile @@ -0,0 +1,91 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +ARG BUILD_ENV=unarchive + +# hadolint ignore=DL3006 +FROM ubuntu AS unarchive +# hadolint ignore=DL3010 +ONBUILD COPY hadoop-*.tar.gz /opt +# hadolint ignore=DL3010 +ONBUILD COPY tez-*.tar.gz /opt + +# hadolint ignore=DL3006 +FROM ${BUILD_ENV} AS env +ARG HADOOP_VERSION +ARG TEZ_VERSION + +RUN mkdir -p /opt/hadoop \ + && tar -xzv \ + --exclude="hadoop-$HADOOP_VERSION/share/doc" \ + --exclude="*/jdiff" \ + --exclude="*/sources" \ + --exclude="*tests.jar" \ + --exclude="*/webapps" \ + -f /opt/hadoop-$HADOOP_VERSION.tar.gz \ + -C /opt/hadoop --strip-components 1 \ + && mkdir -p /opt/tez \ + && tar -xzv \ + -f /opt/tez-$TEZ_VERSION.tar.gz \ + -C /opt/tez \ + && rm -rf /opt/hadoop-$HADOOP_VERSION.tar.gz /opt/tez-$TEZ_VERSION.tar.gz + +FROM eclipse-temurin:21.0.3_9-jre-ubi9-minimal AS run + +ARG UID=1000 +ARG HADOOP_VERSION +ARG TEZ_VERSION + +# Install dependencies +# hadolint ignore=DL3041 +RUN set -ex; \ + microdnf update -y; \ + microdnf -y install procps gettext findutils; \ + microdnf clean all; \ + useradd --no-create-home -s /sbin/nologin -c "" --uid $UID tez + +# Set necessary environment variables +ENV HADOOP_HOME=/opt/hadoop \ + TEZ_HOME=/opt/tez \ + TEZ_CONF_DIR=/opt/tez/conf \ + HADOOP_CONF_DIR=/opt/tez/conf + +ENV TEZ_CLIENT_VERSION=$TEZ_VERSION + +ENV PATH=$TEZ_HOME/bin:$HADOOP_HOME/bin:$PATH + +COPY --from=env --chown=tez /opt/hadoop $HADOOP_HOME +# UPDATED: Copy from the normalized directory name created in 'env' stage +COPY --from=env --chown=tez /opt/tez $TEZ_HOME + +RUN mkdir -p $TEZ_CONF_DIR && chown tez:tez $TEZ_CONF_DIR + +COPY --chown=tez entrypoint.sh / +COPY --chown=tez conf $TEZ_CONF_DIR + +# Create Extension Point Directory +RUN mkdir -p /opt/tez/plugins && chown tez:tez /opt/tez/plugins && chmod 755 /opt/tez/plugins + +RUN chmod +x /entrypoint.sh + +USER tez +WORKDIR $TEZ_HOME + +# Expose AM ports via -p flag in docker command +# EXPOSE 10001 10002 10003 8042 + +ENTRYPOINT ["/entrypoint.sh"] diff --git a/tez-dist/src/docker/README.md 
b/tez-dist/src/docker/README.md new file mode 100644 index 0000000000..b055d8b629 --- /dev/null +++ b/tez-dist/src/docker/README.md @@ -0,0 +1,65 @@ + + +# Tez AM Docker + +1. Building the docker image: + + ```bash + mvn clean install -DskipTests -Pdocker,tools + ``` + +2. Install zookeeper in mac by: + + ```bash + brew install zookeeper + zkServer start + ``` + +3. Running the Tez AM container: + + ```bash + docker run \ + -p 10001:10001 -p 8042:8042 \ + --name tez-am \ + apache/tez-am:1.0.0-SNAPSHOT + ``` + +4. Debugging the Tez AM container: +Uncomment the JAVA_TOOL_OPTIONS in tez.env and expose 5005 port using -p flag + + ```bash + docker run --rm \ + -p 10001:10001 -p 8042:8042 -p 5005:5005 \ + -e TEZ_FRAMEWORK_MODE="STANDALONE_ZOOKEEPER" \ + --env-file tez.env \ + --name tez-am \ + apache/tez-am:1.0.0-SNAPSHOT + ``` + +5. To override the tez-site.xml in docker image use: + +```bash + docker run --rm \ + -p 10001:10001 -p 8042:8042 -p 5005:5005 \ + -e TEZ_FRAMEWORK_MODE="STANDALONE_ZOOKEEPER" \ + --env-file tez.env \ + -v "$(pwd)/conf/tez-site.xml:/opt/tez/custom-conf/tez-site.xml" \ + --name tez-am \ + apache/tez-am:1.0.0-SNAPSHOT + ``` diff --git a/tez-dist/src/docker/build-docker.sh b/tez-dist/src/docker/build-docker.sh new file mode 100755 index 0000000000..fabe94ed77 --- /dev/null +++ b/tez-dist/src/docker/build-docker.sh @@ -0,0 +1,128 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +set -xeou pipefail + +HADOOP_VERSION= +TEZ_VERSION= +REPO= + +usage() { + cat <&2 +Usage: $0 [-h] [-hadoop ] [-tez ] [-repo ] +Build the Apache Tez AM Docker image +-help Display help +-hadoop Build image with the specified Hadoop version +-tez Build image with the specified Tez version +-repo Docker repository +EOF +} + +while [ $# -gt 0 ]; do + case "$1" in + -h) + usage + exit 0 + ;; + -hadoop) + shift + HADOOP_VERSION=$1 + shift + ;; + -tez) + shift + TEZ_VERSION=$1 + shift + ;; + -repo) + shift + REPO=$1 + shift + ;; + *) + shift + ;; + esac +done + +SCRIPT_DIR=$( + cd "$(dirname "$0")" + pwd +) + +DIST_DIR=${DIST_DIR:-"$SCRIPT_DIR/../.."} +PROJECT_ROOT=${PROJECT_ROOT:-"$SCRIPT_DIR/../../.."} + +repo=${REPO:-apache} +WORK_DIR="$(mktemp -d)" +CACHE_DIR="$SCRIPT_DIR/cache" +mkdir -p "$CACHE_DIR" + +# Defaults Hadoop and Tez versions from pom.xml if not provided +HADOOP_VERSION=${HADOOP_VERSION:-$(mvn -f "$PROJECT_ROOT/pom.xml" -q help:evaluate -Dexpression=hadoop.version -DforceStdout)} +TEZ_VERSION=${TEZ_VERSION:-$(mvn -f "$PROJECT_ROOT/pom.xml" -q help:evaluate -Dexpression=project.version -DforceStdout)} + +###################### +# HADOOP FETCH LOGIC # +###################### +HADOOP_FILE_NAME="hadoop-$HADOOP_VERSION.tar.gz" +HADOOP_URL=${HADOOP_URL:-"https://archive.apache.org/dist/hadoop/core/hadoop-$HADOOP_VERSION/$HADOOP_FILE_NAME"} +if [ ! -f "$CACHE_DIR/$HADOOP_FILE_NAME" ]; then + echo "Downloading Hadoop from $HADOOP_URL..." + if ! 
curl --fail -L "$HADOOP_URL" -o "$CACHE_DIR/$HADOOP_FILE_NAME.tmp"; then + echo "Fail to download Hadoop, exiting...." + exit 1 + fi + mv "$CACHE_DIR/$HADOOP_FILE_NAME.tmp" "$CACHE_DIR/$HADOOP_FILE_NAME" +fi + +##################################### +# Pick tez tarball from local build # +##################################### +TEZ_FILE_NAME="tez-$TEZ_VERSION.tar.gz" +LOCAL_DIST_PATH="$DIST_DIR/target/$TEZ_FILE_NAME" + +if [ -f "$LOCAL_DIST_PATH" ]; then + echo "--> Found local Tez build artifact at: $LOCAL_DIST_PATH" + cp "$LOCAL_DIST_PATH" "$WORK_DIR/" +else + echo "--> Error: Local Tez artifact not found at $LOCAL_DIST_PATH" + echo "--> Please build the project first (e.g., mvn clean install -DskipTests)." + exit 1 +fi + +# ------------------------------------------------------------------------- +# BUILD CONTEXT PREPARATION +# ------------------------------------------------------------------------- +cp "$CACHE_DIR/$HADOOP_FILE_NAME" "$WORK_DIR/" +cp -R "$SCRIPT_DIR/conf" "$WORK_DIR/" 2>/dev/null || mkdir -p "$WORK_DIR/conf" +cp "$SCRIPT_DIR/entrypoint.sh" "$WORK_DIR/" +cp "$SCRIPT_DIR/Dockerfile" "$WORK_DIR/" + +echo "Building Docker image..." +docker build \ + "$WORK_DIR" \ + -f "$WORK_DIR/Dockerfile" \ + -t "$repo/tez-am:$TEZ_VERSION" \ + --build-arg "BUILD_ENV=unarchive" \ + --build-arg "HADOOP_VERSION=$HADOOP_VERSION" \ + --build-arg "TEZ_VERSION=$TEZ_VERSION" + +rm -r "${WORK_DIR}" +echo "Docker image $repo/tez-am:$TEZ_VERSION built successfully." diff --git a/tez-dist/src/docker/conf/log4j2.properties b/tez-dist/src/docker/conf/log4j2.properties new file mode 100644 index 0000000000..ddccb1b184 --- /dev/null +++ b/tez-dist/src/docker/conf/log4j2.properties @@ -0,0 +1,25 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +appender.console.type = Console +appender.console.name = console +appender.console.target = SYSTEM_ERR +appender.console.layout.type = PatternLayout +appender.console.layout.pattern = %d{ISO8601} %5p [%t] %c{2}: %m%n + +rootLogger.level = INFO +rootLogger.appenderRef.console.ref = console diff --git a/tez-dist/src/docker/conf/tez-site.xml b/tez-dist/src/docker/conf/tez-site.xml new file mode 100644 index 0000000000..a38a9ae6d0 --- /dev/null +++ b/tez-dist/src/docker/conf/tez-site.xml @@ -0,0 +1,61 @@ + + + + + + + tez.am.client.am.port-range + 10001-10003 + + + + tez.am.resource.memory.mb + 1024 + + + + tez.framework.mode + STANDALONE_ZOOKEEPER + + + + tez.am.tez-ui.webservice.enable + false + + + + tez.am.zookeeper.quorum + host.docker.internal:2181 + + + + tez.am.log.level + DEBUG + + + + tez.am.mode.session + true + + + + tez.local.mode + true + + + diff --git a/tez-dist/src/docker/entrypoint.sh b/tez-dist/src/docker/entrypoint.sh new file mode 100644 index 0000000000..543c580ff2 --- /dev/null +++ b/tez-dist/src/docker/entrypoint.sh @@ -0,0 +1,153 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +set -xeou pipefail + +################################################ +# 1. Mocking DAGAppMaster#main() env variables # +################################################ + +: "${CONTAINER_ID:="container_1700000000000_0001_01_000001"}" +: "${USER:="tez"}" +: "${HADOOP_USER_NAME:="tez"}" +: "${NM_HOST:="localhost"}" +: "${NM_PORT:="12345"}" +: "${NM_HTTP_PORT:="8042"}" +: "${LOCAL_DIRS:="/tmp"}" +: "${LOG_DIRS:="/opt/tez/logs"}" +: "${APP_SUBMIT_TIME_ENV:=$(($(date +%s) * 1000))}" +: "${TEZ_AM_EXTERNAL_ID:="tez-session-$(hostname)"}" + +export CONTAINER_ID USER HADOOP_USER_NAME NM_HOST NM_PORT NM_HTTP_PORT \ + LOCAL_DIRS LOG_DIRS APP_SUBMIT_TIME_ENV TEZ_AM_EXTERNAL_ID + +if [[ ! -f "tez-conf.pb" ]]; then + touch "tez-conf.pb" + echo "--> Created dummy tez-conf.pb" +fi + +if [[ ! -f "tez-dag.pb" ]]; then + touch "tez-dag.pb" + echo "--> Created dummy tez-dag.pb" +fi + +mkdir -p "$LOG_DIRS" + +########################## +# CONFIGURATION HANDLING # +########################## + +# Symlink hadoop conf in tez conf dir +if [[ -d "$HADOOP_HOME/etc/hadoop" ]]; then + echo "--> Linking missing Hadoop configs to $TEZ_CONF_DIR..." + for f in "$HADOOP_HOME/etc/hadoop"/*; do + basename=$(basename "$f") + # this check helps in case the user wants to provide a custom hdfs-site.xml + # or any other configuration file + if [[ ! 
-e "$TEZ_CONF_DIR/$basename" ]]; then + ln -s "$f" "$TEZ_CONF_DIR/$basename" + fi + done +fi + +########################### +# Custom Config directory # +########################### +if [[ -n "${TEZ_CUSTOM_CONF_DIR:-}" ]] && [[ -d "$TEZ_CUSTOM_CONF_DIR" ]]; then + echo "--> Using custom configuration directory: $TEZ_CUSTOM_CONF_DIR" + find "${TEZ_CUSTOM_CONF_DIR}" -type f -exec \ + ln -sf {} "${TEZ_CONF_DIR}"/ \; + + # Remove template keyword if it exist + if [[ -f "$TEZ_CONF_DIR/tez-site.xml.template" ]]; then + envsubst < "$TEZ_CONF_DIR/tez-site.xml.template" > "$TEZ_CONF_DIR/tez-site.xml" + fi +fi + +############# +# CLASSPATH # +############# + +export HADOOP_USER_CLASSPATH_FIRST=true +# Order is: conf -> plugins -> tez jars -> hadoop jars +CLASSPATH="${TEZ_CONF_DIR}" + +# Custom Plugins +# This allows mounting a volume at /opt/tez/plugins containing aux jars +PLUGIN_DIR="/opt/tez/plugins" +if [[ -d "$PLUGIN_DIR" ]]; then + count=$(find "$PLUGIN_DIR" -maxdepth 1 -name "*.jar" 2>/dev/null | wc -l) + if [ "$count" != "0" ]; then + echo "--> Found $count plugin jars. Prepending to classpath." + CLASSPATH="${CLASSPATH}:${PLUGIN_DIR}/*" + fi +fi + +# Tez Jars +CLASSPATH="${CLASSPATH}:${TEZ_HOME}/*:${TEZ_HOME}/lib/*" + +# Hadoop Jars +CLASSPATH="${CLASSPATH}:${HADOOP_HOME}/share/hadoop/common/*" +CLASSPATH="${CLASSPATH}:${HADOOP_HOME}/share/hadoop/common/lib/*" +CLASSPATH="${CLASSPATH}:${HADOOP_HOME}/share/hadoop/hdfs/*" +CLASSPATH="${CLASSPATH}:${HADOOP_HOME}/share/hadoop/hdfs/lib/*" +CLASSPATH="${CLASSPATH}:${HADOOP_HOME}/share/hadoop/yarn/*" +CLASSPATH="${CLASSPATH}:${HADOOP_HOME}/share/hadoop/yarn/lib/*" +CLASSPATH="${CLASSPATH}:${HADOOP_HOME}/share/hadoop/mapreduce/*" +CLASSPATH="${CLASSPATH}:${HADOOP_HOME}/share/hadoop/mapreduce/lib/*" + +############# +# Execution # +############# +TEZ_DAG_JAR=$(find "$TEZ_HOME" -maxdepth 1 -name "tez-dag-*.jar" ! 
-name "*-tests.jar" | head -n 1) + +if [ -z "$TEZ_DAG_JAR" ]; then + echo "Error: Could not find tez-dag-*.jar in $TEZ_HOME" + exit 1 +fi + +echo "--> Starting DAGAppMaster..." +echo "--> HADOOP_CONF_DIR: $HADOOP_CONF_DIR" + +: "${TEZ_AM_HEAP_OPTS:="-Xmx2048m"}" + +# Check for Log4j2 Configuration +LOG4J2_FILE="$TEZ_CONF_DIR/log4j2.properties" +if [[ -f "$LOG4J2_FILE" ]]; then + echo "--> [TEZ-AM] Found Log4j2 configuration: $LOG4J2_FILE" + JAVA_OPTS="${JAVA_OPTS:+$JAVA_OPTS }-Dlog4j.configurationFile=file:$LOG4J2_FILE" +fi + +JAVA_ADD_OPENS=( + "--add-opens=java.base/java.lang=ALL-UNNAMED" + "--add-opens=java.base/java.util=ALL-UNNAMED" + "--add-opens=java.base/java.io=ALL-UNNAMED" +) + +read -r -a JAVA_OPTS_ARR <<< "${JAVA_OPTS:-}" +read -r -a HEAP_OPTS_ARR <<< "${TEZ_AM_HEAP_OPTS}" + +exec java "${HEAP_OPTS_ARR[@]}" "${JAVA_OPTS_ARR[@]}" "${JAVA_ADD_OPENS[@]}" \ + -Duser.name="$HADOOP_USER_NAME" \ + -Djava.library.path="$HADOOP_HOME/lib/native" \ + -Dhadoop.home.dir="$HADOOP_HOME" \ + -Dhadoop.log.dir="$LOG_DIRS" \ + -Dtez.conf.dir="$TEZ_CONF_DIR" \ + -cp "$CLASSPATH" \ + org.apache.tez.dag.app.DAGAppMaster --session \ + "$@" diff --git a/tez-dist/src/docker/tez.env b/tez-dist/src/docker/tez.env new file mode 100644 index 0000000000..ce7d4d278f --- /dev/null +++ b/tez-dist/src/docker/tez.env @@ -0,0 +1,31 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Tez AM Container Environment Configuration + +HADOOP_USER_NAME=tez +USER=tez +CONTAINER_ID=container_1700000000000_0001_01_000001 +NM_HOST=localhost +NM_PORT=12345 +NM_HTTP_PORT=8042 +TEZ_FRAMEWORK_MODE=STANDALONE_ZOOKEEPER +TEZ_AM_ZOOKEEPER_QUORUM=host.docker.internal:2181 +TEZ_AM_LOG_LEVEL=INFO +# TEZ_CUSTOM_CONF_DIR=/opt/tez/custom-conf +# Enable remote debugging on port 5005 +#JAVA_TOOL_OPTIONS='-agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=*:5005' From 8fe973a54f8ef465805977eed7dc3b03f29de010 Mon Sep 17 00:00:00 2001 From: Raghav Aggarwal Date: Sat, 21 Feb 2026 22:05:49 +0530 Subject: [PATCH 2/3] move back to Configuration --- .../java/org/apache/tez/dag/app/DAGAppMaster.java | 2 +- tez-dist/src/docker/conf/tez-site.xml | 14 ++------------ tez-dist/src/docker/entrypoint.sh | 10 ---------- tez-dist/src/docker/tez.env | 2 -- 4 files changed, 3 insertions(+), 25 deletions(-) diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java b/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java index aff76220e5..a8b76204bd 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java @@ -2429,7 +2429,7 @@ public static void main(String[] args) { Objects.requireNonNull(appSubmitTimeStr, ApplicationConstants.APP_SUBMIT_TIME_ENV + " is null"); - Configuration conf = new TezConfiguration(); + Configuration conf = new Configuration(); AMExtensions amExtensions = getFrameworkService(conf).getAMExtensions(); DAGProtos.ConfigurationProto 
confProto = amExtensions.loadConfigurationProto(); diff --git a/tez-dist/src/docker/conf/tez-site.xml b/tez-dist/src/docker/conf/tez-site.xml index a38a9ae6d0..681ecc30b7 100644 --- a/tez-dist/src/docker/conf/tez-site.xml +++ b/tez-dist/src/docker/conf/tez-site.xml @@ -23,16 +23,6 @@ 10001-10003 - - tez.am.resource.memory.mb - 1024 - - - - tez.framework.mode - STANDALONE_ZOOKEEPER - - tez.am.tez-ui.webservice.enable false @@ -49,8 +39,8 @@ - tez.am.mode.session - true + fs.defaultFS + hdfs://host.docker.internal:9000 diff --git a/tez-dist/src/docker/entrypoint.sh b/tez-dist/src/docker/entrypoint.sh index 543c580ff2..06715aba16 100644 --- a/tez-dist/src/docker/entrypoint.sh +++ b/tez-dist/src/docker/entrypoint.sh @@ -36,16 +36,6 @@ set -xeou pipefail export CONTAINER_ID USER HADOOP_USER_NAME NM_HOST NM_PORT NM_HTTP_PORT \ LOCAL_DIRS LOG_DIRS APP_SUBMIT_TIME_ENV TEZ_AM_EXTERNAL_ID -if [[ ! -f "tez-conf.pb" ]]; then - touch "tez-conf.pb" - echo "--> Created dummy tez-conf.pb" -fi - -if [[ ! 
-f "tez-dag.pb" ]]; then - touch "tez-dag.pb" - echo "--> Created dummy tez-dag.pb" -fi - mkdir -p "$LOG_DIRS" ########################## diff --git a/tez-dist/src/docker/tez.env b/tez-dist/src/docker/tez.env index ce7d4d278f..ed2d208f61 100644 --- a/tez-dist/src/docker/tez.env +++ b/tez-dist/src/docker/tez.env @@ -24,8 +24,6 @@ NM_HOST=localhost NM_PORT=12345 NM_HTTP_PORT=8042 TEZ_FRAMEWORK_MODE=STANDALONE_ZOOKEEPER -TEZ_AM_ZOOKEEPER_QUORUM=host.docker.internal:2181 -TEZ_AM_LOG_LEVEL=INFO # TEZ_CUSTOM_CONF_DIR=/opt/tez/custom-conf # Enable remote debugging on port 5005 #JAVA_TOOL_OPTIONS='-agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=*:5005' From 0c2a29bea41a23ee6b50b639e5d71ead3f7890c1 Mon Sep 17 00:00:00 2001 From: Raghav Aggarwal Date: Wed, 25 Feb 2026 21:22:04 +0530 Subject: [PATCH 3/3] Review Comments --- .gitignore | 1 + tez-dist/pom.xml | 9 +-- tez-dist/src/docker/Dockerfile | 10 ++-- tez-dist/src/docker/README.md | 60 ++++++++++++++----- tez-dist/src/docker/build-docker.sh | 10 ++-- tez-dist/src/docker/conf/tez-site.xml | 2 + .../{entrypoint.sh => tez-am-entrypoint.sh} | 2 +- tez-dist/src/docker/tez.env | 11 ++-- 8 files changed, 69 insertions(+), 36 deletions(-) rename tez-dist/src/docker/{entrypoint.sh => tez-am-entrypoint.sh} (99%) diff --git a/.gitignore b/.gitignore index 85d660672c..e0df115492 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,4 @@ .settings pom.xml.versionsBackup target +tez-dist/src/docker/cache/ diff --git a/tez-dist/pom.xml b/tez-dist/pom.xml index 31dae3a28e..5271970279 100644 --- a/tez-dist/pom.xml +++ b/tez-dist/pom.xml @@ -136,12 +136,9 @@ /bin/bash ${project.basedir}/src/docker/build-docker.sh - -hadoop - ${hadoop.version} - -tez - ${project.version} - -repo - apache + -hadoop ${hadoop.version} + -tez ${project.version} + -repo apache diff --git a/tez-dist/src/docker/Dockerfile b/tez-dist/src/docker/Dockerfile index 680da464ff..0d40c33253 100644 --- a/tez-dist/src/docker/Dockerfile +++ 
b/tez-dist/src/docker/Dockerfile @@ -44,7 +44,7 @@ RUN mkdir -p /opt/hadoop \ -C /opt/tez \ && rm -rf /opt/hadoop-$HADOOP_VERSION.tar.gz /opt/tez-$TEZ_VERSION.tar.gz -FROM eclipse-temurin:21.0.3_9-jre-ubi9-minimal AS run +FROM eclipse-temurin:21-jdk-ubi9-minimal AS run ARG UID=1000 ARG HADOOP_VERSION @@ -54,7 +54,7 @@ ARG TEZ_VERSION # hadolint ignore=DL3041 RUN set -ex; \ microdnf update -y; \ - microdnf -y install procps gettext findutils; \ + microdnf -y install procps gettext findutils hostname; \ microdnf clean all; \ useradd --no-create-home -s /sbin/nologin -c "" --uid $UID tez @@ -74,13 +74,13 @@ COPY --from=env --chown=tez /opt/tez $TEZ_HOME RUN mkdir -p $TEZ_CONF_DIR && chown tez:tez $TEZ_CONF_DIR -COPY --chown=tez entrypoint.sh / +COPY --chown=tez tez-am-entrypoint.sh / COPY --chown=tez conf $TEZ_CONF_DIR # Create Extension Point Directory RUN mkdir -p /opt/tez/plugins && chown tez:tez /opt/tez/plugins && chmod 755 /opt/tez/plugins -RUN chmod +x /entrypoint.sh +RUN chmod +x /tez-am-entrypoint.sh USER tez WORKDIR $TEZ_HOME @@ -88,4 +88,4 @@ WORKDIR $TEZ_HOME # Expose AM ports via -p flag in docker command # EXPOSE 10001 10002 10003 8042 -ENTRYPOINT ["/entrypoint.sh"] +ENTRYPOINT ["/tez-am-entrypoint.sh"] diff --git a/tez-dist/src/docker/README.md b/tez-dist/src/docker/README.md index b055d8b629..0c15f613b9 100644 --- a/tez-dist/src/docker/README.md +++ b/tez-dist/src/docker/README.md @@ -21,7 +21,7 @@ 1. Building the docker image: ```bash - mvn clean install -DskipTests -Pdocker,tools + mvn clean install -DskipTests -Pdocker ``` 2. Install zookeeper in mac by: @@ -34,32 +34,62 @@ 3. Running the Tez AM container: ```bash - docker run \ - -p 10001:10001 -p 8042:8042 \ + export TEZ_VERSION=1.0.0-SNAPSHOT + + docker run --rm \ + -p 10001:10001 \ + --env-file tez-dist/src/docker/tez.env \ --name tez-am \ - apache/tez-am:1.0.0-SNAPSHOT + --hostname localhost \ + apache/tez-am:$TEZ_VERSION ``` + * `TEZ_VERSION` corresponds to the Maven `${project.version}`. 
+ Set this environment variable in your shell before running the commands. + * Expose ports using the `-p` flag based on the + `tez.am.client.am.port-range` property in `tez-site.xml`. + * The `--hostname` flag configures the container's hostname, allowing + services on the host (e.g., macOS) to connect to it. + * Ensure the `--env-file` flag is included, or at a minimum, pass + `-e TEZ_FRAMEWORK_MODE=STANDALONE_ZOOKEEPER` to the `docker run` command. + 4. Debugging the Tez AM container: -Uncomment the JAVA_TOOL_OPTIONS in tez.env and expose 5005 port using -p flag +Uncomment `JAVA_TOOL_OPTIONS` in `tez.env` and expose port 5005 using the `-p` flag ```bash docker run --rm \ - -p 10001:10001 -p 8042:8042 -p 5005:5005 \ - -e TEZ_FRAMEWORK_MODE="STANDALONE_ZOOKEEPER" \ - --env-file tez.env \ + -p 10001:10001 -p 5005:5005 \ + --env-file tez-dist/src/docker/tez.env \ --name tez-am \ - apache/tez-am:1.0.0-SNAPSHOT + --hostname localhost \ + apache/tez-am:$TEZ_VERSION ``` 5. To override the tez-site.xml in docker image use: + * Set the `TEZ_CUSTOM_CONF_DIR` environment variable in `tez.env` + or via the `docker run` command (e.g., `/opt/tez/custom-conf`). + + ```bash + export TEZ_SITE_PATH=$(pwd)/tez-dist/src/docker/conf/tez-site.xml + + docker run --rm \ + -p 10001:10001 \ + --env-file tez-dist/src/docker/tez.env \ + -v "$TEZ_SITE_PATH:/opt/tez/custom-conf/tez-site.xml" \ + --name tez-am \ + --hostname localhost \ + apache/tez-am:$TEZ_VERSION + ``` -```bash +6. To add plugin jars in docker image use: + * The plugin directory path inside the Docker container is fixed at `/opt/tez/plugins`. 
+ + ```bash docker run --rm \ - -p 10001:10001 -p 8042:8042 -p 5005:5005 \ - -e TEZ_FRAMEWORK_MODE="STANDALONE_ZOOKEEPER" \ - --env-file tez.env \ - -v "$(pwd)/conf/tez-site.xml:/opt/tez/custom-conf/tez-site.xml" \ + -p 10001:10001 \ + --env-file tez-dist/src/docker/tez.env \ + -v "/path/to/your/local/plugins:/opt/tez/plugins" \ --name tez-am \ - apache/tez-am:1.0.0-SNAPSHOT + --hostname localhost \ + apache/tez-am:$TEZ_VERSION ``` diff --git a/tez-dist/src/docker/build-docker.sh b/tez-dist/src/docker/build-docker.sh index fabe94ed77..3642aa7834 100755 --- a/tez-dist/src/docker/build-docker.sh +++ b/tez-dist/src/docker/build-docker.sh @@ -69,7 +69,7 @@ SCRIPT_DIR=$( DIST_DIR=${DIST_DIR:-"$SCRIPT_DIR/../.."} PROJECT_ROOT=${PROJECT_ROOT:-"$SCRIPT_DIR/../../.."} -repo=${REPO:-apache} +REPO=${REPO:-apache} WORK_DIR="$(mktemp -d)" CACHE_DIR="$SCRIPT_DIR/cache" mkdir -p "$CACHE_DIR" @@ -82,7 +82,7 @@ TEZ_VERSION=${TEZ_VERSION:-$(mvn -f "$PROJECT_ROOT/pom.xml" -q help:evaluate -De # HADOOP FETCH LOGIC # ###################### HADOOP_FILE_NAME="hadoop-$HADOOP_VERSION.tar.gz" -HADOOP_URL=${HADOOP_URL:-"https://archive.apache.org/dist/hadoop/core/hadoop-$HADOOP_VERSION/$HADOOP_FILE_NAME"} +HADOOP_URL=${HADOOP_URL:-"https://dlcdn.apache.org/hadoop/common/hadoop-$HADOOP_VERSION/$HADOOP_FILE_NAME"} if [ ! -f "$CACHE_DIR/$HADOOP_FILE_NAME" ]; then echo "Downloading Hadoop from $HADOOP_URL..." if ! curl --fail -L "$HADOOP_URL" -o "$CACHE_DIR/$HADOOP_FILE_NAME.tmp"; then @@ -112,17 +112,17 @@ fi # ------------------------------------------------------------------------- cp "$CACHE_DIR/$HADOOP_FILE_NAME" "$WORK_DIR/" cp -R "$SCRIPT_DIR/conf" "$WORK_DIR/" 2>/dev/null || mkdir -p "$WORK_DIR/conf" -cp "$SCRIPT_DIR/entrypoint.sh" "$WORK_DIR/" +cp "$SCRIPT_DIR/tez-am-entrypoint.sh" "$WORK_DIR/" cp "$SCRIPT_DIR/Dockerfile" "$WORK_DIR/" echo "Building Docker image..." 
docker build \ "$WORK_DIR" \ -f "$WORK_DIR/Dockerfile" \ - -t "$repo/tez-am:$TEZ_VERSION" \ + -t "$REPO/tez-am:$TEZ_VERSION" \ --build-arg "BUILD_ENV=unarchive" \ --build-arg "HADOOP_VERSION=$HADOOP_VERSION" \ --build-arg "TEZ_VERSION=$TEZ_VERSION" rm -r "${WORK_DIR}" -echo "Docker image $repo/tez-am:$TEZ_VERSION built successfully." +echo "Docker image $REPO/tez-am:$TEZ_VERSION built successfully." diff --git a/tez-dist/src/docker/conf/tez-site.xml b/tez-dist/src/docker/conf/tez-site.xml index 681ecc30b7..b1b2b55caa 100644 --- a/tez-dist/src/docker/conf/tez-site.xml +++ b/tez-dist/src/docker/conf/tez-site.xml @@ -38,10 +38,12 @@ DEBUG + tez.local.mode diff --git a/tez-dist/src/docker/entrypoint.sh b/tez-dist/src/docker/tez-am-entrypoint.sh similarity index 99% rename from tez-dist/src/docker/entrypoint.sh rename to tez-dist/src/docker/tez-am-entrypoint.sh index 06715aba16..e4d96394ad 100644 --- a/tez-dist/src/docker/entrypoint.sh +++ b/tez-dist/src/docker/tez-am-entrypoint.sh @@ -63,7 +63,7 @@ if [[ -n "${TEZ_CUSTOM_CONF_DIR:-}" ]] && [[ -d "$TEZ_CUSTOM_CONF_DIR" ]]; then find "${TEZ_CUSTOM_CONF_DIR}" -type f -exec \ ln -sf {} "${TEZ_CONF_DIR}"/ \; - # Remove template keyword if it exist + # Remove template keyword if it exists if [[ -f "$TEZ_CONF_DIR/tez-site.xml.template" ]]; then envsubst < "$TEZ_CONF_DIR/tez-site.xml.template" > "$TEZ_CONF_DIR/tez-site.xml" fi diff --git a/tez-dist/src/docker/tez.env b/tez-dist/src/docker/tez.env index ed2d208f61..832bb986da 100644 --- a/tez-dist/src/docker/tez.env +++ b/tez-dist/src/docker/tez.env @@ -17,13 +17,16 @@ # Tez AM Container Environment Configuration -HADOOP_USER_NAME=tez -USER=tez CONTAINER_ID=container_1700000000000_0001_01_000001 +USER=tez +HADOOP_USER_NAME=tez NM_HOST=localhost NM_PORT=12345 NM_HTTP_PORT=8042 +LOG_DIRS=/opt/tez/logs TEZ_FRAMEWORK_MODE=STANDALONE_ZOOKEEPER -# TEZ_CUSTOM_CONF_DIR=/opt/tez/custom-conf +TEZ_CUSTOM_CONF_DIR=/opt/tez/custom-conf +# TEZ_AM_HEAP_OPTS configures the maximum heap size 
(Xmx) for the Tez AM. +TEZ_AM_HEAP_OPTS=-Xmx2048m # Enable remote debugging on port 5005 -#JAVA_TOOL_OPTIONS='-agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=*:5005' +# JAVA_TOOL_OPTIONS='-agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=*:5005'