Skip to content

Commit 551075b

Browse files
TEZ-4682: [Cloud] Tez AM docker image
1 parent f09ba7f commit 551075b

9 files changed

Lines changed: 587 additions & 1 deletion

File tree

tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2429,7 +2429,7 @@ public static void main(String[] args) {
24292429
Objects.requireNonNull(appSubmitTimeStr,
24302430
ApplicationConstants.APP_SUBMIT_TIME_ENV + " is null");
24312431

2432-
Configuration conf = new Configuration();
2432+
Configuration conf = new TezConfiguration();
24332433

24342434
AMExtensions amExtensions = getFrameworkService(conf).getAMExtensions();
24352435
DAGProtos.ConfigurationProto confProto = amExtensions.loadConfigurationProto();

tez-dist/pom.xml

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,38 @@
118118
</dependency>
119119
</dependencies>
120120
</profile>
121+
<profile>
122+
<id>docker</id>
123+
<build>
124+
<plugins>
125+
<plugin>
126+
<groupId>org.codehaus.mojo</groupId>
127+
<artifactId>exec-maven-plugin</artifactId>
128+
<executions>
129+
<execution>
130+
<id>build-docker-image</id>
131+
<phase>package</phase>
132+
<goals>
133+
<goal>exec</goal>
134+
</goals>
135+
<configuration>
136+
<executable>/bin/bash</executable>
137+
<arguments>
138+
<argument>${project.basedir}/src/docker/build-docker.sh</argument>
139+
<argument>-hadoop</argument>
140+
<argument>${hadoop.version}</argument>
141+
<argument>-tez</argument>
142+
<argument>${project.version}</argument>
143+
<argument>-repo</argument>
144+
<argument>apache</argument>
145+
</arguments>
146+
</configuration>
147+
</execution>
148+
</executions>
149+
</plugin>
150+
</plugins>
151+
</build>
152+
</profile>
121153
</profiles>
122154

123155
<build>

tez-dist/src/docker/Dockerfile

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
#
2+
# Licensed to the Apache Software Foundation (ASF) under one or more
3+
# contributor license agreements. See the NOTICE file distributed with
4+
# this work for additional information regarding copyright ownership.
5+
# The ASF licenses this file to You under the Apache License, Version 2.0
6+
# (the "License"); you may not use this file except in compliance with
7+
# the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
#
17+
18+
ARG BUILD_ENV=unarchive
19+
20+
# hadolint ignore=DL3006
21+
FROM ubuntu AS unarchive
22+
# hadolint ignore=DL3010
23+
ONBUILD COPY hadoop-*.tar.gz /opt
24+
# hadolint ignore=DL3010
25+
ONBUILD COPY tez-*.tar.gz /opt
26+
27+
# hadolint ignore=DL3006
28+
FROM ${BUILD_ENV} AS env
29+
ARG HADOOP_VERSION
30+
ARG TEZ_VERSION
31+
32+
RUN mkdir -p /opt/hadoop \
33+
&& tar -xzv \
34+
--exclude="hadoop-$HADOOP_VERSION/share/doc" \
35+
--exclude="*/jdiff" \
36+
--exclude="*/sources" \
37+
--exclude="*tests.jar" \
38+
--exclude="*/webapps" \
39+
-f /opt/hadoop-$HADOOP_VERSION.tar.gz \
40+
-C /opt/hadoop --strip-components 1 \
41+
&& mkdir -p /opt/tez \
42+
&& tar -xzv \
43+
-f /opt/tez-$TEZ_VERSION.tar.gz \
44+
-C /opt/tez \
45+
&& rm -rf /opt/hadoop-$HADOOP_VERSION.tar.gz /opt/tez-$TEZ_VERSION.tar.gz
46+
47+
FROM eclipse-temurin:21.0.3_9-jre-ubi9-minimal AS run
48+
49+
ARG UID=1000
50+
ARG HADOOP_VERSION
51+
ARG TEZ_VERSION
52+
53+
# Install dependencies
54+
# hadolint ignore=DL3041
55+
RUN set -ex; \
56+
microdnf update -y; \
57+
microdnf -y install procps gettext findutils; \
58+
microdnf clean all; \
59+
useradd --no-create-home -s /sbin/nologin -c "" --uid $UID tez
60+
61+
# Set necessary environment variables
62+
ENV HADOOP_HOME=/opt/hadoop \
63+
TEZ_HOME=/opt/tez \
64+
TEZ_CONF_DIR=/opt/tez/conf \
65+
HADOOP_CONF_DIR=/opt/tez/conf
66+
67+
ENV TEZ_CLIENT_VERSION=$TEZ_VERSION
68+
69+
ENV PATH=$TEZ_HOME/bin:$HADOOP_HOME/bin:$PATH
70+
71+
COPY --from=env --chown=tez /opt/hadoop $HADOOP_HOME
72+
# Copy Tez from the normalized /opt/tez directory created in the 'env' stage
73+
COPY --from=env --chown=tez /opt/tez $TEZ_HOME
74+
75+
RUN mkdir -p $TEZ_CONF_DIR && chown tez:tez $TEZ_CONF_DIR
76+
77+
COPY --chown=tez entrypoint.sh /
78+
COPY --chown=tez conf $TEZ_CONF_DIR
79+
80+
# Create Extension Point Directory
81+
RUN mkdir -p /opt/tez/plugins && chown tez:tez /opt/tez/plugins && chmod 755 /opt/tez/plugins
82+
83+
RUN chmod +x /entrypoint.sh
84+
85+
USER tez
86+
WORKDIR $TEZ_HOME
87+
88+
# Expose AM ports via -p flag in docker command
89+
# EXPOSE 10001 10002 10003 8042
90+
91+
ENTRYPOINT ["/entrypoint.sh"]

tez-dist/src/docker/README.md

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
<!--
2+
Licensed to the Apache Software Foundation (ASF) under one
3+
or more contributor license agreements. See the NOTICE file
4+
distributed with this work for additional information
5+
regarding copyright ownership. The ASF licenses this file
6+
to you under the Apache License, Version 2.0 (the
7+
"License"); you may not use this file except in compliance
8+
with the License. You may obtain a copy of the License at
9+
10+
http://www.apache.org/licenses/LICENSE-2.0
11+
12+
Unless required by applicable law or agreed to in writing, software
13+
distributed under the License is distributed on an "AS IS" BASIS,
14+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
See the License for the specific language governing permissions and
16+
limitations under the License.
17+
-->
18+
19+
# Tez AM Docker
20+
21+
1. Building the docker image:
22+
23+
```bash
24+
mvn clean install -DskipTests -Pdocker,tools
25+
```
26+
27+
2. Install and start ZooKeeper (on macOS, using Homebrew):
28+
29+
```bash
30+
brew install zookeeper
31+
zkServer start
32+
```
33+
34+
3. Running the Tez AM container:
35+
36+
```bash
37+
docker run \
38+
-p 10001:10001 -p 8042:8042 \
39+
--name tez-am \
40+
apache/tez-am:1.0.0-SNAPSHOT
41+
```
42+
43+
4. Debugging the Tez AM container:
44+
Uncomment `JAVA_TOOL_OPTIONS` in `tez.env` and expose port 5005 using the `-p` flag:
45+
46+
```bash
47+
docker run --rm \
48+
-p 10001:10001 -p 8042:8042 -p 5005:5005 \
49+
-e TEZ_FRAMEWORK_MODE="STANDALONE_ZOOKEEPER" \
50+
--env-file tez.env \
51+
--name tez-am \
52+
apache/tez-am:1.0.0-SNAPSHOT
53+
```
54+
55+
5. To override the `tez-site.xml` in the Docker image, mount a custom one:
56+
57+
```bash
58+
docker run --rm \
59+
-p 10001:10001 -p 8042:8042 -p 5005:5005 \
60+
-e TEZ_FRAMEWORK_MODE="STANDALONE_ZOOKEEPER" \
61+
--env-file tez.env \
62+
-v "$(pwd)/conf/tez-site.xml:/opt/tez/custom-conf/tez-site.xml" \
63+
--name tez-am \
64+
apache/tez-am:1.0.0-SNAPSHOT
65+
```
Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
#!/usr/bin/env bash
2+
3+
#
4+
# Licensed to the Apache Software Foundation (ASF) under one or more
5+
# contributor license agreements. See the NOTICE file distributed with
6+
# this work for additional information regarding copyright ownership.
7+
# The ASF licenses this file to You under the Apache License, Version 2.0
8+
# (the "License"); you may not use this file except in compliance with
9+
# the License. You may obtain a copy of the License at
10+
#
11+
# http://www.apache.org/licenses/LICENSE-2.0
12+
#
13+
# Unless required by applicable law or agreed to in writing, software
14+
# distributed under the License is distributed on an "AS IS" BASIS,
15+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
# See the License for the specific language governing permissions and
17+
# limitations under the License.
18+
#
19+
20+
set -xeou pipefail
21+
22+
HADOOP_VERSION=
23+
TEZ_VERSION=
24+
REPO=
25+
26+
usage() {
27+
cat <<EOF 1>&2
28+
Usage: $0 [-h] [-hadoop <Hadoop version>] [-tez <Tez version>] [-repo <Docker repo>]
29+
Build the Apache Tez AM Docker image
30+
-h Display help
31+
-hadoop Build image with the specified Hadoop version
32+
-tez Build image with the specified Tez version
33+
-repo Docker repository
34+
EOF
35+
}
36+
37+
while [ $# -gt 0 ]; do
38+
case "$1" in
39+
-h)
40+
usage
41+
exit 0
42+
;;
43+
-hadoop)
44+
shift
45+
HADOOP_VERSION=$1
46+
shift
47+
;;
48+
-tez)
49+
shift
50+
TEZ_VERSION=$1
51+
shift
52+
;;
53+
-repo)
54+
shift
55+
REPO=$1
56+
shift
57+
;;
58+
*)
59+
shift
60+
;;
61+
esac
62+
done
63+
64+
SCRIPT_DIR=$(
65+
cd "$(dirname "$0")"
66+
pwd
67+
)
68+
69+
DIST_DIR=${DIST_DIR:-"$SCRIPT_DIR/../.."}
70+
PROJECT_ROOT=${PROJECT_ROOT:-"$SCRIPT_DIR/../../.."}
71+
72+
repo=${REPO:-apache}
73+
WORK_DIR="$(mktemp -d)"
74+
CACHE_DIR="$SCRIPT_DIR/cache"
75+
mkdir -p "$CACHE_DIR"
76+
77+
# Default the Hadoop and Tez versions to the values in pom.xml if not provided
78+
HADOOP_VERSION=${HADOOP_VERSION:-$(mvn -f "$PROJECT_ROOT/pom.xml" -q help:evaluate -Dexpression=hadoop.version -DforceStdout)}
79+
TEZ_VERSION=${TEZ_VERSION:-$(mvn -f "$PROJECT_ROOT/pom.xml" -q help:evaluate -Dexpression=project.version -DforceStdout)}
80+
81+
######################
82+
# HADOOP FETCH LOGIC #
83+
######################
84+
HADOOP_FILE_NAME="hadoop-$HADOOP_VERSION.tar.gz"
85+
HADOOP_URL=${HADOOP_URL:-"https://archive.apache.org/dist/hadoop/core/hadoop-$HADOOP_VERSION/$HADOOP_FILE_NAME"}
86+
if [ ! -f "$CACHE_DIR/$HADOOP_FILE_NAME" ]; then
87+
echo "Downloading Hadoop from $HADOOP_URL..."
88+
if ! curl --fail -L "$HADOOP_URL" -o "$CACHE_DIR/$HADOOP_FILE_NAME.tmp"; then
89+
echo "Fail to download Hadoop, exiting...."
90+
exit 1
91+
fi
92+
mv "$CACHE_DIR/$HADOOP_FILE_NAME.tmp" "$CACHE_DIR/$HADOOP_FILE_NAME"
93+
fi
94+
95+
#####################################
96+
# Pick tez tarball from local build #
97+
#####################################
98+
TEZ_FILE_NAME="tez-$TEZ_VERSION.tar.gz"
99+
LOCAL_DIST_PATH="$DIST_DIR/target/$TEZ_FILE_NAME"
100+
101+
if [ -f "$LOCAL_DIST_PATH" ]; then
102+
echo "--> Found local Tez build artifact at: $LOCAL_DIST_PATH"
103+
cp "$LOCAL_DIST_PATH" "$WORK_DIR/"
104+
else
105+
echo "--> Error: Local Tez artifact not found at $LOCAL_DIST_PATH"
106+
echo "--> Please build the project first (e.g., mvn clean install -DskipTests)."
107+
exit 1
108+
fi
109+
110+
# -------------------------------------------------------------------------
111+
# BUILD CONTEXT PREPARATION
112+
# -------------------------------------------------------------------------
113+
cp "$CACHE_DIR/$HADOOP_FILE_NAME" "$WORK_DIR/"
114+
cp -R "$SCRIPT_DIR/conf" "$WORK_DIR/" 2>/dev/null || mkdir -p "$WORK_DIR/conf"
115+
cp "$SCRIPT_DIR/entrypoint.sh" "$WORK_DIR/"
116+
cp "$SCRIPT_DIR/Dockerfile" "$WORK_DIR/"
117+
118+
echo "Building Docker image..."
119+
docker build \
120+
"$WORK_DIR" \
121+
-f "$WORK_DIR/Dockerfile" \
122+
-t "$repo/tez-am:$TEZ_VERSION" \
123+
--build-arg "BUILD_ENV=unarchive" \
124+
--build-arg "HADOOP_VERSION=$HADOOP_VERSION" \
125+
--build-arg "TEZ_VERSION=$TEZ_VERSION"
126+
127+
rm -r "${WORK_DIR}"
128+
echo "Docker image $repo/tez-am:$TEZ_VERSION built successfully."
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
#
2+
# Licensed to the Apache Software Foundation (ASF) under one or more
3+
# contributor license agreements. See the NOTICE file distributed with
4+
# this work for additional information regarding copyright ownership.
5+
# The ASF licenses this file to You under the Apache License, Version 2.0
6+
# (the "License"); you may not use this file except in compliance with
7+
# the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
#
17+
18+
appender.console.type = Console
19+
appender.console.name = console
20+
appender.console.target = SYSTEM_ERR
21+
appender.console.layout.type = PatternLayout
22+
appender.console.layout.pattern = %d{ISO8601} %5p [%t] %c{2}: %m%n
23+
24+
rootLogger.level = INFO
25+
rootLogger.appenderRef.console.ref = console

0 commit comments

Comments
 (0)