diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/HadoopDocker.md b/hadoop-common-project/hadoop-common/src/site/markdown/HadoopDocker.md new file mode 100644 index 0000000000000..c92a4c64de1c9 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/markdown/HadoopDocker.md @@ -0,0 +1,68 @@ + + +## Hadoop Docker + +### Running from existing setups + +There are special branches for running Hadoop in Docker. + +The `docker-hadoop-runner*` branches contain scripts that set up base images that can be used for running any Hadoop version. + +* [docker-hadoop-runner-latest](https://github.com/apache/hadoop/tree/docker-hadoop-runner-latest) +* [docker-hadoop-runner-jdk11](https://github.com/apache/hadoop/tree/docker-hadoop-runner-jdk11) +* [docker-hadoop-runner-jdk8](https://github.com/apache/hadoop/tree/docker-hadoop-runner-jdk8) +* [docker-hadoop-runner](https://github.com/apache/hadoop/tree/docker-hadoop-runner) + +The `docker-hadoop*` branches can be used for running a specific version. + +* [docker-hadoop-3](https://github.com/apache/hadoop/tree/docker-hadoop-3) + * `hadoop-3.3.6` +* [docker-hadoop-2](https://github.com/apache/hadoop/tree/docker-hadoop-2) + * `hadoop-2.10.2` + +### Running from the source code + +There is a setup under `hadoop-dist` that contains Docker Compose definitions +for running the current version of Hadoop in a multi-node Docker environment. + +This is meant for testing code changes locally and debugging. + +The base image used by the Docker setup is built as part of the Maven lifecycle. +The distribution files generated while building the project with the `-Pdist` profile enabled +will be used for running Hadoop inside the containers. 
+ +In order to start the Docker environment you need to do the following: +* Build the project, using the `-Pdist` profile + ```shell + > mvn clean install -Dmaven.javadoc.skip=true -DskipTests -DskipShade -Pdist,src + ``` +* From the project root, navigate to the docker-compose directory under the generated dist directory + ```shell + > cd hadoop-dist/target/hadoop-<version>/compose/hadoop + ``` +* Start the Docker environment + ```shell + > docker-compose up -d --scale datanode=3 + ``` +* Connect to a container to execute commands + ```shell + > docker exec -it hadoop_datanode_1 bash + bash-4.2$ hdfs dfs -mkdir /test + ``` + +### Config files + +To add or remove properties from the `core-site.xml`, `hdfs-site.xml`, etc. files used in the Docker environment, +simply edit the `config` file before starting the containers. The changes will be persisted in the Docker environment. diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/SingleCluster.md.vm b/hadoop-common-project/hadoop-common/src/site/markdown/SingleCluster.md.vm index 8153dce5c3f82..ad0698a03eb07 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/SingleCluster.md.vm +++ b/hadoop-common-project/hadoop-common/src/site/markdown/SingleCluster.md.vm @@ -236,3 +236,9 @@ Fully-Distributed Operation --------------------------- For information on setting up fully-distributed, non-trivial clusters see [Cluster Setup](./ClusterSetup.html). + +Hadoop in Docker containers +--------------------------- + +For information on setting up Hadoop in Docker, using either official releases or the main source code, +see [Hadoop Docker](./HadoopDocker.html). 
diff --git a/hadoop-dist/pom.xml b/hadoop-dist/pom.xml index dc92d44010961..2229a89fe7a0e 100644 --- a/hadoop-dist/pom.xml +++ b/hadoop-dist/pom.xml @@ -29,6 +29,13 @@ Apache Hadoop Distribution jar + + UTF-8 + true + jdk8 + true + + @@ -151,6 +158,43 @@ + + maven-resources-plugin + + + copy-compose-files + package + + copy-resources + + + ${project.build.directory}/hadoop-${project.version}/compose + + + src/main/compose + true + + + + + + copy-and-filter-dockerfile + package + + copy-resources + + + ${project.build.directory}/hadoop-${project.version} + + + src/main/docker + true + + + + + + @@ -230,6 +274,56 @@ + + docker-build + + + + io.fabric8 + docker-maven-plugin + + + + build + + package + + + + + + ${docker.image} + + + ${project.build.directory}/hadoop-${project.version} + + + + + + + + + + + docker-push + + + + io.fabric8 + docker-maven-plugin + + + + push + + package + + + + + + diff --git a/hadoop-dist/src/main/compose/hadoop/.env b/hadoop-dist/src/main/compose/hadoop/.env new file mode 100644 index 0000000000000..838efcdebf792 --- /dev/null +++ b/hadoop-dist/src/main/compose/hadoop/.env @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +HADOOP_IMAGE=apache/hadoop +HADOOP_RUNNER_VERSION=${docker.hadoop-runner.version} +HADOOP_RUNNER_IMAGE=apache/hadoop-runner diff --git a/hadoop-dist/src/main/compose/hadoop/config b/hadoop-dist/src/main/compose/hadoop/config new file mode 100644 index 0000000000000..1fac879f7eaea --- /dev/null +++ b/hadoop-dist/src/main/compose/hadoop/config @@ -0,0 +1,50 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +CORE-SITE.XML_fs.default.name=hdfs://namenode +CORE-SITE.XML_fs.defaultFS=hdfs://namenode + +HDFS-SITE.XML_dfs.namenode.rpc-address=namenode:8020 +HDFS-SITE.XML_dfs.replication=1 + +MAPRED-SITE.XML_mapreduce.framework.name=yarn +MAPRED-SITE.XML_yarn.app.mapreduce.am.env=HADOOP_MAPRED_HOME=$HADOOP_HOME +MAPRED-SITE.XML_mapreduce.map.env=HADOOP_MAPRED_HOME=$HADOOP_HOME +MAPRED-SITE.XML_mapreduce.reduce.env=HADOOP_MAPRED_HOME=$HADOOP_HOME + +YARN-SITE.XML_yarn.resourcemanager.hostname=resourcemanager +YARN-SITE.XML_yarn.nodemanager.pmem-check-enabled=false +YARN-SITE.XML_yarn.nodemanager.delete.debug-delay-sec=600 +YARN-SITE.XML_yarn.nodemanager.vmem-check-enabled=false +YARN-SITE.XML_yarn.nodemanager.aux-services=mapreduce_shuffle + +CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.maximum-applications=10000 +CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.maximum-am-resource-percent=0.1 +CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.resource-calculator=org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator +CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.queues=default +CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.capacity=100 +CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.user-limit-factor=1 +CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.maximum-capacity=100 +CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.state=RUNNING +CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.acl_submit_applications=* +CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.acl_administer_queue=* +CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.node-locality-delay=40 +CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.queue-mappings= +CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.queue-mappings-override.enable=false + +LOG4J.PROPERTIES_log4j.rootLogger=INFO, stdout +LOG4J.PROPERTIES_log4j.appender.stdout=org.apache.log4j.ConsoleAppender 
+LOG4J.PROPERTIES_log4j.appender.stdout.layout=org.apache.log4j.PatternLayout +LOG4J.PROPERTIES_log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n diff --git a/hadoop-dist/src/main/compose/hadoop/docker-compose.yaml b/hadoop-dist/src/main/compose/hadoop/docker-compose.yaml new file mode 100644 index 0000000000000..f999c39300e7d --- /dev/null +++ b/hadoop-dist/src/main/compose/hadoop/docker-compose.yaml @@ -0,0 +1,46 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +version: "3.8" + +x-common-config: + &common-config + image: ${HADOOP_RUNNER_IMAGE}:${HADOOP_RUNNER_VERSION} + volumes: + - ../..:/opt/hadoop + env_file: + - ./config + +services: + namenode: + <<: *common-config + hostname: namenode + command: ["hdfs", "namenode"] + ports: + - 9870:9870 + environment: + ENSURE_NAMENODE_DIR: "/tmp/hadoop-root/dfs/name" + datanode: + <<: *common-config + command: ["hdfs", "datanode"] + resourcemanager: + <<: *common-config + hostname: resourcemanager + command: ["yarn", "resourcemanager"] + ports: + - 8088:8088 + nodemanager: + <<: *common-config + command: ["yarn", "nodemanager"] diff --git a/hadoop-dist/src/main/docker/Dockerfile b/hadoop-dist/src/main/docker/Dockerfile new file mode 100644 index 0000000000000..7ea60ed7f658f --- /dev/null +++ b/hadoop-dist/src/main/docker/Dockerfile @@ -0,0 +1,26 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +FROM apache/hadoop-runner:@docker.hadoop-runner.version@ + +COPY . 
/opt/hadoop + +WORKDIR /opt/hadoop + +USER root + +RUN chown -R hadoop:users /opt/hadoop + +USER hadoop diff --git a/pom.xml b/pom.xml index 1d4fda5067798..5d2775a813c96 100644 --- a/pom.xml +++ b/pom.xml @@ -82,6 +82,8 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x 3.5.0-SNAPSHOT + apache/hadoop:${project.version} + apache.snapshots.https Apache Development Snapshot Repository https://repository.apache.org/content/repositories/snapshots @@ -119,6 +121,7 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x 1.1.1 3.10.1 2.7.10 + 0.29.0 bash @@ -150,6 +153,11 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x maven-dependency-plugin ${maven-dependency-plugin.version} + + io.fabric8 + docker-maven-plugin + ${docker-maven-plugin.version} + org.apache.maven.plugins maven-enforcer-plugin @@ -892,5 +900,11 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x + + docker-build + + ${user.name}/hadoop:${project.version} + +