k8s部署hadoop

软件发布|下载排行|最新软件

当前位置:首页IT学院IT技术

k8s部署hadoop

Oh宝贝儿   2022-05-28 我要评论

环境+版本
k8s: v1.21.1
hadoop: 3.2.2

dockerfile

FROM openjdk:8-jdk
# 如果要通过ssh连接容器内部,添加自己的公钥(非必须)
ARG SSH_PUB='ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQC3nTRJ/aVb67l1xMaN36jmIbabU7Hiv/xpZ8bwLVvNO3Bj7kUzYTp7DIbPcHQg4d6EsPC6j91E8zW6CrV2fo2Ai8tDO/rCq9Se/64F3+8oEIiI6E/OfUZfXD1mPbG7M/kcA3VeQP6wxNPhWBbKRisqgUc6VTKhl+hK6LwRTZgeShxSNcey+HZst52wJxjQkNG+7CAEY5bbmBzAlHCSl4Z0RftYTHR3q8LcEg7YLNZasUogX68kBgRrb+jw1pRMNo7o7RI9xliDAGX+E4C3vVZL0IsccKgr90222axsADoEjC9O+Q6uwKjahemOVaau+9sHIwkelcOcCzW5SuAwkezv 805899926@qq.com'
RUN apt-get update;
RUN apt-get install -y openssh-server net-tools vim git;
RUN sed -i -r 's/^\s*UseDNS\s+\w+/#\0/; s/^\s*PasswordAuthentication\s+\w+/#\0/; s/^\s*ClientAliveInterval\s+\w+/#\0/' /etc/ssh/sshd_config;
RUN echo 'UseDNS no \nPermitRootLogin yes \nPasswordAuthentication yes \nClientAliveInterval 30' >> /etc/ssh/sshd_config;
RUN cat /etc/ssh/sshd_config
RUN su root bash -c 'cd;mkdir .ssh;chmod 700 .ssh;echo ${SSH_PUB} > .ssh/authorized_keys;chmod 644 .ssh/authorized_keys'
RUN su root bash -c 'cd;ssh-keygen -t rsa -f ~/.ssh/id_rsa; cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys'
# hadoop
ENV HADOOP_TGZ_URL=https://mirrors.tuna.tsinghua.edu.cn/apache/hadoop/common/hadoop-3.2.2/hadoop-3.2.2.tar.gz
ENV HADOOP_HOME=/opt/hadoop
ENV PATH=$HADOOP_HOME/bin:$PATH
RUN set -ex; \
    mkdir -p $HADOOP_HOME; \
    wget -nv -O $HADOOP_HOME/src.tgz $HADOOP_TGZ_URL; \
    tar -xf $HADOOP_HOME/src.tgz --strip-components=1 -C $HADOOP_HOME; \
    rm $HADOOP_HOME/src.tgz; \
    chown -R root:root $HADOOP_HOME; \
RUN mkdir -p $HADOOP_HOME/hdfs/name/ && mkdir -p $HADOOP_HOME/hdfs/data/
# clean trash file or dir
RUN rm -rf $HADOOP_HOME/share/doc/;
COPY docker-entrypoint.sh /
EXPOSE 22 9870 9000
ENTRYPOINT ["/docker-entrypoint.sh"]

docker-entrypoint.sh

#!/bin/bash
set -e
service ssh start
hdfs_dir=$HADOOP_HOME/hdfs/
if [ $HADOOP_NODE_TYPE = "datanode" ]; then
  echo -e "\033[32m start datanode \033[0m"
  $HADOOP_HOME/bin/hdfs datanode -regular
fi
if [ $HADOOP_NODE_TYPE = "namenode" ]; then
  if [ -z $(ls -A ${hdfs_dir}) ]; then
    echo -e "\033[32m start hdfs namenode format \033[0m"
    $HADOOP_HOME/bin/hdfs namenode -format
  fi
  echo -e "\033[32m start hdfs namenode \033[0m"
  $HADOOP_HOME/bin/hdfs namenode

pod template

apiVersion: v1
kind: ConfigMap
metadata:
  name: hadoop
  namespace: big-data
  labels:
    app: hadoop
data:
  hadoop-env.sh: |
    export HDFS_DATANODE_USER=root
    export HDFS_NAMENODE_USER=root
    export HDFS_SECONDARYNAMENODE_USER=root
    export JAVA_HOME=/usr/local/openjdk-8
    export HADOOP_OS_TYPE=${HADOOP_OS_TYPE:-$(uname -s)}
    export HADOOP_OPTS="-Djava.library.path=${HADOOP_HOME}/lib/native"
  core-site.xml: |
    <?xml version="1.0" encoding="UTF-8"?>
    <?xml-stylesheet type="text/xsl" href="configuration.xsl" rel="external nofollow"  rel="external nofollow" ?>
    <configuration>
        <property>
            <name>fs.defaultFS</name>
            <value>hdfs://hadoop-master:9000</value>
        </property>
        <property>
            <name>dfs.namenode.rpc-bind-host</name>
            <value>0.0.0.0</value>
        </property>
    </configuration>
  hdfs-site.xml: |
    <?xml version="1.0" encoding="UTF-8"?>
    <?xml-stylesheet type="text/xsl" href="configuration.xsl" rel="external nofollow"  rel="external nofollow" ?>
    <configuration>
        <property>
            <name>dfs.namenode.name.dir</name>
            <value>file:///opt/hadoop/hdfs/name</value>
        </property>
        <property>
            <name>dfs.datanode.data.dir</name>
            <value>file:///opt/hadoop/hdfs/data</value>
        </property>
        <property>
            <name>dfs.namenode.datanode.registration.ip-hostname-check</name>
            <value>false</value>
        </property>
        <property>
            <name>dfs.replication</name>
            <value>1</value>
        </property>
    </configuration>
---
# namenode svc
apiVersion: v1
kind: Service
metadata:
  name: hadoop-master
  namespace: big-data
spec:
  selector:
    app: hadoop-namenode
  type: NodePort
  ports:
    - name: rpc
      port: 9000
      targetPort: 9000
    - name: http
      port: 9870
      targetPort: 9870
      nodePort: 9870
# namenode pod
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: hadoop-namenode
  namespace: big-data
spec:
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: hadoop-namenode
  template:
    metadata:
      labels:
        app: hadoop-namenode
    spec:
      volumes:
        - name: hadoop-env
          configMap:
            name: hadoop
            items:
              - key: hadoop-env.sh
                path: hadoop-env.sh
        - name: core-site
          configMap:
            name: hadoop
            items:
              - key: core-site.xml
                path: core-site.xml
        - name: hdfs-site
          configMap:
            name: hadoop
            items:
              - key: hdfs-site.xml
                path: hdfs-site.xml
        - name: hadoop-data
          persistentVolumeClaim:
            claimName: data-hadoop-namenode
      containers:
        - name: hadoop
          image: registry:5000/hadoop
          imagePullPolicy: Always
          ports:
            - containerPort: 22
            - containerPort: 9000
            - containerPort: 9870
          volumeMounts:
            - name: hadoop-env
              mountPath: /opt/hadoop/etc/hadoop/hadoop-env.sh
              subPath: hadoop-env.sh
            - name: core-site
              mountPath: /opt/hadoop/etc/hadoop/core-site.xml
              subPath: core-site.xml
            - name: hdfs-site
              mountPath: /opt/hadoop/etc/hadoop/hdfs-site.xml
              subPath: hdfs-site.xml
            - name: hadoop-data
              mountPath: /opt/hadoop/hdfs/
              subPath: hdfs
            - name: hadoop-data
              mountPath: /opt/hadoop/logs/
              subPath: logs
          env:
            - name: HADOOP_NODE_TYPE
              value: namenode
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: data-hadoop-namenode
  namespace: big-data
spec:
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: 256Gi
  storageClassName: "managed-nfs-storage"
# datanode pod
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: hadoop-datanode
  namespace: big-data
spec:
  replicas: 2
  selector:
    matchLabels:
      app: hadoop-datanode
  serviceName: hadoop-datanode
  template:
    metadata:
      labels:
        app: hadoop-datanode
    spec:
      volumes:
        - name: hadoop-env
          configMap:
            name: hadoop
            items:
              - key: hadoop-env.sh
                path: hadoop-env.sh
        - name: core-site
          configMap:
            name: hadoop
            items:
              - key: core-site.xml
                path: core-site.xml
        - name: hdfs-site
          configMap:
            name: hadoop
            items:
              - key: hdfs-site.xml
                path: hdfs-site.xml
      containers:
        - name: hadoop
          image: registry:5000/hadoop
          imagePullPolicy: Always
          ports:
            - containerPort: 22
            - containerPort: 9000
            - containerPort: 9870
          volumeMounts:
            - name: hadoop-env
              mountPath: /opt/hadoop/etc/hadoop/hadoop-env.sh
              subPath: hadoop-env.sh
            - name: core-site
              mountPath: /opt/hadoop/etc/hadoop/core-site.xml
              subPath: core-site.xml
            - name: hdfs-site
              mountPath: /opt/hadoop/etc/hadoop/hdfs-site.xml
              subPath: hdfs-site.xml
            - name: data
              mountPath: /opt/hadoop/hdfs/
              subPath: hdfs
            - name: data
              mountPath: /opt/hadoop/logs/
              subPath: logs
          env:
            - name: HADOOP_NODE_TYPE
              value: datanode
  volumeClaimTemplates:
    - metadata:
        name: data
        namespace: big-data
      spec:
        accessModes:
          - ReadWriteMany
        resources:
          requests:
            storage: 256Gi
        storageClassName: "managed-nfs-storage"

Copyright 2022 版权所有 软件发布 访问手机版

声明:所有软件和文章来自软件开发商或者作者 如有异议 请与本站联系 联系我们