  • Deploying Hadoop 3.2.2 (HDFS) on k8s

    • Environment and versions
      k8s: v1.21.1
      hadoop: 3.2.2
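
    The manifests below assume that the namespace big-data and the NFS-backed StorageClass managed-nfs-storage already exist in the cluster; adjust the names if yours differ. A minimal pre-flight sketch using standard kubectl commands (the resource names are taken from the manifests further down):

    # verify the cluster is reachable
    kubectl get nodes
    # create the namespace used by all resources below
    kubectl create namespace big-data
    # confirm the StorageClass referenced by the PVCs exists
    kubectl get storageclass managed-nfs-storage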

    Dockerfile

    FROM openjdk:8-jdk
    # Optional: add your own public key here if you want to SSH into the container
    ARG SSH_PUB='ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQC3nTRJ/aVb67l1xMaN36jmIbabU7Hiv/xpZ8bwLVvNO3Bj7kUzYTp7DIbPcHQg4d6EsPC6j91E8zW6CrV2fo2Ai8tDO/rCq9Se/64F3+8oEIiI6E/OfUZfXD1mPbG7M/kcA3VeQP6wxNPhWBbKRisqgUc6VTKhl+hK6LwRTZgeShxSNcey+HZst52wJxjQkNG+7CAEY5bbmBzAlHCSl4Z0RftYTHR3q8LcEg7YLNZasUogX68kBgRrb+jw1pRMNo7o7RI9xliDAGX+E4C3vVZL0IsccKgr90222axsADoEjC9O+Q6uwKjahemOVaau+9sHIwkelcOcCzW5SuAwkezv 805899926@qq.com'
    RUN apt-get update;
    RUN apt-get install -y openssh-server net-tools vim git;
    RUN sed -i -r 's/^\s*UseDNS\s+\w+/#/; s/^\s*PasswordAuthentication\s+\w+/#/; s/^\s*ClientAliveInterval\s+\w+/#/' /etc/ssh/sshd_config;
    RUN printf '%s\n' \
        'UseDNS no' \
        'PermitRootLogin yes' \
        'PasswordAuthentication yes' \
        'ClientAliveInterval 30' >> /etc/ssh/sshd_config;
    RUN cat /etc/ssh/sshd_config
    RUN su root bash -c 'cd;mkdir .ssh;chmod 700 .ssh;echo ${SSH_PUB} > .ssh/authorized_keys;chmod 644 .ssh/authorized_keys'
    RUN su root bash -c 'cd;ssh-keygen -t rsa -f ~/.ssh/id_rsa; cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys'
    
    # hadoop
    ENV HADOOP_TGZ_URL=https://mirrors.tuna.tsinghua.edu.cn/apache/hadoop/common/hadoop-3.2.2/hadoop-3.2.2.tar.gz
    ENV HADOOP_HOME=/opt/hadoop
    ENV PATH=$HADOOP_HOME/bin:$PATH
    RUN set -ex; \
        mkdir -p $HADOOP_HOME; \
        wget -nv -O $HADOOP_HOME/src.tgz $HADOOP_TGZ_URL; \
        tar -xf $HADOOP_HOME/src.tgz --strip-components=1 -C $HADOOP_HOME; \
        rm $HADOOP_HOME/src.tgz; \
        chown -R root:root $HADOOP_HOME;
    RUN mkdir -p $HADOOP_HOME/hdfs/name/ && mkdir -p $HADOOP_HOME/hdfs/data/
    
    # clean up unneeded files and directories
    RUN rm -rf $HADOOP_HOME/share/doc/;
    
    COPY docker-entrypoint.sh /
    EXPOSE 22 9870 9000
    ENTRYPOINT ["/docker-entrypoint.sh"]
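
    With the Dockerfile above, building and pushing the image referenced by the manifests further down (registry:5000/hadoop) could look like the following sketch. It assumes the Dockerfile and docker-entrypoint.sh sit in the current directory and that registry:5000 is the private registry your cluster nodes pull from:

    # build in the directory that contains the Dockerfile and docker-entrypoint.sh
    docker build -t registry:5000/hadoop .
    # push to the registry named in the Deployment/StatefulSet image field
    docker push registry:5000/hadoop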
    

    docker-entrypoint.sh

    #!/bin/bash
    set -e
    
    service ssh start
    
    hdfs_dir=$HADOOP_HOME/hdfs/
    
    if [ "$HADOOP_NODE_TYPE" = "datanode" ]; then
      echo -e "\033[32m start datanode \033[0m"
      $HADOOP_HOME/bin/hdfs datanode -regular
    fi
    
    if [ "$HADOOP_NODE_TYPE" = "namenode" ]; then
      if [ -z "$(ls -A ${hdfs_dir})" ]; then
        echo -e "\033[32m start hdfs namenode format \033[0m"
        $HADOOP_HOME/bin/hdfs namenode -format
      fi
      echo -e "\033[32m start hdfs namenode \033[0m"
      $HADOOP_HOME/bin/hdfs namenode
    fi
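
    One detail worth checking: the Dockerfile COPYs docker-entrypoint.sh without a chmod step, and COPY preserves the file mode from the build host, so the script must be executable before the image is built or the container will fail to start. A small sketch of either option:

    # option 1: on the build host, before running docker build
    chmod +x docker-entrypoint.sh

    # option 2 (assumption, not in the original Dockerfile): add after the COPY line
    # RUN chmod +x /docker-entrypoint.sh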
    

    Kubernetes manifests

    apiVersion: v1
    kind: ConfigMap
    metadata:
      name: hadoop
      namespace: big-data
      labels:
        app: hadoop
    data:
      hadoop-env.sh: |
        export HDFS_DATANODE_USER=root
        export HDFS_NAMENODE_USER=root
        export HDFS_SECONDARYNAMENODE_USER=root
        export JAVA_HOME=/usr/local/openjdk-8
        export HADOOP_OS_TYPE=${HADOOP_OS_TYPE:-$(uname -s)}
        export HADOOP_OPTS="-Djava.library.path=${HADOOP_HOME}/lib/native"
      core-site.xml: |
        <?xml version="1.0" encoding="UTF-8"?>
        <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
        <configuration>
            <property>
                <name>fs.defaultFS</name>
                <value>hdfs://hadoop-master:9000</value>
            </property>
            <property>
                <name>dfs.namenode.rpc-bind-host</name>
                <value>0.0.0.0</value>
            </property>
        </configuration>
      hdfs-site.xml: |
        <?xml version="1.0" encoding="UTF-8"?>
        <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
        <configuration>
            <property>
                <name>dfs.namenode.name.dir</name>
                <value>file:///opt/hadoop/hdfs/name</value>
            </property>
            <property>
                <name>dfs.datanode.data.dir</name>
                <value>file:///opt/hadoop/hdfs/data</value>
            </property>
            <property>
                <name>dfs.namenode.datanode.registration.ip-hostname-check</name>
                <value>false</value>
            </property>
            <property>
                <name>dfs.replication</name>
                <value>1</value>
            </property>
        </configuration>
    ---
    # namenode svc
    apiVersion: v1
    kind: Service
    metadata:
      name: hadoop-master
      namespace: big-data
    spec:
      selector:
        app: hadoop-namenode
      type: NodePort
      ports:
        - name: rpc
          port: 9000
          targetPort: 9000
        - name: http
          port: 9870
          targetPort: 9870
          nodePort: 9870
    # namenode pod
    ---
    apiVersion: apps/v1
    kind: Deployment
    metadata:
      name: hadoop-namenode
      namespace: big-data
    spec:
      strategy:
        type: Recreate
      selector:
        matchLabels:
          app: hadoop-namenode
      template:
        metadata:
          labels:
            app: hadoop-namenode
        spec:
          volumes:
            - name: hadoop-env
              configMap:
                name: hadoop
                items:
                  - key: hadoop-env.sh
                    path: hadoop-env.sh
            - name: core-site
              configMap:
                name: hadoop
                items:
                  - key: core-site.xml
                    path: core-site.xml
            - name: hdfs-site
              configMap:
                name: hadoop
                items:
                  - key: hdfs-site.xml
                    path: hdfs-site.xml
            - name: hadoop-data
              persistentVolumeClaim:
                claimName: data-hadoop-namenode
          containers:
            - name: hadoop
              image: registry:5000/hadoop
              imagePullPolicy: Always
              ports:
                - containerPort: 22
                - containerPort: 9000
                - containerPort: 9870
              volumeMounts:
                - name: hadoop-env
                  mountPath: /opt/hadoop/etc/hadoop/hadoop-env.sh
                  subPath: hadoop-env.sh
                - name: core-site
                  mountPath: /opt/hadoop/etc/hadoop/core-site.xml
                  subPath: core-site.xml
                - name: hdfs-site
                  mountPath: /opt/hadoop/etc/hadoop/hdfs-site.xml
                  subPath: hdfs-site.xml
                - name: hadoop-data
                  mountPath: /opt/hadoop/hdfs/
                  subPath: hdfs
                - name: hadoop-data
                  mountPath: /opt/hadoop/logs/
                  subPath: logs
              env:
                - name: HADOOP_NODE_TYPE
                  value: namenode
    ---
    apiVersion: v1
    kind: PersistentVolumeClaim
    metadata:
      name: data-hadoop-namenode
      namespace: big-data
    spec:
      accessModes:
        - ReadWriteMany
      resources:
        requests:
          storage: 256Gi
      storageClassName: "managed-nfs-storage"
    # datanode pod
    ---
    apiVersion: apps/v1
    kind: StatefulSet
    metadata:
      name: hadoop-datanode
      namespace: big-data
    spec:
      replicas: 2
      selector:
        matchLabels:
          app: hadoop-datanode
      serviceName: hadoop-datanode
      template:
        metadata:
          labels:
            app: hadoop-datanode
        spec:
          volumes:
            - name: hadoop-env
              configMap:
                name: hadoop
                items:
                  - key: hadoop-env.sh
                    path: hadoop-env.sh
            - name: core-site
              configMap:
                name: hadoop
                items:
                  - key: core-site.xml
                    path: core-site.xml
            - name: hdfs-site
              configMap:
                name: hadoop
                items:
                  - key: hdfs-site.xml
                    path: hdfs-site.xml
          containers:
            - name: hadoop
              image: registry:5000/hadoop
              imagePullPolicy: Always
              ports:
                - containerPort: 22
                - containerPort: 9000
                - containerPort: 9870
              volumeMounts:
                - name: hadoop-env
                  mountPath: /opt/hadoop/etc/hadoop/hadoop-env.sh
                  subPath: hadoop-env.sh
                - name: core-site
                  mountPath: /opt/hadoop/etc/hadoop/core-site.xml
                  subPath: core-site.xml
                - name: hdfs-site
                  mountPath: /opt/hadoop/etc/hadoop/hdfs-site.xml
                  subPath: hdfs-site.xml
                - name: data
                  mountPath: /opt/hadoop/hdfs/
                  subPath: hdfs
                - name: data
                  mountPath: /opt/hadoop/logs/
                  subPath: logs
              env:
                - name: HADOOP_NODE_TYPE
                  value: datanode
      volumeClaimTemplates:
        - metadata:
            name: data
            namespace: big-data
          spec:
            accessModes:
              - ReadWriteMany
            resources:
              requests:
                storage: 256Gi
            storageClassName: "managed-nfs-storage"
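
    Assuming the manifests above are saved as hadoop.yaml (a hypothetical file name), deploying and sanity-checking HDFS could look like this sketch:

    kubectl apply -f hadoop.yaml
    kubectl get pods -n big-data -o wide

    # ask the namenode how many datanodes have registered (expect 2)
    kubectl exec -n big-data deploy/hadoop-namenode -- /opt/hadoop/bin/hdfs dfsadmin -report

    # the NameNode web UI is published on nodePort 9870; note that 9870 lies outside
    # the default NodePort range (30000-32767), so this only works if the apiserver's
    # --service-node-port-range includes it, otherwise pick a port inside the range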
    
  • Original article: https://www.cnblogs.com/chenzhaoyu/p/15141679.html