zoukankan      html  css  js  c++  java
  • Installing a Slurm cluster on CentOS 7

    # Slurm installation walkthrough for a two-node CentOS 7 cluster:
    #   192.168.159.141 node01  (listed as ControlMachine and as a compute node in slurm.conf)
    #   192.168.159.142 node02  (compute node)
    # Lines tagged "#node01" are meant for node01 only; untagged lines are
    # presumably run as root on both nodes.

    # Turn the firewall and NetworkManager off, both now and at boot.
    systemctl stop firewalld.service
    systemctl disable firewalld.service
    systemctl disable NetworkManager
    systemctl stop NetworkManager
    # Disable SELinux persistently. Edit /etc/selinux/config directly and match
    # the setting by name: /etc/sysconfig/selinux is only a symlink to it, and
    # "sed -i" on a symlink replaces the link with a regular file, leaving the
    # real configuration untouched. Matching on the key also avoids relying on
    # the setting being on a particular line number.
    sed -i 's/^SELINUX=enforcing/SELINUX=disabled/' /etc/selinux/config
    getenforce #is disabled--->>continue or reboot
    # Switch the running system to permissive until the next reboot
    # (prints an error if SELinux is already disabled, which is harmless here).
    setenforce 0
    # Update the system and start from a clean MariaDB installation.
    yum -y update
    yum -y remove mariadb-server mariadb-devel
    # Show and remove any pre-existing slurm/munge accounts so they can be
    # recreated with fixed IDs (userdel reports an error if the account is absent).
    grep slurm /etc/passwd
    userdel -r slurm
    userdel -r munge
    yum -y install mariadb-server mariadb-devel
    # Create the munge group and user with an explicit UID/GID so the IDs are
    # identical on every node. The group must exist before useradd refers to it.
    export MUNGEUSER=1216
    groupadd -g "$MUNGEUSER" munge
    useradd -m -c "MUNGE Uid 'N' Gid Emporium" -d /var/lib/munge -u "$MUNGEUSER" -g munge -s /sbin/nologin munge
    # Same for the slurm group and user.
    export SLURMUSER=1217
    groupadd -g "$SLURMUSER" slurm
    useradd -m -c "SLURM workload manager" -d /var/lib/slurm -u "$SLURMUSER" -g slurm -s /bin/bash slurm
    # MUNGE is installed from EPEL.
    yum -y install epel-release
    yum -y install munge munge-libs munge-devel
    # Key generation happens on node01 only; rngd feeds the entropy pool so
    # that key creation does not stall.
    yum -y install rng-tools #node01
    rngd -r /dev/urandom #node01
    /usr/sbin/create-munge-key -r #node01
    # Overwrites the key created above with 1024 bytes from /dev/urandom;
    # either of the two methods is sufficient on its own.
    dd if=/dev/urandom bs=1 count=1024 > /etc/munge/munge.key #node01
    chown munge: /etc/munge/munge.key #node01
    chmod 400 /etc/munge/munge.key #node01
    # Every node must hold the identical key.
    scp /etc/munge/munge.key root@node02:/etc/munge #node01
    chown -R munge: /etc/munge/ /var/log/munge/
    chmod 0700 /etc/munge/ /var/log/munge/
    # The munge client talks to the munged daemon, so the service has to be
    # running before credentials can be encoded or decoded.
    systemctl enable munge.service
    systemctl start munge.service
    # Local encode, local round trip, then a round trip to each node over ssh.
    munge -n
    munge -n | unmunge
    munge -n | ssh node01 unmunge
    munge -n | ssh node02 unmunge
    remunge
    # Build-time dependencies for the Slurm RPMs.
    # NOTE(review): the original listed "libmad"; next to libibumad this looks
    # like a typo for the InfiniBand library libibmad -- confirm.
    yum -y install openssl openssl-devel pam-devel numactl numactl-devel hwloc hwloc-devel lua lua-devel readline-devel rrdtool-devel ncurses-devel man2html libibmad libibumad
    mkdir -p /nfs/slurm-rpms
    cd /nfs
    # The source tarball must be present in /nfs before building. One variable
    # keeps the download name and the build name in sync (the original fetched
    # 17.02.6 but built 16.08.9).
    export SLURMVERSION=16.08.9
    # Download left commented out as in the original; verify the URL against
    # the SchedMD download page before using it.
    ### wget https://www.schedmd.com/downloads.php/slurm-$SLURMVERSION.tar.bz2
    yum -y install rpm-build
    rpmbuild -ta "slurm-$SLURMVERSION.tar.bz2"
    cd /root/rpmbuild/RPMS/x86_64
    # Keep a copy of the built packages under /nfs/slurm-rpms
    # (presumably a shared directory for installing on the other node -- confirm).
    cp -p *.rpm /nfs/slurm-rpms
    yum -y --nogpgcheck localinstall *.rpm
    cd /etc/slurm ; cp -p slurm.conf.example slurm.conf #node01
    vim slurm.conf #node01
    # Inside the editor, set the following slurm.conf entries, then save and
    # quit with :wq. These lines are file content, not shell commands.
    ControlMachine=node01
    ControlAddr=node01
    StateSaveLocation=/var/spool/slurmctld
    SlurmctldLogFile=/var/log/slurmctld.log
    SlurmdLogFile=/var/log/slurmd.log
    NodeName=node01 CPUs=1 State=UNKNOWN
    NodeName=node02 CPUs=1 State=UNKNOWN
    PartitionName=debug Nodes=node01,node02 Default=YES MaxTime=INFINITE State=UP
    :wq
    # All nodes need the same slurm.conf, so copy it to the second node.
    scp /etc/slurm/slurm.conf root@node02:/etc/slurm/ #node01
    # Controller state directory (the StateSaveLocation set in slurm.conf),
    # owned by the slurm user.
    mkdir /var/spool/slurmctld
    chown slurm: /var/spool/slurmctld
    chmod 755 /var/spool/slurmctld
    # Controller log plus job accounting / job completion logs.
    touch /var/log/slurmctld.log
    chown slurm: /var/log/slurmctld.log
    touch /var/log/slurm_jobacct.log /var/log/slurm_jobcomp.log
    chown slurm: /var/log/slurm_jobacct.log /var/log/slurm_jobcomp.log
    # Compute-daemon spool directory and log.
    mkdir /var/spool/slurmd
    chown slurm: /var/spool/slurmd
    chmod 755 /var/spool/slurmd
    touch /var/log/slurmd.log
    chown slurm: /var/log/slurmd.log
    # Print the hardware configuration slurmd detects on this node, for
    # comparison with the NodeName lines in slurm.conf.
    slurmd -C
    # Make sure the firewall is stopped and stays disabled.
    systemctl stop firewalld.service
    systemctl disable firewalld.service
    # Keep the node clocks in sync: enable ntpd at boot, set the clock once
    # with ntpdate, then start the daemon.
    yum -y install ntp
    systemctl enable ntpd.service
    ntpdate pool.ntp.org
    systemctl start ntpd
    # Start munge first: the Slurm daemons authenticate through it.
    systemctl enable munge.service
    systemctl start munge.service
    systemctl status munge.service
    # Compute daemon.
    systemctl enable slurmd.service
    systemctl start slurmd.service
    systemctl status slurmd.service
    # Controller daemon, on node01 only.
    systemctl enable slurmctld.service #node01
    systemctl start slurmctld.service #node01
    systemctl status slurmctld.service #node01
    # Check that the nodes are registered and the partition is visible.
    scontrol show nodes
    sinfo --Node
     
     -----------------------If you have questions, contact me on QQ: 1394466404----------------------------
  • 相关阅读:
    控制反转(IOC)/依赖注入(DI)理解
    MySQL常用命令总结
    Java继承与清理
    Java组合与继承生成的类中构造函数的执行顺序
    Java中4种权限的理解
    C# 选项卡控件
    USMART 组件移植到STM32
    c# 规范用户输入控件
    c# 图像呈现控件PictureBox
    C# 制作软件启动界面
  • 原文地址:https://www.cnblogs.com/S--S/p/6562641.html
Copyright © 2011-2022 走看看