zoukankan      html  css  js  c++  java
  • 零起步的Hadoop实践日记(更改hadoop数据存储位置)

    用的是阿里云主机,发现系统盘只有20G,但是送了一块130G数据盘(要是直接给我一块150G的系统盘就好了,阿里云的说法是,数据盘和系统盘分开、互不干扰)。本来打算升级硬盘,后来启用了这块130G硬盘并挂载到某个目录下(/ad)。这样只需要修改hadoop配置,不需要修改hive配置。下面是CDH4默认给我们的配置:

    (1) /etc/hadoop/conf/hdfs-site.xml 

     <?xml version="1.0"?>
     <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
     
     <!--
       /etc/hadoop/conf/hdfs-site.xml as presented by the article: the
       configuration CDH4 ships by default, before any storage is relocated.
       The last four properties all resolve under
       /var/lib/hadoop-hdfs/cache/${user.name}; these are the values the
       article subsequently moves onto the data disk mounted at /ad.
     -->
     <configuration>
       <property>
         <name>dfs.replication</name>
         <value>1</value>
       </property>
       <property>
         <name>dfs.safemode.extension</name>
         <value>0</value>
       </property>
       <property>
          <name>dfs.safemode.min.datanodes</name>
          <value>1</value>
       </property>
       <!-- Base directory; the three dfs.* directories below repeat the same prefix. -->
       <property>
          <name>hadoop.tmp.dir</name>
          <value>/var/lib/hadoop-hdfs/cache/${user.name}</value>
       </property>
       <!-- NameNode directory (file:// URI on the local filesystem). -->
       <property>
          <name>dfs.namenode.name.dir</name>
          <value>file:///var/lib/hadoop-hdfs/cache/${user.name}/dfs/name</value>
       </property>
       <!-- Checkpoint directory, sibling of dfs/name. -->
       <property>
          <name>dfs.namenode.checkpoint.dir</name>
          <value>file:///var/lib/hadoop-hdfs/cache/${user.name}/dfs/namesecondary</value>
       </property>
       <!-- DataNode directory, sibling of dfs/name. -->
       <property>
          <name>dfs.datanode.data.dir</name>
          <value>file:///var/lib/hadoop-hdfs/cache/${user.name}/dfs/data</value>
       </property>
     </configuration>

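    我实际修改了上面配置中的四处路径(hadoop.tmp.dir、dfs.namenode.name.dir、dfs.namenode.checkpoint.dir、dfs.datanode.data.dir,原文中以红色标出),把前缀 /var/lib 换成 /ad,修改后为: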

     <?xml version="1.0"?>
     <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
     
     <!--
       /etc/hadoop/conf/hdfs-site.xml after the article's change: the four
       directory properties now point under /ad/hadoop-hdfs/cache/${user.name}
       (the mounted data disk) instead of /var/lib/hadoop-hdfs/cache/${user.name}.
       The first three properties are identical to the default file.
     -->
     <configuration>
       <property>
         <name>dfs.replication</name>
         <value>1</value>
       </property>
       <property>
         <name>dfs.safemode.extension</name>
         <value>0</value>
       </property>
       <property>
          <name>dfs.safemode.min.datanodes</name>
          <value>1</value>
       </property>
       <!-- CHANGED: prefix /var/lib replaced by /ad. -->
       <property>
          <name>hadoop.tmp.dir</name>
          <value>/ad/hadoop-hdfs/cache/${user.name}</value>
       </property>
       <!-- CHANGED: prefix /var/lib replaced by /ad. -->
       <property>
          <name>dfs.namenode.name.dir</name>
          <value>file:///ad/hadoop-hdfs/cache/${user.name}/dfs/name</value>
       </property>
       <!-- CHANGED: prefix /var/lib replaced by /ad. -->
       <property>
          <name>dfs.namenode.checkpoint.dir</name>
          <value>file:///ad/hadoop-hdfs/cache/${user.name}/dfs/namesecondary</value>
       </property>
       <!-- CHANGED: prefix /var/lib replaced by /ad. -->
       <property>
          <name>dfs.datanode.data.dir</name>
          <value>file:///ad/hadoop-hdfs/cache/${user.name}/dfs/data</value>
       </property>
     </configuration>

    (2) /etc/hadoop/conf/mapred-site.xml

     <?xml version="1.0"?>
     <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
     
     <!--
       /etc/hadoop/conf/mapred-site.xml as presented by the article, before
       the change. Only mapreduce.task.tmp.dir holds a local path (under
       /var/lib/hadoop-mapreduce); every other value is a localhost address
       or the framework name.
     -->
     <configuration>
       <property>
         <name>mapred.job.tracker</name>
         <value>localhost:8021</value>
       </property>
     
       <!-- Jobs are submitted to YARN. -->
       <property>
         <name>mapreduce.framework.name</name>
         <value>yarn</value>
       </property>
     
       <property>
         <name>mapreduce.jobhistory.address</name>
         <value>localhost:10020</value>
       </property>
       <property>
         <name>mapreduce.jobhistory.webapp.address</name>
         <value>localhost:19888</value>
       </property>
     
       <!-- The one path the article relocates to /ad. -->
       <property>
         <description>To set the value of tmp directory for map and reduce tasks.</description>
         <name>mapreduce.task.tmp.dir</name>
         <value>/var/lib/hadoop-mapreduce/cache/${user.name}/tasks</value>
       </property>
     
     </configuration>

    实际只修改了 mapreduce.task.tmp.dir 这一处路径(原文中以红色标出),把前缀 /var/lib 换成 /ad,修改后为:

     <?xml version="1.0"?>
     <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
     
     <!--
       /etc/hadoop/conf/mapred-site.xml after the article's change. The only
       difference from the default file is mapreduce.task.tmp.dir, which now
       lives under /ad/hadoop-mapreduce instead of /var/lib/hadoop-mapreduce.
     -->
     <configuration>
       <property>
         <name>mapred.job.tracker</name>
         <value>localhost:8021</value>
       </property>
     
       <!-- Jobs are submitted to YARN. -->
       <property>
         <name>mapreduce.framework.name</name>
         <value>yarn</value>
       </property>
     
       <property>
         <name>mapreduce.jobhistory.address</name>
         <value>localhost:10020</value>
       </property>
       <property>
         <name>mapreduce.jobhistory.webapp.address</name>
         <value>localhost:19888</value>
       </property>
     
       <!-- CHANGED: prefix /var/lib replaced by /ad. -->
       <property>
         <description>To set the value of tmp directory for map and reduce tasks.</description>
         <name>mapreduce.task.tmp.dir</name>
         <value>/ad/hadoop-mapreduce/cache/${user.name}/tasks</value>
       </property>
     
     </configuration>

    (3) /etc/hadoop/conf/yarn-site.xml

     <?xml version="1.0"?>
     <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
     
     <!--
       YARN configuration as presented by the article, before the change.
       NOTE(review): the article heading names this file mapred-site.xml, but
       every property here is yarn.*, so it is presumably yarn-site.xml.
       Three properties hold paths: yarn.nodemanager.local-dirs (under
       /var/lib/hadoop-yarn) and the two log locations (under
       /var/log/hadoop-yarn). Only local-dirs is relocated by the article.
     -->
     <configuration>
     <property>
     <name>yarn.resourcemanager.resource-tracker.address</name>
     <value>127.0.0.1:8031</value>
     <description>
     host is the hostname of the resource manager and port is the port on which the NodeManagers contact the  Resource Manager.
     </description>
     </property>
     
       <!-- The aux service named here is bound to its implementing class by the next property. -->
       <property>
         <name>yarn.nodemanager.aux-services</name>
         <value>mapreduce.shuffle</value>
       </property>
     
       <property>
         <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
         <value>org.apache.hadoop.mapred.ShuffleHandler</value>
       </property>
     
       <property>
         <name>yarn.log-aggregation-enable</name>
         <value>true</value>
       </property>
     
       <property>
         <name>yarn.dispatcher.exit-on-error</name>
         <value>true</value>
       </property>
     
       <!-- The one path the article relocates to /ad. -->
       <property>
         <description>List of directories to store localized files in.</description>
         <name>yarn.nodemanager.local-dirs</name>
         <value>/var/lib/hadoop-yarn/cache/${user.name}/nm-local-dir</value>
       </property>
     
       <property>
         <description>Where to store container logs.</description>
         <name>yarn.nodemanager.log-dirs</name>
         <value>/var/log/hadoop-yarn/containers</value>
       </property>
     
       <!--
         Presumably an HDFS path rather than a local one: the article's restart
         script creates /var/log/hadoop-yarn with "hadoop fs -mkdir". Confirm.
       -->
       <property>
         <description>Where to aggregate logs to.</description>
         <name>yarn.nodemanager.remote-app-log-dir</name>
         <value>/var/log/hadoop-yarn/apps</value>
       </property>
     
       <!-- Comma-separated list; the line breaks and indentation are part of the value. -->
       <property>
         <description>Classpath for typical applications.</description>
          <name>yarn.application.classpath</name>
          <value>
             $HADOOP_CONF_DIR,
             $HADOOP_COMMON_HOME/*,$HADOOP_COMMON_HOME/lib/*,
             $HADOOP_HDFS_HOME/*,$HADOOP_HDFS_HOME/lib/*,
             $HADOOP_MAPRED_HOME/*,$HADOOP_MAPRED_HOME/lib/*,
             $YARN_HOME/*,$YARN_HOME/lib/*
          </value>
       </property>
     </configuration>

    实际只修改了 yarn.nodemanager.local-dirs 这一处路径(原文中以红色标出),把前缀 /var/lib 换成 /ad;两个日志目录保持不变,修改后为:

     <?xml version="1.0"?>
     <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
     
     <!--
       YARN configuration after the article's change. The only difference from
       the default file is yarn.nodemanager.local-dirs, which now lives under
       /ad/hadoop-yarn. Both log locations still point under
       /var/log/hadoop-yarn, exactly as in the default file.
     -->
     <configuration>
     <property>
     <name>yarn.resourcemanager.resource-tracker.address</name>
     <value>127.0.0.1:8031</value>
     <description>
     host is the hostname of the resource manager and port is the port on which the NodeManagers contact the  Resource Manager.
     </description>
     </property>
     
       <!-- The aux service named here is bound to its implementing class by the next property. -->
       <property>
         <name>yarn.nodemanager.aux-services</name>
         <value>mapreduce.shuffle</value>
       </property>
     
       <property>
         <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
         <value>org.apache.hadoop.mapred.ShuffleHandler</value>
       </property>
     
       <property>
         <name>yarn.log-aggregation-enable</name>
         <value>true</value>
       </property>
     
       <property>
         <name>yarn.dispatcher.exit-on-error</name>
         <value>true</value>
       </property>
     
       <!-- CHANGED: prefix /var/lib replaced by /ad. -->
       <property>
         <description>List of directories to store localized files in.</description>
         <name>yarn.nodemanager.local-dirs</name>
         <value>/ad/hadoop-yarn/cache/${user.name}/nm-local-dir</value>
       </property>
     
       <!-- Unchanged: container logs stay under /var/log. -->
       <property>
         <description>Where to store container logs.</description>
         <name>yarn.nodemanager.log-dirs</name>
         <value>/var/log/hadoop-yarn/containers</value>
       </property>
     
       <!--
         Unchanged. Presumably an HDFS path rather than a local one: the
         article's restart script creates /var/log/hadoop-yarn with
         "hadoop fs -mkdir". Confirm.
       -->
       <property>
         <description>Where to aggregate logs to.</description>
         <name>yarn.nodemanager.remote-app-log-dir</name>
         <value>/var/log/hadoop-yarn/apps</value>
       </property>
     
       <!-- Comma-separated list; the line breaks and indentation are part of the value. -->
       <property>
         <description>Classpath for typical applications.</description>
          <name>yarn.application.classpath</name>
          <value>
             $HADOOP_CONF_DIR,
             $HADOOP_COMMON_HOME/*,$HADOOP_COMMON_HOME/lib/*,
             $HADOOP_HDFS_HOME/*,$HADOOP_HDFS_HOME/lib/*,
             $HADOOP_MAPRED_HOME/*,$HADOOP_MAPRED_HOME/lib/*,
             $YARN_HOME/*,$YARN_HOME/lib/*
          </value>
       </property>
     </configuration>

    PS:另外分享我重启hadoop和hive的脚本

     # Full reset-and-restart of a single-node CDH4 stack: stops Hive, YARN and
     # HDFS, WIPES all HDFS data and reformats the NameNode, then brings
     # everything back up and recreates the HDFS directories the services need.
     #
     # WARNING: destructive. Everything stored in HDFS is lost, and the local
     # /tmp directory is emptied. Several commands below are expected to print
     # an error on a fresh filesystem (e.g. removing a path that does not exist
     # yet), so the script deliberately does not abort on the first failure.

     # stop hive, yarn and hdfs first
     echo "@@@ stop yarn and hdfs first"
     sudo service hive-metastore stop
     sudo service hive-server stop
     sudo service hadoop-yarn-resourcemanager stop
     sudo service hadoop-yarn-nodemanager stop
     sudo service hadoop-mapreduce-historyserver stop
     # Each HDFS daemon has an init script named hadoop-hdfs-*; iterate over the
     # glob directly instead of parsing `ls` output.
     for x in /etc/init.d/hadoop-hdfs-*
     do
         [ -e "$x" ] || continue   # glob matched nothing: no HDFS services installed
         sudo service "${x##*/}" stop
     done

     # clear and format
     echo "@@@ clear and format"
     # NOTE(review): this empties the whole local /tmp, not just Hadoop's files;
     # other programs on the host may be using it.
     sudo rm -rf /tmp/*
     # These are the relocated storage directories from hdfs-site.xml,
     # yarn-site.xml and mapred-site.xml (all moved under /ad).
     sudo rm -rf /ad/hadoop-hdfs/cache/*
     sudo rm -rf /ad/hadoop-yarn/cache/*
     sudo rm -rf /ad/hadoop-mapreduce/cache/*
     sudo -u hdfs hdfs namenode -format

     # start hdfs
     echo "@@@ start hdfs"
     for x in /etc/init.d/hadoop-hdfs-*
     do
         [ -e "$x" ] || continue
         sudo service "${x##*/}" start
     done

     # mkdir
     echo  "@@@ mkdir"
     # On a freshly formatted filesystem /tmp does not exist yet, so this rm is
     # expected to report an error; it only matters when re-run without a format.
     sudo -u hdfs hadoop fs -rm -r /tmp
     sudo -u hdfs hadoop fs -mkdir /tmp
     sudo -u hdfs hadoop fs -chmod -R 1777 /tmp
     sudo -u hdfs hadoop fs -mkdir /tmp/hadoop-yarn/staging
     sudo -u hdfs hadoop fs -chmod -R 1777 /tmp/hadoop-yarn/staging
     sudo -u hdfs hadoop fs -mkdir /tmp/hadoop-yarn/staging/history/done_intermediate
     sudo -u hdfs hadoop fs -chmod -R 1777 /tmp/hadoop-yarn/staging/history/done_intermediate
     sudo -u hdfs hadoop fs -chown -R mapred:mapred /tmp/hadoop-yarn/staging
     # Parent of yarn.nodemanager.remote-app-log-dir (/var/log/hadoop-yarn/apps).
     sudo -u hdfs hadoop fs -mkdir /var/log/hadoop-yarn
     sudo -u hdfs hadoop fs -chown yarn:mapred /var/log/hadoop-yarn

     # Print the resulting HDFS tree so the layout can be checked by eye.
     sudo -u hdfs hadoop fs -ls -R /

     # start yarn
     echo "@@@ start yarn"
     sudo service hadoop-yarn-resourcemanager start
     sudo service hadoop-yarn-nodemanager start
     sudo service hadoop-mapreduce-historyserver start

     # HDFS home directory for the author's own account.
     sudo -u hdfs hadoop fs -mkdir /user/maminghan
     sudo -u hdfs hadoop fs -chown maminghan /user/maminghan

     # start hive
     # One command per line: run together on a single line, the first `service`
     # call would receive all the following words as arguments and everything
     # after the `#` would be treated as a comment and never executed.
     echo "@@@ start hive"
     sudo service hive-metastore start
     sudo service hive-server start
     sudo -u hdfs hadoop fs -mkdir /user/hive
     sudo -u hdfs hadoop fs -chown hive /user/hive
     # /tmp was created in the mkdir step above, so this mkdir is expected to
     # report that the directory exists.
     sudo -u hdfs hadoop fs -mkdir /tmp
     sudo -u hdfs hadoop fs -chmod 777 /tmp #already exist
     sudo -u hdfs hadoop fs -chmod o+t /tmp
     sudo -u hdfs hadoop fs -mkdir /data
     sudo -u hdfs hadoop fs -chown hdfs /data
     sudo -u hdfs hadoop fs -chmod 777 /data
     sudo -u hdfs hadoop fs -chmod o+t /data
     # Local (not HDFS) Hive directory on the data disk.
     sudo chown -R hive:hive /ad/hive
  • 相关阅读:
    iOS之NSString类型为什么要用copy修饰
    【转】高频使用的git清单
    【转】把Git Repository建到U盘上去
    python程序的打开运行方式
    python运算符的优先级顺序
    iOS之正则表达式(一)
    iOS之input file调用相册控制器消失跳转到登陆页
    监督学习与无监督学习
    常用的文件打开模式
    编码格式
  • 原文地址:https://www.cnblogs.com/aquastar/p/3607570.html
Copyright © 2011-2022 走看看