zoukankan      html  css  js  c++  java
  • 【Hadoop离线基础总结】oozie任务串联


    • 需求

      执行shell脚本 → 执行MR程序 → 执行hive程序

    • 1.准备工作目录
      cd /export/servers/oozie-4.1.0-cdh5.14.0/oozie_works
      mkdir -p sereval-actions
      
    • 2.准备调度文件
      cd /export/servers/oozie-4.1.0-cdh5.14.0/oozie_works
      cp hive2/script.q sereval-actions/
      cp shell/hello.sh sereval-actions/
      cp -ra map-reduce/lib sereval-actions/
      
    • 3.开发调度的配置文件
      cd /export/servers/oozie-4.1.0-cdh5.14.0/oozie_works/sereval-actions
      vim workflow.xml
      
      <!--
        Oozie workflow chaining three actions:
          shell-node (run hello.sh) -> mr-node (WordCount MR job) -> hive2-node (run script.q)
        Any action failure routes to the "fail" kill node.
      -->
      <workflow-app xmlns="uri:oozie:workflow:0.4" name="shell-wf">
      <start to="shell-node"/>

      <!-- Step 1: run the shell script; stdout is captured via <capture-output/> -->
      <action name="shell-node">
          <shell xmlns="uri:oozie:shell-action:0.2">
              <job-tracker>${jobTracker}</job-tracker>
              <name-node>${nameNode}</name-node>
              <configuration>
                  <property>
                      <name>mapred.job.queue.name</name>
                      <value>${queueName}</value>
                  </property>
              </configuration>
              <exec>${EXEC}</exec>
              <!-- <argument>my_output=Hello Oozie</argument> -->
              <!-- ship the script from HDFS into the action's working dir -->
              <file>/user/root/oozie_works/sereval-actions/${EXEC}#${EXEC}</file>
              <capture-output/>
          </shell>
          <ok to="mr-node"/>
          <!-- BUGFIX: a failed shell action must abort the workflow via the "fail"
               kill node; the original transition to "mr-node" silently swallowed
               the error and continued the chain -->
          <error to="fail"/>
      </action>

      <!-- Step 2: run the MapReduce job (WordCount example, new API) -->
      <action name="mr-node">
              <map-reduce>
                  <job-tracker>${jobTracker}</job-tracker>
                  <name-node>${nameNode}</name-node>
                  <prepare>
                      <!-- remove a stale output dir so reruns do not fail -->
                      <delete path="${nameNode}/${outputDir}"/>
                  </prepare>
                  <configuration>
                      <property>
                          <name>mapred.job.queue.name</name>
                          <value>${queueName}</value>
                      </property>
      				<!-- old-API (mapred.*) sample configuration, kept for reference
                      <property>
                          <name>mapred.mapper.class</name>
                          <value>org.apache.oozie.example.SampleMapper</value>
                      </property>
                      <property>
                          <name>mapred.reducer.class</name>
                          <value>org.apache.oozie.example.SampleReducer</value>
                      </property>
                      <property>
                          <name>mapred.map.tasks</name>
                          <value>1</value>
                      </property>
                      <property>
                          <name>mapred.input.dir</name>
                          <value>/user/${wf:user()}/${examplesRoot}/input-data/text</value>
                      </property>
                      <property>
                          <name>mapred.output.dir</name>
                          <value>/user/${wf:user()}/${examplesRoot}/output-data/${outputDir}</value>
                      </property>
      				-->

      				   <!-- enable the new MapReduce API -->
                      <property>
                          <name>mapred.mapper.new-api</name>
                          <value>true</value>
                      </property>

                      <property>
                          <name>mapred.reducer.new-api</name>
                          <value>true</value>
                      </property>

                      <!-- MR output key type -->
                      <property>
                          <name>mapreduce.job.output.key.class</name>
                          <value>org.apache.hadoop.io.Text</value>
                      </property>

                      <!-- MR output value type -->
                      <property>
                          <name>mapreduce.job.output.value.class</name>
                          <value>org.apache.hadoop.io.IntWritable</value>
                      </property>

                      <!-- input path -->
                      <property>
                          <name>mapred.input.dir</name>
                          <value>${nameNode}/${inputdir}</value>
                      </property>

                      <!-- output path -->
                      <property>
                          <name>mapred.output.dir</name>
                          <value>${nameNode}/${outputDir}</value>
                      </property>

                      <!-- mapper class -->
                      <property>
                          <name>mapreduce.job.map.class</name>
                          <value>org.apache.hadoop.examples.WordCount$TokenizerMapper</value>
                      </property>

                      <!-- reducer class -->
                      <property>
                          <name>mapreduce.job.reduce.class</name>
                          <value>org.apache.hadoop.examples.WordCount$IntSumReducer</value>
                      </property>
      				<!-- number of map tasks -->
                      <property>
                          <name>mapred.map.tasks</name>
                          <value>1</value>
                      </property>

                  </configuration>
              </map-reduce>
              <ok to="hive2-node"/>
              <error to="fail"/>
          </action>

      <!-- Step 3: run the Hive2 script through HiveServer2 -->
       <action name="hive2-node">
              <hive2 xmlns="uri:oozie:hive2-action:0.1">
                  <job-tracker>${jobTracker}</job-tracker>
                  <name-node>${nameNode}</name-node>
                  <prepare>
                      <delete path="${nameNode}/user/${wf:user()}/${examplesRoot}/output-data/hive2"/>
                      <mkdir path="${nameNode}/user/${wf:user()}/${examplesRoot}/output-data"/>
                  </prepare>
                  <configuration>
                      <property>
                          <name>mapred.job.queue.name</name>
                          <value>${queueName}</value>
                      </property>
                  </configuration>
                  <jdbc-url>${jdbcURL}</jdbc-url>
                  <script>script.q</script>
                  <param>INPUT=/user/${wf:user()}/${examplesRoot}/input-data/table</param>
                  <param>OUTPUT=/user/${wf:user()}/${examplesRoot}/output-data/hive2</param>
              </hive2>
              <ok to="end"/>
              <error to="fail"/>
          </action>
      <!-- NOTE(review): no transition targets this decision node (shell-node's
           <ok> goes straight to mr-node), so it is unreachable. It is left over
           from the stock Oozie shell example; kept for reference. -->
      <decision name="check-output">
          <switch>
              <case to="end">
                  ${wf:actionData('shell-node')['my_output'] eq 'Hello Oozie'}
              </case>
              <default to="fail-output"/>
          </switch>
      </decision>
      <kill name="fail">
          <message>Shell action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
      </kill>
      <kill name="fail-output">
          <message>Incorrect output, expected [Hello Oozie] but was [${wf:actionData('shell-node')['my_output']}]</message>
      </kill>
      <end name="end"/>
      </workflow-app>
      

      开发job.properties配置文件

      cd /export/servers/oozie-4.1.0-cdh5.14.0/oozie_works/sereval-actions
      vim  job.properties
      
      # HDFS NameNode and YARN ResourceManager endpoints
      nameNode=hdfs://node01:8020
      jobTracker=node01:8032
      queueName=default
      examplesRoot=oozie_works
      # shell script executed by the shell-node action
      EXEC=hello.sh
      # HDFS input/output paths for the MapReduce action
      outputDir=/oozie/output
      inputdir=/oozie/input
      # HiveServer2 JDBC URL used by the hive2-node action
      jdbcURL=jdbc:hive2://node03:10000/default
      oozie.use.system.libpath=true
      # Workflow definition location on HDFS, i.e. under /user/root/oozie_works/sereval-actions
      oozie.wf.application.path=${nameNode}/user/${user.name}/${examplesRoot}/sereval-actions/workflow.xml
      
    • 4.上传资源文件夹到hdfs对应路径
      cd /export/servers/oozie-4.1.0-cdh5.14.0/oozie_works/
      hdfs dfs -put sereval-actions/ /user/root/oozie_works/
      
    • 5.执行调度任务
      cd /export/servers/oozie-4.1.0-cdh5.14.0/
      bin/oozie job -oozie http://node03:11000/oozie -config oozie_works/sereval-actions/job.properties -run
      
  • 相关阅读:
    java将string转化为int Yannis
    vm虚拟机启动报The VMware Authorization Service is not running错误 Yannis
    [org.hibernate.util.JDBCExceptionReporter] Cannot load JDBC driver class 'net. Yannis
    前台页面分页对总页数的判断 Yannis
    事务及其特性 Yannis
    iReport报表的简单函数及部分操作 Yannis
    spring aop与事务配置 Yannis
    大数据的验证和插入数据库 Yannis
    唔哇哈哈,拉霸机
    bindebug放到别的目录后不能看?编译器参数设置一下
  • 原文地址:https://www.cnblogs.com/zzzsw0412/p/12772456.html
Copyright © 2011-2022 走看看