zoukankan      html  css  js  c++  java
  • 【Hadoop离线基础总结】oozie任务串联


    • 需求

      执行shell脚本 → 执行MR程序 → 执行hive程序

    • 1.准备工作目录
      cd /export/servers/oozie-4.1.0-cdh5.14.0/oozie_works
      mkdir -p sereval-actions
      
    • 2.准备调度文件
      cd /export/servers/oozie-4.1.0-cdh5.14.0/oozie_works
      cp hive2/script.q sereval-actions/
      cp shell/hello.sh sereval-actions/
      cp -ra map-reduce/lib sereval-actions/
      
    • 3.开发调度的配置文件
      cd /export/servers/oozie-4.1.0-cdh5.14.0/oozie_works/sereval-actions
      vim workflow.xml
      
      <workflow-app xmlns="uri:oozie:workflow:0.4" name="shell-wf">
      <start to="shell-node"/>
      <action name="shell-node">
          <shell xmlns="uri:oozie:shell-action:0.2">
              <job-tracker>${jobTracker}</job-tracker>
              <name-node>${nameNode}</name-node>
              <configuration>
                  <property>
                      <name>mapred.job.queue.name</name>
                      <value>${queueName}</value>
                  </property>
              </configuration>
              <exec>${EXEC}</exec>
              <!-- <argument>my_output=Hello Oozie</argument> -->
              <file>/user/root/oozie_works/sereval-actions/${EXEC}#${EXEC}</file>
      
              <capture-output/>
          </shell>
          <ok to="mr-node"/>
          <error to="fail"/>
      </action>
      
      
      
      
      <action name="mr-node">
              <map-reduce>
                  <job-tracker>${jobTracker}</job-tracker>
                  <name-node>${nameNode}</name-node>
                  <prepare>
                      <delete path="${nameNode}/${outputDir}"/>
                  </prepare>
                  <configuration>
                      <property>
                          <name>mapred.job.queue.name</name>
                          <value>${queueName}</value>
                      </property>
      				<!--  
                      <property>
                          <name>mapred.mapper.class</name>
                          <value>org.apache.oozie.example.SampleMapper</value>
                      </property>
                      <property>
                          <name>mapred.reducer.class</name>
                          <value>org.apache.oozie.example.SampleReducer</value>
                      </property>
                      <property>
                          <name>mapred.map.tasks</name>
                          <value>1</value>
                      </property>
                      <property>
                          <name>mapred.input.dir</name>
                          <value>/user/${wf:user()}/${examplesRoot}/input-data/text</value>
                      </property>
                      <property>
                          <name>mapred.output.dir</name>
                          <value>/user/${wf:user()}/${examplesRoot}/output-data/${outputDir}</value>
                      </property>
      				-->
      				
      				   <!-- 开启使用新的API来进行配置 -->
                      <property>
                          <name>mapred.mapper.new-api</name>
                          <value>true</value>
                      </property>
      
                      <property>
                          <name>mapred.reducer.new-api</name>
                          <value>true</value>
                      </property>
      
                      <!-- 指定MR的输出key的类型 -->
                      <property>
                          <name>mapreduce.job.output.key.class</name>
                          <value>org.apache.hadoop.io.Text</value>
                      </property>
      
                      <!-- 指定MR的输出的value的类型-->
                      <property>
                          <name>mapreduce.job.output.value.class</name>
                          <value>org.apache.hadoop.io.IntWritable</value>
                      </property>
      
                      <!-- 指定输入路径 -->
                      <property>
                          <name>mapred.input.dir</name>
                          <value>${nameNode}/${inputdir}</value>
                      </property>
      
                      <!-- 指定输出路径 -->
                      <property>
                          <name>mapred.output.dir</name>
                          <value>${nameNode}/${outputDir}</value>
                      </property>
      
                      <!-- 指定执行的map类 -->
                      <property>
                          <name>mapreduce.job.map.class</name>
                          <value>org.apache.hadoop.examples.WordCount$TokenizerMapper</value>
                      </property>
      
                      <!-- 指定执行的reduce类 -->
                      <property>
                          <name>mapreduce.job.reduce.class</name>
                          <value>org.apache.hadoop.examples.WordCount$IntSumReducer</value>
                      </property>
      				<!--  配置map task的个数 -->
                      <property>
                          <name>mapred.map.tasks</name>
                          <value>1</value>
                      </property>
      
                  </configuration>
              </map-reduce>
              <ok to="hive2-node"/>
              <error to="fail"/>
          </action>
      
      
      
      
      
      
       <action name="hive2-node">
              <hive2 xmlns="uri:oozie:hive2-action:0.1">
                  <job-tracker>${jobTracker}</job-tracker>
                  <name-node>${nameNode}</name-node>
                  <prepare>
                      <delete path="${nameNode}/user/${wf:user()}/${examplesRoot}/output-data/hive2"/>
                      <mkdir path="${nameNode}/user/${wf:user()}/${examplesRoot}/output-data"/>
                  </prepare>
                  <configuration>
                      <property>
                          <name>mapred.job.queue.name</name>
                          <value>${queueName}</value>
                      </property>
                  </configuration>
                  <jdbc-url>${jdbcURL}</jdbc-url>
                  <script>script.q</script>
                  <param>INPUT=/user/${wf:user()}/${examplesRoot}/input-data/table</param>
                  <param>OUTPUT=/user/${wf:user()}/${examplesRoot}/output-data/hive2</param>
              </hive2>
              <ok to="end"/>
              <error to="fail"/>
          </action>
      <!-- NOTE: this decision node is unreachable as written — shell-node's ok transition
           goes straight to mr-node; change it to "check-output" if output validation is desired. -->
      <decision name="check-output">
          <switch>
              <case to="end">
                  ${wf:actionData('shell-node')['my_output'] eq 'Hello Oozie'}
              </case>
              <default to="fail-output"/>
          </switch>
      </decision>
      <kill name="fail">
          <message>Shell action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
      </kill>
      <kill name="fail-output">
          <message>Incorrect output, expected [Hello Oozie] but was [${wf:actionData('shell-node')['my_output']}]</message>
      </kill>
      <end name="end"/>
      </workflow-app>
      

      开发job.properties配置文件

      cd /export/servers/oozie-4.1.0-cdh5.14.0/oozie_works/sereval-actions
      vim  job.properties
      
      nameNode=hdfs://node01:8020
      jobTracker=node01:8032
      queueName=default
      examplesRoot=oozie_works
      EXEC=hello.sh
      outputDir=/oozie/output
      inputdir=/oozie/input
      jdbcURL=jdbc:hive2://node03:10000/default
      oozie.use.system.libpath=true
      # 配置我们文件上传到hdfs的保存路径 实际上就是在hdfs 的/user/root/oozie_works/sereval-actions这个路径下
      oozie.wf.application.path=${nameNode}/user/${user.name}/${examplesRoot}/sereval-actions/workflow.xml
      
    • 4.上传资源文件夹到hdfs对应路径
      cd /export/servers/oozie-4.1.0-cdh5.14.0/oozie_works/
      hdfs dfs -put sereval-actions/ /user/root/oozie_works/
      
    • 5.执行调度任务
      cd /export/servers/oozie-4.1.0-cdh5.14.0/
      bin/oozie job -oozie http://node03:11000/oozie -config oozie_works/sereval-actions/job.properties -run
      
  • 相关阅读:
    Unique Binary Search Trees 解答
    Unique Paths II 解答
    Unique Paths 解答
    Maximum Subarray 解答
    Climbing Stairs 解答
    House Robber II 解答
    House Robber 解答
    Valid Palindrome 解答
    Container With Most Water 解答
    Remove Duplicates from Sorted List II 解答
  • 原文地址:https://www.cnblogs.com/zzzsw0412/p/12772456.html
Copyright © 2011-2022 走看看