  • Oozie: capturing stdout and exceptions with capture-output

    This is supported for ordinary java-actions and shell-actions, as long as the standard output is in "k1=v1" format:

    Let's test with test.py:

    #! /opt/anaconda3/bin/python
    ## test.py

    import sys
    import traceback

    if __name__ == '__main__':
        try:
            print("k1=v1")

            print(aaa)  ## a deliberate error: 'aaa' is not defined
        except Exception:
            print(traceback.format_exc())
            ## IMPORTANT: if the script exits abnormally, capture-output is discarded.
            ## To capture the error details, always exit normally, then handle the
            ## failure in a decision node.
            sys.exit(0)
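    Since capture-output only parses "k=v" lines, a useful variant (a sketch, not part of the original test; the key name "err" is our own choice) is to flatten the traceback onto a single line under a well-known key, so the error text itself becomes readable through wf:actionData:

    #! /opt/anaconda3/bin/python
    ## test_kv.py: hypothetical variant that reports the error in k=v form

    import sys
    import traceback

    if __name__ == '__main__':
        try:
            print("k1=v1")
            print(aaa)  ## same deliberate error as above
        except Exception:
            ## Flatten the multi-line traceback onto one line so the "k=v"
            ## parser keeps it under a single key instead of mangling it
            ## line by line (see the captured data later in this post).
            print("err=" + traceback.format_exc().replace("\n", " | "))
            sys.exit(0)  ## exit normally so capture-output survives

    A decision or kill node could then reference ${wf:actionData('python-node')['err']} directly.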
    
    
    #workflow.xml
    <workflow-app xmlns="uri:oozie:workflow:0.4" name="adaf4df46a6597914b9ff6cd80eff542c6a">
        <start to="python-node"/>
        <action name="python-node">
            <shell xmlns="uri:oozie:shell-action:0.2">
                <job-tracker>108412.server.bigdata.com.cn:8032</job-tracker>
                <name-node>hdfs://108474.server.bigdata.com.cn:8020</name-node>
                <configuration>
                    <property>
                        <name>oozie.launcher.mapred.job.queue.name</name>
                        <value>ada.oozielauncher</value>
                    </property>
                </configuration>
                <exec>model.py</exec>
                <file>model.py</file>
                <capture-output/>
            </shell>
            <ok to="python-node1"/>
            <error to="fail"/>
        </action>
        <action name="python-node1">
            <shell xmlns="uri:oozie:shell-action:0.2">
                <job-tracker>108412.server.bigdata.com.cn:8032</job-tracker>
                <name-node>hdfs://108474.server.bigdata.com.cn:8020</name-node>
                <configuration>
                    <property>
                        <name>oozie.launcher.mapred.job.queue.name</name>
                        <value>ada.oozielauncher</value>
                    </property>
                </configuration>
                <exec>echo</exec>
                <argument>k1=${wf:actionData("python-node")["k1"]}</argument>
                <capture-output/>
            </shell>
            <ok to="check-output"/>
            <error to="fail"/>
        </action>
        <decision name="check-output">
            <switch>
                <case to="end">
                    ${wf:actionData('python-node1')['k1'] eq 'Hello Oozie'}
                </case>
                <default to="fail"/>
            </switch>
        </decision>
        <kill name="fail">
            <message>Python action failed, error message[${wf:actionData('python-node')['k1']}]</message>
            <!--message>Python action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message-->
        </kill>
        <end name="end"/>
    </workflow-app>
    # job.properties
    oozie.use.system.libpath=True
    security_enabled=False
    dryrun=False
    jobTracker=108412.server.bigdata.com.cn:8032
    nameNode=hdfs://108474.server.bigdata.com.cn:8020
    user.name=root
    queueName=test

    # Do not add hive to the sharelib configuration; it will cause an error
    #oozie.action.sharelib.for.spark=spark,hive   (for spark-action)
    #oozie.action.sharelib.for.sqoop=sqoop,hbase
    oozie.wf.application.path=${nameNode}/user/lyy/oozie/test

    Put the test.py above (uploaded as model.py, the name the workflow references) and workflow.xml into the HDFS directory /user/lyy/oozie/test, then submit with the following command:

    oozie job -oozie http://10.8.4.46:11000/oozie -config job.properties -run
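    The submit command prints a workflow job ID; the job's status, and that of each action, can then be queried from the same CLI (shown here with the job ID from the run below):

    oozie job -oozie http://10.8.4.46:11000/oozie -info 0000106-181129152008300-oozie-oozi-W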

    In addition, if the code writes to stdout in a format other than "k=v", the EL function wf:actionData cannot retrieve it, yet capture-output still captures the output. It is stored in the data field of the Oozie metadata table oozie.WF_ACTIONS; that field has type mediumblob and cannot be read directly, but the following REST API returns the same data as JSON:

    http://108446.server.bigdata.com.cn:11000/oozie/v1/job/0000106-181129152008300-oozie-oozi-W

    {
        "appPath":"hdfs://108474.server.bigdata.com.cn:8020/user/lyy/oozie/3a0c7d3a2ed5468087d93c69db651f3f",
        "acl":null,
        "status":"KILLED",
        "createdTime":"Mon, 10 Dec 2018 03:50:13 GMT",
        "conf":"<configuration>
    <property>
    <name>user.name</name>
    <value>root</value>
    </property>
    <property>
    <name>oozie.use.system.libpath</name>
    <value>True</value>
    </property>
    <property>
    <name>mapreduce.job.user.name</name>
    <value>root</value>
    </property>
    <property>
    <name>security_enabled</name>
    <value>False</value>
    </property>
    <property>
    <name>queueName</name>
    <value>ada.spark</value>
    </property>
    <property>
    <name>nameNode</name>
    <value>hdfs://108474.server.bigdata.com.cn:8020</value>
    </property>
    <property>
    <name>dryrun</name>
    <value>False</value>
    </property>
    <property>
    <name>jobTracker</name>
    <value>108412.server.bigdata.com.cn:8032</value>
    </property>
    <property>
    <name>oozie.wf.application.path</name>
    <value>hdfs://108474.server.bigdata.com.cn:8020/user/lyy/oozie/3a0c7d3a2ed5468087d93c69db651f3f</value>
    </property>
    </configuration>",
        "lastModTime":"Mon, 10 Dec 2018 03:51:17 GMT",
        "run":0,
        "endTime":"Mon, 10 Dec 2018 03:51:17 GMT",
        "externalId":null,
        "appName":"adaf4df46a6597914b9ff6cd80eff542c6a",
        "id":"0000106-181129152008300-oozie-oozi-W",
        "startTime":"Mon, 10 Dec 2018 03:50:13 GMT",
        "parentId":null,
        "toString":"Workflow id[0000106-181129152008300-oozie-oozi-W] status[KILLED]",
        "group":null,
        "consoleUrl":"http://108446.server.bigdata.com.cn:11000/oozie?job=0000106-181129152008300-oozie-oozi-W",
        "user":"root",
        "actions":[
            {
                "errorMessage":null,
                "status":"OK",
                "stats":null,
                "data":null,
                "transition":"python-node",
                "externalStatus":"OK",
                "cred":"null",
                "conf":"",
                "type":":START:",
                "endTime":"Mon, 10 Dec 2018 03:50:14 GMT",
                "externalId":"-",
                "id":"0000106-181129152008300-oozie-oozi-W@:start:",
                "startTime":"Mon, 10 Dec 2018 03:50:13 GMT",
                "userRetryCount":0,
                "externalChildIDs":null,
                "name":":start:",
                "errorCode":null,
                "trackerUri":"-",
                "retries":0,
                "userRetryInterval":10,
                "toString":"Action name[:start:] status[OK]",
                "consoleUrl":"-",
                "userRetryMax":0
            },
            {
                "errorMessage":null,
                "status":"OK",
                "stats":null,
                "data":"#
    #Mon Dec 10 11:50:24 CST 2018
    File="./model.py", line 12, in <module>
    Traceback=(most recent call last):
    print(aaa)=
    NameError=name 'aaa' is not defined    #### this is the captured error stack trace
    k1=v1  ## this is the normal "k=v" stdout line
    ",
                "transition":"python-node1",
                "externalStatus":"SUCCEEDED",
                "cred":"null",
                "conf":"<shell xmlns="uri:oozie:shell-action:0.2">
    <job-tracker>108412.server.bigdata.com.cn:8032</job-tracker>
    <name-node>hdfs://108474.server.bigdata.com.cn:8020</name-node>
    <configuration>
    <property xmlns="">
    <name>oozie.launcher.mapred.job.queue.name</name>
    <value>ada.oozielauncher</value>
    <source>programatically</source>
    </property>
    </configuration>
    <exec>model.py</exec>
    <file>model.py</file>
    <capture-output />
    </shell>",
                "type":"shell",
                "endTime":"Mon, 10 Dec 2018 03:50:24 GMT",
                "externalId":"job_1542533868365_0510",
                "id":"0000106-181129152008300-oozie-oozi-W@python-node",
                "startTime":"Mon, 10 Dec 2018 03:50:14 GMT",
                "userRetryCount":0,
                "externalChildIDs":null,
                "name":"python-node",
                "errorCode":null,
                "trackerUri":"108412.server.bigdata.com.cn:8032",
                "retries":0,
                "userRetryInterval":10,
                "toString":"Action name[python-node] status[OK]",
                "consoleUrl":"http://108412.server.bigdata.com.cn:8088/proxy/application_1542533868365_0510/",
                "userRetryMax":0
            },
            {
                "errorMessage":null,
                "status":"OK",
                "stats":null,
                "data":"#
    #Mon Dec 10 11:51:16 CST 2018
    k1=v1
    ",
                "transition":"check-output",
                "externalStatus":"SUCCEEDED",
                "cred":"null",
                "conf":"<shell xmlns="uri:oozie:shell-action:0.2">
    <job-tracker>108412.server.bigdata.com.cn:8032</job-tracker>
    <name-node>hdfs://108474.server.bigdata.com.cn:8020</name-node>
    <configuration>
    <property xmlns="">
    <name>oozie.launcher.mapred.job.queue.name</name>
    <value>ada.oozielauncher</value>
    <source>programatically</source>
    </property>
    </configuration>
    <exec>echo</exec>
    <argument>k1=v1</argument>   ## the "k1=v1" captured from python-node was passed into python-node1 here
    <capture-output />
    </shell>",
                "type":"shell",
                "endTime":"Mon, 10 Dec 2018 03:51:16 GMT",
                "externalId":"job_1542533868365_0511",
                "id":"0000106-181129152008300-oozie-oozi-W@python-node1",
                "startTime":"Mon, 10 Dec 2018 03:50:24 GMT",
                "userRetryCount":0,
                "externalChildIDs":null,
                "name":"python-node1",
                "errorCode":null,
                "trackerUri":"108412.server.bigdata.com.cn:8032",
                "retries":0,
                "userRetryInterval":10,
                "toString":"Action name[python-node1] status[OK]",
                "consoleUrl":"http://108412.server.bigdata.com.cn:8088/proxy/application_1542533868365_0511/",
                "userRetryMax":0
            },
            {
                "errorMessage":null,
                "status":"OK",
                "stats":null,
                "data":null,
                "transition":"fail",
                "externalStatus":"fail",
                "cred":"null",
                "conf":"<switch xmlns="uri:oozie:workflow:0.4">
    <case to="end">false</case>
    <default to="fail" />
    </switch>",
                "type":"switch",
                "endTime":"Mon, 10 Dec 2018 03:51:17 GMT",
                "externalId":"-",
                "id":"0000106-181129152008300-oozie-oozi-W@check-output",
                "startTime":"Mon, 10 Dec 2018 03:51:16 GMT",
                "userRetryCount":0,
                "externalChildIDs":null,
                "name":"check-output",
                "errorCode":null,
                "trackerUri":"-",
                "retries":0,
                "userRetryInterval":10,
                "toString":"Action name[check-output] status[OK]",
                "consoleUrl":"-",
                "userRetryMax":0
            },
            {
                "errorMessage":"Python action failed, error message[v1]",
                "status":"OK",
                "stats":null,
                "data":null,
                "transition":null,
                "externalStatus":"OK",
                "cred":"null",
                "conf":"Python action failed, error message[${wf:actionData('python-node')['k1']}]",
                "type":":KILL:",
                "endTime":"Mon, 10 Dec 2018 03:51:17 GMT",
                "externalId":"-",
                "id":"0000106-181129152008300-oozie-oozi-W@fail",
                "startTime":"Mon, 10 Dec 2018 03:51:17 GMT",
                "userRetryCount":0,
                "externalChildIDs":null,
                "name":"fail",
                "errorCode":"E0729",
                "trackerUri":"-",
                "retries":0,
                "userRetryInterval":10,
                "toString":"Action name[fail] status[OK]",
                "consoleUrl":"-",
                "userRetryMax":0
            }
        ]
    }
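    For scripted access, the same endpoint can be read with a few lines of Python. Notice in the response above how the multi-line traceback was split into odd key=value pairs ("File=...", "Traceback=...") by the k=v capture parsing, while "k1=v1" came through intact. A minimal sketch, assuming the requests library is installed and using the job ID from above:

    ## fetch_action_data.py: pull captured output via the Oozie REST API (sketch)
    import requests

    OOZIE = "http://108446.server.bigdata.com.cn:11000/oozie"
    JOB_ID = "0000106-181129152008300-oozie-oozi-W"

    resp = requests.get("{}/v1/job/{}".format(OOZIE, JOB_ID), params={"show": "info"})
    resp.raise_for_status()
    info = resp.json()

    ## print the captured <capture-output/> data of every action that has any
    for action in info["actions"]:
        if action.get("data"):
            print("--- {} ---".format(action["name"]))
            print(action["data"])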

    Two ways to submit Spark through Oozie, shown as workflow.xml examples:

    #shell-action:
    <workflow-app xmlns="uri:oozie:workflow:0.4" name="shell-wf">
        <start to="shell-node"/>
        <action name="shell-node">
            <shell xmlns="uri:oozie:shell-action:0.2">
                <job-tracker>${jobTracker}</job-tracker>
                <name-node>${nameNode}</name-node>
                <configuration>
                    <property>
                        <name>mapreduce.job.queue.name</name>
                        <value>${queueName}</value>
                    </property>
                </configuration>
                <exec>spark2-submit</exec>
                <argument>--master</argument>
                <argument>yarn</argument>
                <argument>--deploy-mode</argument>
                <argument>cluster</argument>
                <argument>--queue</argument>
                <argument>ada.spark</argument>
                <argument>--name</argument>
                <argument>testYarn</argument>
                <argument>--conf</argument>
                <argument>spark.yarn.appMasterEnv.JAVA_HOME=/usr/java/jdk1.8</argument>
                <argument>--conf</argument>
                <argument>spark.executorEnv.JAVA_HOME=/usr/java/jdk1.8</argument>
                <argument>--jars</argument>
                <argument>hdfs://10.8.18.74:8020/ada/spark/share/tech_component/tc.plat.spark.jar,hdfs://10.8.18.74:8020/ada/spark/share/tech_component/bigdata4i-1.0.jar,hdfs://10.8.18.74:8020/ada/spark/share/tech_component/bigdata-sparklog-1.0.jar</argument>
                <argument>--files</argument>
                <argument>/etc/hive/conf/hive-site.xml</argument>
                <argument>--class</argument>
                <argument>testYarn.test.Ttest</argument>
                <argument>hdfs://10.8.18.74:8020/user/lyy/App/testYarn.test.jar</argument>
                <capture-output/>
            </shell>
            <ok to="end"/>
            <error to="fail"/>
        </action>
        <kill name="fail">
            <message>Shell action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
        </kill>
        <end name="end"/>
    </workflow-app>
    
    ##spark-action:
    <workflow-app xmlns="uri:oozie:workflow:0.4" name="spark-action">
        <start to="spark-node"/>
        <action name="spark-node">
            <spark xmlns="uri:oozie:spark-action:0.1">
                <job-tracker>${jobTracker}</job-tracker>
                <name-node>${nameNode}</name-node>
                <configuration>
                    <property>
                        <name>mapreduce.job.queue.name</name>
                        <value>${queueName}</value>
                    </property>
                </configuration>
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Spark-Action</name>
            <class>testYarn.test.Ttest</class>
            <jar>${nameNode}/user/lyy/App/testYarn.test.jar</jar>
            <spark-opts>--conf spark.yarn.appMasterEnv.JAVA_HOME=/usr/java/jdk1.8 --conf spark.executorEnv.JAVA_HOME=/usr/java/jdk1.8 --jars hdfs://10.8.18.74:8020/ada/spark/share/tech_component/tc.plat.spark.jar,hdfs://10.8.18.74:8020/ada/spark/share/tech_component/bigdata4i-1.0.jar,hdfs://10.8.18.74:8020/ada/spark/share/tech_component/bigdata-sparklog-1.0.jar --conf spark.executor.extraJavaOptions=-Dlog4j.configuration=/etc/hadoop/conf/log4j.properties --conf spark.driver.extraJavaOptions=-Dlog4j.configuration=/etc/hadoop/conf/log4j.properties --conf spark.yarn.queue=ada.spark --files /etc/hive/conf/hive-site.xml</spark-opts>
            </spark>
            <ok to="end"/>
            <error to="fail"/>
        </action>
        <kill name="fail">
            <message>Shell action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
        </kill>
        <end name="end"/>
    </workflow-app>
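    Either workflow is submitted with a job.properties like the earlier one. A sketch for the spark-action variant (the application path here is hypothetical, and per the earlier caveat hive must not be added to oozie.action.sharelib.for.spark):

    # job.properties (sketch; same cluster endpoints as above, path hypothetical)
    oozie.use.system.libpath=True
    jobTracker=108412.server.bigdata.com.cn:8032
    nameNode=hdfs://108474.server.bigdata.com.cn:8020
    queueName=ada.spark
    oozie.action.sharelib.for.spark=spark
    oozie.wf.application.path=${nameNode}/user/lyy/oozie/spark-test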
  • Original post: https://www.cnblogs.com/lyy-blog/p/10095674.html