Oozie Shell Action 配置

 
 Oozie Shell Action 配置 

Shell Action 运行Shell命令或者shell脚本，每个流程任务只有在当前的Shell Action命令执行完成之后才会继续执行后面的节点
要运行一个Shell任务，需要给Shell Action节点配置job-tracker、name-node、要执行的shell命令及其参数，以及其他必要的配置项
Shell Action 可以用来配置成在执行一个Shell任务之前去创建或者删除HDFS目录
Shell 应用的配置可以使用job-xml文件中的元素，也可以使用内部元素来配置，像EL表达式也支持在内部元素中的配置，内部元素的配置可以覆盖外部文件中的配置。
跟mr任务一样，在Shell任务中也可以使用文件和归档文件（files和archives），具体参见【http://archive.cloudera.com/cdh/3/oozie/WorkflowFunctionalSpec.html#a3.2.2.1_Adding_Files_and_Archives_for_the_Job】
当Shell任务执行完成之后，其产生的输出可以被流程任务使用，例如供决策节点做判断。如果要使Shell任务产生的输出对流程任务可用，必须满足两个条件：1，输出的格式必须是合法的Java属性文件（properties）格式；2，输出的大小不能超过2KB

 
 Shell Action格式 

38

 
      <workflow-app name="[WF-DEF-NAME]" xmlns="uri:oozie:workflow:0.3">
          ...
          <action name="[NODE-NAME]">
              <shell xmlns="uri:oozie:shell-action:0.1">
                  <job-tracker>[JOB-TRACKER]</job-tracker>
                  <name-node>[NAME-NODE]</name-node>
                  <prepare>
                      <delete path="[PATH]"/>
                      ...
                      <mkdir path="[PATH]"/>
                      ...
                  </prepare>
                  <job-xml>[SHELL SETTINGS FILE]</job-xml>
                  <configuration>
                      <property>
                          <name>[PROPERTY-NAME]</name>
                          <value>[PROPERTY-VALUE]</value>
                      </property>
                      ...
                  </configuration>
                  <exec>[SHELL-COMMAND]</exec>
                  <argument>[ARG-VALUE]</argument>
                  ...
                  <argument>[ARG-VALUE]</argument>
                  <env-var>[VAR1=VALUE1]</env-var>
                  ...
                  <env-var>[VARN=VALUEN]</env-var>
                  <file>[FILE-PATH]</file>
                  ...
                  <archive>[FILE-PATH]</archive>
                  ...
                  <capture-output/>
              </shell>
              <ok to="[NODE-NAME]"/>
              <error to="[NODE-NAME]"/>
          </action>
          ...
      </workflow-app>

prepare元素里面配置启动job前要删除或者创建的文件夹，文件夹路径必须是以hdfs://HOST:PORT开头。
job-xml指定一个存在的配置文件。
configuration里面配置传递给shell job的参数。
exec元素包含要执行的shell命令的路径。可以给shell命令添加参数。
argument元素指定要传递给shell脚本的参数。
env-var包含传递给shell命令的环境变量。每个env-var元素只能包含一个环境变量及其取值。如果取值中需要引用已有的环境变量（例如$PATH），必须写成PATH=$PATH:mypath的形式，不能写成${PATH}，因为${...}会被当作EL表达式解析。
capture-output元素指定用来捕获shell脚本的标准输出。可以通过String action:output(String node, String key)函数【EL函数】来获得输出。

 
 Shell Action 使用实例一：Oozie自带shell案例运行 

  首先下载Oozie自带的例子，解压，打开到 examples\apps\shell 目录，根据自己的安装环境修改之后的job.properties文件如下 

5

 
      nameNode=hdfs://hadoop-node1.novalocal:8020
      jobTracker=hadoop-node1.novalocal:8050
      queueName=default
      examplesRoot=xwj_test
      oozie.wf.application.path=${nameNode}/user/${user.name}/${examplesRoot}/apps/shell/oozie_example/workflow.xml
     

  官网自带的workflow.xml 

35

 
      <workflow-app xmlns="uri:oozie:workflow:0.4" name="shell-wf">
          <start to="shell-node"/>
          <action name="shell-node">
              <shell xmlns="uri:oozie:shell-action:0.2">
                  <job-tracker>${jobTracker}</job-tracker>
                  <name-node>${nameNode}</name-node>
                  <configuration>
                      <property>
                          <name>mapred.job.queue.name</name>
                          <value>${queueName}</value>
                      </property>
                  </configuration>
                  <exec>echo</exec>
                  <argument>my_output=Hello Oozie</argument>
                  <capture-output/>
              </shell>
              <ok to="check-output"/>
              <error to="fail"/>
          </action>
          <decision name="check-output">
              <switch>
                  <case to="end">
                      ${wf:actionData('shell-node')['my_output'] eq 'Hello Oozie'}
                  </case>
                  <default to="fail-output"/>
              </switch>
          </decision>
          <kill name="fail">
              <message>Shell action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
          </kill>
          <kill name="fail-output">
              <message>Incorrect output, expected [Hello Oozie] but was [${wf:actionData('shell-node')['my_output']}]</message>
          </kill>
          <end name="end"/>
      </workflow-app>
     

 
 运行步骤 

  1，首先在本地的测试节点上创建文件夹 

  mkdir -p /opt/mydata/user/oozie/xwj_test/apps/shell/oozie_example 

  2，将上述job.properties,workflow.xml 上传到新建好的目录中 

  cd /opt/mydata/user/oozie/xwj_test/apps/shell/oozie_example 

  3，在hdfs上创建目录 hdfs dfs -mkdir -p /user/oozie/xwj_test/apps/shell/oozie_example 

  4，将本地文件job.properties,workflow.xml 上传到hdfs目录中 

  hdfs dfs -put ../oozie_example/* /user/oozie/xwj_test/apps/shell/oozie_example 

  5，查看hdfs上的目录文件是否存在 

  6，使用oozie用户执行该任务 

  su - oozie -c "oozie job -oozie http://hadoop-node0.novalocal:11000/oozie -config /opt/mydata/user/oozie/xwj_test/apps/shell/oozie_example/job.properties -run" 

  7，查看任务执行状态 

 
 Shell Action 使用实例二：运行shell脚本接收传递参数，根据参数名称在hdfs上创建目录 

  修改之后的job.properties文件如下 

6

 
      nameNode=hdfs://hadoop-node1.novalocal:8020
      jobTracker=hadoop-node1.novalocal:8050
      queueName=default
      examplesRoot=xwj_test
      oozie.wf.application.path=${nameNode}/user/oozie/${examplesRoot}/apps/shell/shell_script/workflow.xml
      EXEC=script.sh

  workflow.xml 

24

 
      <workflow-app xmlns="uri:oozie:workflow:0.4" name="shell-wf">
          <start to="shell-node"/>
          <action name="shell-node">
              <shell xmlns="uri:oozie:shell-action:0.2">
                  <job-tracker>${jobTracker}</job-tracker>
                  <name-node>${nameNode}</name-node>
                  <configuration>
                      <property>
                          <name>mapred.job.queue.name</name>
                          <value>${queueName}</value>
                      </property>
                  </configuration>
                  <exec>${EXEC}</exec>
                  <argument>A</argument>
                  <file>${EXEC}#${EXEC}</file>
              </shell>
              <ok to="end"/>
              <error to="fail"/>
          </action>
          <kill name="fail">
              <message>Shell action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
          </kill>
          <end name="end"/>
      </workflow-app>
     

  script.sh脚本 

 
      #!/bin/bash 
     
      dir_name=/user/oozie/xwj_test/apps/shell/shell_script/$1 
     
      hdfs dfs -mkdir -p $dir_name

  1，首先在本地的测试节点上创建文件夹 

  mkdir -p /opt/mydata/user/oozie/xwj_test/apps/shell/shell_script 

  2，将上述job.properties、workflow.xml、script.sh 上传到新建好的目录中 

  cd /opt/mydata/user/oozie/xwj_test/apps/shell/shell_script 

  3，在hdfs上创建目录 hdfs dfs -mkdir -p /user/oozie/xwj_test/apps/shell/shell_script 

  4，将本地文件job.properties、workflow.xml、script.sh 上传到hdfs目录中 

  hdfs dfs -put ../shell_script/* /user/oozie/xwj_test/apps/shell/shell_script 

  5，查看hdfs上的目录文件是否存在 

  6，使用root用户执行该任务（这里需要注意：提交任务的用户需要有hdfs相关目录的操作权限） 

  oozie job -oozie http://hadoop-node0.novalocal:11000/oozie -config /opt/mydata/user/oozie/xwj_test/apps/shell/shell_script/job.properties -run 

  7，查看执行结果 

  8，hdfs上的目录文件 hdfs dfs -ls -r /user/oozie/xwj_test/apps/shell/shell_script 

 
 Shell Action 使用实例三：Oozie运行 sqoop命令shell脚本 

  1，本地测试sqoop脚本是否正确 

  /usr/hdp/2.6.3.0-235/sqoop/bin/sqoop import --delete-target-dir --fields-terminated-by '^' --connect jdbc:mysql://10.166.224.66:3306/zsy_node1 --username sys --password netease --table agreement_category --where "update_time < '`date -d 'yesterday' +%Y-%m-%d`'" --null-string '\\N' --null-non-string '\\N' --hive-drop-import-delims -m 1 --target-dir /hive/xwj_test/ddb/agreement_category/tmp/p_date=`date -d 'yesterday' +%Y-%m-%d`/node1 

  2，准备执行文件 

  2.1 job.properties 

6

 
      nameNode=hdfs://hadoop-node1.novalocal:8020
      jobTracker=hadoop-node1.novalocal:8050
      queueName=default
      examplesRoot=xwj_test
      oozie.wf.application.path=${nameNode}/user/oozie/${examplesRoot}/apps/shell/shell_sqoop/workflow.xml
      EXEC=shell_sqoop.sh

  2.2 workflow.xml 

24

 
      <workflow-app xmlns="uri:oozie:workflow:0.4" name="shell-wf">
          <start to="shell-node"/>
          <action name="shell-node">
              <shell xmlns="uri:oozie:shell-action:0.2">
                  <job-tracker>${jobTracker}</job-tracker>
                  <name-node>${nameNode}</name-node>
                  <configuration>
                      <property>
                          <name>mapred.job.queue.name</name>
                          <value>${queueName}</value>
                      </property>
                  </configuration>
                  <exec>${EXEC}</exec>
                  <argument>A</argument>
                  <file>${EXEC}#${EXEC}</file>
              </shell>
              <ok to="end"/>
              <error to="fail"/>
          </action>
          <kill name="fail">
              <message>Shell action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
          </kill>
          <end name="end"/>
      </workflow-app>
     

  2.3 
 shell_sqoop.sh 

 
 #!/bin/bash 

 sqoop import --delete-target-dir --fields-terminated-by '^' --connect jdbc:mysql://10.166.224.66:3306/zsy_node1 --username sys --password netease --table agreement_category --where "update_time < '`date -d 'yesterday' +%Y-%m-%d`'" --null-string '\\N' --null-non-string '\\N' --hive-drop-import-delims -m 1 --target-dir /hive/xwj_test/ddb/agreement_category/tmp/p_date=`date -d 'yesterday' +%Y-%m-%d`/node1 

  3，首先在本地的测试节点上创建文件夹 

  mkdir -p /opt/mydata/user/oozie/xwj_test/apps/shell/shell_sqoop 

  4，在hdfs上创建目录 hdfs dfs -mkdir -p /user/oozie/xwj_test/apps/shell/shell_sqoop 

  5，将上述job.properties、workflow.xml、shell_sqoop.sh 上传到新建好的目录中 

  cd /opt/mydata/user/oozie/xwj_test/apps/shell/shell_sqoop 

  6，将本地文件job.properties、workflow.xml、shell_sqoop.sh 上传到hdfs目录中 

  hdfs dfs -put ../shell_sqoop/* /user/oozie/xwj_test/apps/shell/shell_sqoop 

  7，查看hdfs上的目录文件是否存在 

  hdfs dfs -ls -r /user/oozie/xwj_test/apps/shell/shell_sqoop 

  8，使用 
 root用户执行该任务 
  这里需要注意提交的用户需要有hdfs相关目录的操作权限 

  oozie job -oozie http://hadoop-node0.novalocal:11000/oozie -config /opt/mydata/user/oozie/xwj_test/apps/shell/shell_sqoop/job.properties -run 

  9, 在oozie界面查看执行结果 

  查看错误消息:2018-04-11 19:02:33,976 WARN ShellActionExecutor:523 - SERVER[hadoop-node0.novalocal] USER[root] GROUP[-] TOKEN[] APP[shell-wf] JOB[0000023-180410191229126-oozie-root-W] ACTION[0000023-180410191229126-oozie-root-W@shell-node] Launcher ERROR, reason: Main class [org.apache.oozie.action.hadoop.ShellMain], exit code [1] 

  10,根据oozie生成的jobId 到hadoop界面上查看执行日志 

  最后定位到的日志信息： 

 
 Job init failed : org.apache.hadoop.yarn.exceptions.YarnRuntimeException: java.io.FileNotFoundException: File does not exist: hdfs://hadoop-node1.novalocal:8020/user/root/.staging/job_1523330140918_0150/job.splitmetainfo 

 
 at org.apache.hadoop.mapreduce.v2.app.job.impl.JobImpl$InitTransition.createSplits(JobImpl.java:1583) 

 
 at org.apache.hadoop.mapreduce.v2.app.job.impl.JobImpl$InitTransition.transition(JobImpl.java:1447) 

 
 at org.apache.hadoop.mapreduce.v2.app.job.impl.JobImpl$InitTransition.transition(JobImpl.java:1405) 

 
 at org.apache.hadoop.yarn.state.StateMachineFactory$MultipleInternalArc.doTransition(StateMachineFactory.java:385) 

 
 at org.apache.hadoop.yarn.state.StateMachineFactory.doTransition(StateMachineFactory.java:302) 

 
 at org.apache.hadoop.yarn.state.StateMachineFactory.access$300(StateMachineFactory.java:46) 

 
 at org.apache.hadoop.yarn.state.StateMachineFactory$InternalStateMachine.doTransition(StateMachineFactory.java:448) 

 
 at org.apache.hadoop.mapreduce.v2.app.job.impl.JobImpl.handle(JobImpl.java:999) 

 
 at org.apache.hadoop.mapreduce.v2.app.job.impl.JobImpl.handle(JobImpl.java:139) 

 
 at org.apache.hadoop.mapreduce.v2.app.MRAppMaster$JobEventDispatcher.handle(MRAppMaster.java:1385) 

 
 at org.apache.hadoop.mapreduce.v2.app.MRAppMaster.serviceStart(MRAppMaster.java:1160) 

 
 at org.apache.hadoop.service.AbstractService.start(AbstractService.java:193) 

 
 at org.apache.hadoop.mapreduce.v2.app.MRAppMaster$5.run(MRAppMaster.java:1599) 

 
 at java.security.AccessController.doPrivileged(Native Method) 

 
 at javax.security.auth.Subject.doAs(Subject.java:422) 

 
 at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1866) 

 
 at org.apache.hadoop.mapreduce.v2.app.MRAppMaster.initAndStartAppMaster(MRAppMaster.java:1595) 

 
 at org.apache.hadoop.mapreduce.v2.app.MRAppMaster.main(MRAppMaster.java:1526) 

 
 Caused by: java.io.FileNotFoundException: File does not exist: hdfs://hadoop-node1.novalocal:8020/user/root/.staging/job_1523330140918_0150/job.splitmetainfo 

 
 at org.apache.hadoop.hdfs.DistributedFileSystem$26.doCall(DistributedFileSystem.java:1446) 

 
 at org.apache.hadoop.hdfs.DistributedFileSystem$26.doCall(DistributedFileSystem.java:1438) 

 
 at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81) 

 
 at org.apache.hadoop.hdfs.DistributedFileSystem.getFileStatus(DistributedFileSystem.java:1454) 

 
 at org.apache.hadoop.mapreduce.split.SplitMetaInfoReader.readSplitMetaInfo(SplitMetaInfoReader.java:51) 

 
 at org.apache.hadoop.mapreduce.v2.app.job.impl.JobImpl$InitTransition.createSplits(JobImpl.java:1578) 

 
 ... 17 more 

 
 最终参考 https://community.hortonworks.com/questions/17489/job-init-fail-job-splitmetainfo-file-does-not-exis.html ，其中提示需要切换到yarn用户提交任务 

  11，切换yarn用户重新提交任务 

  su yarn 

  oozie job -oozie http://hadoop-node0.novalocal:11000/oozie -config /opt/mydata/user/oozie/xwj_test/apps/shell/shell_sqoop/job.properties -run 

  结果执行成功 

 
 Shell Action 使用实例四：Oozie运行 依赖本地文件的shell脚本 

  1，写好shell脚本 shell_file.sh。脚本会逐行读取本地文件ddbtables.txt，从中获取表的名字，再调用sqoop同步业务库中对应表的数据 

17

 
      #!/bin/bash
      for line in `cat ddbtables.txt`
      do
      tablename=$line
      echo $tablename
      sqoop import --delete-target-dir \
      --fields-terminated-by '^' \
      --connect jdbc:mysql://10.166.224.66:3306/zsy_node1 \
      --username sys \
      --password netease \
      --table $tablename \
      --where "update_time < '`date -d 'yesterday' +%Y-%m-%d`'" \
      --null-string '\\N' \
      --null-non-string '\\N' \
      --hive-drop-import-delims -m 1 \
      --target-dir /hive/xwj_test/ddb/$tablename/tmp/p_date=`date -d 'yesterday' +%Y-%m-%d`/node1
      done

  2，ddbtables.txt 文件内容 

5

 
      agreement_sign 
     
      agreement_template 
     
      agreement_template_property 
     
      customer_trust_project 
     
      agreement_category

  3， job.properties 

6

 
      nameNode=hdfs://hadoop-node1.novalocal:8020
      jobTracker=hadoop-node1.novalocal:8050
      queueName=default
      examplesRoot=xwj_test
      oozie.wf.application.path=${nameNode}/user/oozie/${examplesRoot}/apps/shell/shell_file/workflow.xml
      EXEC=shell_file.sh

  4， workflow.xml 

24

 
      <workflow-app xmlns="uri:oozie:workflow:0.4" name="shell-wf">
          <start to="shell-node"/>
          <action name="shell-node">
              <shell xmlns="uri:oozie:shell-action:0.2">
                  <job-tracker>${jobTracker}</job-tracker>
                  <name-node>${nameNode}</name-node>
                  <configuration>
                      <property>
                          <name>mapred.job.queue.name</name>
                          <value>${queueName}</value>
                      </property>
                  </configuration>
                  <exec>${EXEC}</exec>
                  <file>${EXEC}#${EXEC}</file>
                  <file>ddbtables.txt</file>
              </shell>
              <ok to="end"/>
              <error to="fail"/>
          </action>
          <kill name="fail">
              <message>Shell action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
          </kill>
          <end name="end"/>
      </workflow-app>
     

  5，首先在本地的测试节点上创建文件夹 

  mkdir -p /opt/mydata/user/oozie/xwj_test/apps/shell/shell_file 

  6，在hdfs上创建目录 hdfs dfs -mkdir -p /user/oozie/xwj_test/apps/shell/shell_file 

  7，将上述文件上传到新建好的目录中 

  cd /opt/mydata/user/oozie/xwj_test/apps/shell/shell_file 

  8，将本地文件 上传到hdfs目录中 

  hdfs dfs -put ../shell_file/* /user/oozie/xwj_test/apps/shell/shell_file 

  9，查看hdfs上的目录文件是否存在 

  hdfs dfs -ls -r /user/oozie/xwj_test/apps/shell/shell_file 

  10，切换yarn用户重新提交任务 

  su yarn 

  oozie job -oozie http://hadoop-node0.novalocal:11000/oozie -config /opt/mydata/user/oozie/xwj_test/apps/shell/shell_file/job.properties -run 

  查看节点执行成功 

 
 Shell Action 综合实例 ：使用shell脚本调用sqoop 命令从不同业务库获取业务数据，合并业务数据到hdfs 

  1,写好shell采集脚本 shell_app.sh 

73

 
      #!/bin/bash
      # 获取表名 同步不同节点的表数据
      for line in `cat ddbtables.txt`
      do
      tablename=$line
      echo $tablename
      sqoop import --delete-target-dir \
      --fields-terminated-by '^' \
      --connect "jdbc:mysql://10.166.224.66:3306/zsy_node1?useUnicode=true&characterEncoding=utf-8" \
      --username sys \
      --password netease \
      --table $tablename \
      --where "update_time < '`date -d 'yesterday' +%Y-%m-%d`'" \
      --null-string '\\N' \
      --null-non-string '\\N' \
      --hive-drop-import-delims -m 1 \
      --target-dir /hive/xwj_test/ddb/$tablename/tmp/p_date=`date -d 'yesterday' +%Y-%m-%d`/node1
      sqoop import --delete-target-dir \
      --fields-terminated-by '^' \
      --connect "jdbc:mysql://10.166.224.66:3306/zsy_node2?useUnicode=true&characterEncoding=utf-8" \
      --username sys \
      --password netease \
      --table $tablename \
      --where "update_time < '`date -d 'yesterday' +%Y-%m-%d`'" \
      --null-string '\\N' \
      --null-non-string '\\N' \
      --hive-drop-import-delims -m 1 \
      --target-dir /hive/xwj_test/ddb/$tablename/tmp/p_date=`date -d 'yesterday' +%Y-%m-%d`/node2
      sqoop import --delete-target-dir \
      --fields-terminated-by '^' \
      --connect "jdbc:mysql://10.166.224.68:3306/zsy_node3?useUnicode=true&characterEncoding=utf-8" \
      --username sys \
      --password netease \
      --table $tablename \
      --where "update_time < '`date -d 'yesterday' +%Y-%m-%d`'" \
      --null-string '\\N' \
      --null-non-string '\\N' \
      --hive-drop-import-delims -m 1 \
      --target-dir /hive/xwj_test/ddb/$tablename/tmp/p_date=`date -d 'yesterday' +%Y-%m-%d`/node3
      sqoop import --delete-target-dir \
      --fields-terminated-by '^' \
      --connect "jdbc:mysql://10.166.224.68:3306/zsy_node4?useUnicode=true&characterEncoding=utf-8" \
      --username sys \
      --password netease \
      --table $tablename \
      --where "update_time < '`date -d 'yesterday' +%Y-%m-%d`'" \
      --null-string '\\N' \
      --null-non-string '\\N' \
      --hive-drop-import-delims -m 1 \
      --target-dir /hive/xwj_test/ddb/$tablename/tmp/p_date=`date -d 'yesterday' +%Y-%m-%d`/node4
      done
      # 首先删除 目标合并表（兼容重复执行的逻辑）
      for line in `cat ddbtables.txt`
      do
      tablename=$line
      echo $tablename
      hadoop fs -rm -r /hive/xwj_test/ddb/$tablename/p_date=`date -d 'yesterday' +%Y-%m-%d`
      done
      # 使用脚本合并 不同节点的临时数据
      /usr/hdp/2.6.3.0-235/spark2/bin/spark-shell < merge.scala
      # 删除临时节点的数据
      for line in `cat ddbtables.txt`
      do
      tablename=$line
      echo $tablename
      hadoop fs -rm -r /hive/xwj_test/ddb/$tablename/tmp
      done

  2，ddbtables.txt 文件内容 

5

 
      agreement_sign 
     
      agreement_template 
     
      agreement_template_property 
     
      customer_trust_project 
     
      agreement_category

  3， job.properties 

6

 
      nameNode=hdfs://hadoop-node1.novalocal:8020
      jobTracker=hadoop-node1.novalocal:8050
      queueName=default
      examplesRoot=xwj_test
      oozie.wf.application.path=${nameNode}/user/oozie/${examplesRoot}/apps/shell/shell_app/workflow.xml
      EXEC=shell_app.sh

  4， workflow.xml 

25

 
      <workflow-app xmlns="uri:oozie:workflow:0.4" name="shell-wf">
          <start to="shell-node"/>
          <action name="shell-node">
              <shell xmlns="uri:oozie:shell-action:0.2">
                  <job-tracker>${jobTracker}</job-tracker>
                  <name-node>${nameNode}</name-node>
                  <configuration>
                      <property>
                          <name>mapred.job.queue.name</name>
                          <value>${queueName}</value>
                      </property>
                  </configuration>
                  <exec>${EXEC}</exec>
                  <file>${EXEC}#${EXEC}</file>
                  <file>ddbtables.txt</file>
                  <file>merge.scala</file>
              </shell>
              <ok to="end"/>
              <error to="fail"/>
          </action>
          <kill name="fail">
              <message>Shell action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
          </kill>
          <end name="end"/>
      </workflow-app>
     

  5，首先在本地的测试节点上创建文件夹 

  mkdir -p /opt/mydata/user/oozie/xwj_test/apps/shell/shell_app 

  6，在hdfs上创建目录 hdfs dfs -mkdir -p /user/oozie/xwj_test/apps/shell/shell_app 

  7，将上述文件（shell_app.sh、ddbtables.txt、merge.scala、job.properties、workflow.xml）上传到新建好的目录中 

  cd /opt/mydata/user/oozie/xwj_test/apps/shell/shell_app 

  8，将本地文件 上传到hdfs目录中 

  hdfs dfs -put ../shell_app/* /user/oozie/xwj_test/apps/shell/shell_app 

  9，查看hdfs上的目录文件是否存在 

  hdfs dfs -ls -r /user/oozie/xwj_test/apps/shell/shell_app 

  10，切换yarn用户重新提交任务 

  su yarn 

  oozie job -oozie http://hadoop-node0.novalocal:11000/oozie -config /opt/mydata/user/oozie/xwj_test/apps/shell/shell_app/job.properties -run 

Oozie操作篇--Oozie Shell Action 配置

Oozie Shell Action 配置

猜你喜欢