DataX Installation, Deployment, and Testing on Linux

  1. Environment Preparation

Hadoop 2.7.3

Hive 1.2.1

JDK 1.8

Python 2.7 (included by default on CentOS)

MySQL 5.7

DataX 3.0

Download: http://datax-opensource.oss-cn-hangzhou.aliyuncs.com/datax.tar.gz

  2. Test Script Preparation
    2.1 MySQL Table Creation and Data Script

DROP TABLE IF EXISTS dim_area;

CREATE TABLE IF NOT EXISTS dim_area (id BIGINT COMMENT '', name VARCHAR(100) COMMENT 'area name', parent_id BIGINT COMMENT '');

 

INSERT INTO dim_area (id, name, parent_id) VALUES (110000, '北京市', null);

INSERT INTO dim_area (id, name, parent_id) VALUES (110100, '北京市', 110000);

INSERT INTO dim_area (id, name, parent_id) VALUES (110101, '东城区', 110100);

INSERT INTO dim_area (id, name, parent_id) VALUES (110102, '西城区', 110100);

INSERT INTO dim_area (id, name, parent_id) VALUES (110103, '朝阳区', 110100);

INSERT INTO dim_area (id, name, parent_id) VALUES (110104, '丰台区', 110100);

INSERT INTO dim_area (id, name, parent_id) VALUES (110105, '石景山区', 110100);

INSERT INTO dim_area (id, name, parent_id) VALUES (110106, '海淀区', 110100);

INSERT INTO dim_area (id, name, parent_id) VALUES (110107, '门头沟区', 110100);

INSERT INTO dim_area (id, name, parent_id) VALUES (110108, '房山区', 110100);

INSERT INTO dim_area (id, name, parent_id) VALUES (110109, '通州区', 110100);

INSERT INTO dim_area (id, name, parent_id) VALUES (110110, '顺义区', 110100);

INSERT INTO dim_area (id, name, parent_id) VALUES (110111, '昌平区', 110100);

INSERT INTO dim_area (id, name, parent_id) VALUES (110112, '大兴区', 110100);

INSERT INTO dim_area (id, name, parent_id) VALUES (110113, '怀柔区', 110100);

INSERT INTO dim_area (id, name, parent_id) VALUES (110114, '平谷区', 110100);

INSERT INTO dim_area (id, name, parent_id) VALUES (110115, '密云县', 110100);

INSERT INTO dim_area (id, name, parent_id) VALUES (110116, '延庆县', 110100);

INSERT INTO dim_area (id, name, parent_id) VALUES (120000, '天津市', null);

INSERT INTO dim_area (id, name, parent_id) VALUES (120100, '天津市', 120000);

INSERT INTO dim_area (id, name, parent_id) VALUES (120101, '和平区', 120100);

INSERT INTO dim_area (id, name, parent_id) VALUES (120102, '河东区', 120100);

INSERT INTO dim_area (id, name, parent_id) VALUES (120103, '河西区', 120100);

INSERT INTO dim_area (id, name, parent_id) VALUES (120104, '南开区', 120100);

INSERT INTO dim_area (id, name, parent_id) VALUES (120105, '河北区', 120100);

INSERT INTO dim_area (id, name, parent_id) VALUES (120106, '红桥区', 120100);

INSERT INTO dim_area (id, name, parent_id) VALUES (120107, '滨海新区', 120100);

INSERT INTO dim_area (id, name, parent_id) VALUES (120108, '东丽区', 120100);

INSERT INTO dim_area (id, name, parent_id) VALUES (120109, '西青区', 120100);

INSERT INTO dim_area (id, name, parent_id) VALUES (120110, '津南区', 120100);

INSERT INTO dim_area (id, name, parent_id) VALUES (120111, '北辰区', 120100);

INSERT INTO dim_area (id, name, parent_id) VALUES (120112, '武清区', 120100);

INSERT INTO dim_area (id, name, parent_id) VALUES (120113, '宝坻区', 120100);

INSERT INTO dim_area (id, name, parent_id) VALUES (120114, '宁河县', 120100);

INSERT INTO dim_area (id, name, parent_id) VALUES (120115, '静海县', 120100);

INSERT INTO dim_area (id, name, parent_id) VALUES (120116, '蓟县', 120100);

INSERT INTO dim_area (id, name, parent_id) VALUES (130000, '河北省', null);

INSERT INTO dim_area (id, name, parent_id) VALUES (130100, '石家庄市', 130000);

INSERT INTO dim_area (id, name, parent_id) VALUES (130102, '长安区', 130100);

INSERT INTO dim_area (id, name, parent_id) VALUES (130103, '桥东区', 130100);

INSERT INTO dim_area (id, name, parent_id) VALUES (130104, '桥西区', 130100);

INSERT INTO dim_area (id, name, parent_id) VALUES (130105, '新华区', 130100);

INSERT INTO dim_area (id, name, parent_id) VALUES (130107, '井陉矿区', 130100);

INSERT INTO dim_area (id, name, parent_id) VALUES (130108, '裕华区', 130100);

INSERT INTO dim_area (id, name, parent_id) VALUES (130121, '井陉县', 130100);

INSERT INTO dim_area (id, name, parent_id) VALUES (130123, '正定县', 130100);

INSERT INTO dim_area (id, name, parent_id) VALUES (130124, '栾城县', 130100);

INSERT INTO dim_area (id, name, parent_id) VALUES (130125, '行唐县', 130100);

INSERT INTO dim_area (id, name, parent_id) VALUES (130126, '灵寿县', 130100);

INSERT INTO dim_area (id, name, parent_id) VALUES (130127, '高邑县', 130100);
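The script above can be loaded into the source MySQL instance with the mysql command-line client. A minimal sketch, assuming the statements are saved as dim_area_mysql.sql (the file name is arbitrary) and using the zmdwdb database and root/root credentials that appear in the job configurations below:

# Create the source database (name taken from the jdbcUrl used later) and load the script.
mysql -h 192.168.10.107 -u root -proot -e "CREATE DATABASE IF NOT EXISTS zmdwdb DEFAULT CHARACTER SET utf8;"
mysql -h 192.168.10.107 -u root -proot --default-character-set=utf8 zmdwdb < dim_area_mysql.sql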

    2.2 Hive Table Creation Statement

DROP TABLE IF EXISTS dim_area;

CREATE TABLE IF NOT EXISTS dim_area
    (
        id BIGINT COMMENT '',
        name STRING COMMENT 'area name',
        parent_id BIGINT COMMENT ''
    )
    COMMENT '' ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';
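The HDFS path used by the DataX jobs (/user/hive/warehouse/zmdw.db/dim_area/) implies the table belongs to a Hive database named zmdw, so the DDL should be executed against that database. A minimal sketch, assuming the statement above is saved as dim_area_hive.sql:

# Create the zmdw database (inferred from the warehouse path) and the dim_area table.
hive -e "CREATE DATABASE IF NOT EXISTS zmdw;"
hive --database zmdw -f dim_area_hive.sql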

    2.3 MySQL-to-Hive Migration JSON Script (dim_area2.json)

{

    "job": {

        "setting": {

            "speed": {

                "channel": 10

            }

        },

        "content": [

            {

                "reader": {

                    "name": "mysqlreader",

                    "parameter": {

                        "username": "root",

                        "password": "root",

                        "connection": [

                            {

                                "querySql": [

                                    "SELECT id, name, parent_id FROM dim_area;"

                                ],

                                "jdbcUrl": [

                                    "jdbc:mysql://192.168.10.107:3306/zmdwdb"

                                ]

                            }

                        ]

                    }

                },

                "writer": {

                    "name": "hdfswriter",

                    "parameter": {

                        "defaultFS": "hdfs://192.168.1.181:9000/",

                        "fileType": "text",

                        "path": "/user/hive/warehouse/zmdw.db/dim_area/",

                        "fileName": "tmp",

                        "column": [

                            {

                                "name": "id",

                                "type": "bigint"

                            },

                            {

                                "name": "name",

                                "type": "string"

                            },

                            {

                                "name": "parent_id",

                                "type": "bigint"

                            }

                        ],

                        "writeMode": "append",

                        "fieldDelimiter": "\t",

                        "compress":""

                    }

                }

            }

        ]

    }

}

    2.4 Hive-to-MySQL Migration JSON Script (dim_area3.json)

{

    "job": {

        "content": [

            {

                "reader": {

                    "name": "hdfsreader",

                    "parameter": {

                        "column": [

                            {

                                "index": 0,

                                "type": "string"

                            },

                            {

                                "index": 1,

                                "type": "string"

                            },

                            {

                                "index": 2,

                                "type": "string"

                            }

                        ],

                        "defaultFS": "hdfs://192.168.1.181:9000/",

                        "encoding": "UTF-8",

                        "fieldDelimiter": "\t",

                        "fileType": "text",

                        "path": "/user/hive/warehouse/zmdw.db/dim_area/"

                    }

                },

                "writer": {

                    "name": "mysqlwriter",

                    "parameter": {

                        "column": ["id","name","parent_id"],

                        "connection": [

                            {

                                "jdbcUrl": "jdbc:mysql://192.168.10.107:3306/zmdwdb",

                                "table": ["dim_area"]

                            }

                        ],

                        "password": "root",

                        "preSql": [],

                        "session": [],

                        "username": "root",

                        "writeMode": "insert"

                    }

                }

            }

        ],

        "setting": {

            "speed": {

                "channel": "1"

            }

        }

    }

}

 

  3. DataX Installation

Download datax.tar.gz, upload it to the server, and extract it; DataX can then be used directly, with no further installation steps.
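A minimal sketch of the install plus a self-check, assuming the package is extracted under /app (the /app/datax/hook path in the logs below suggests this install directory); the distribution includes a sample job (job/job.json) that can serve as a quick verification:

# Download (skip if the tarball was uploaded manually), extract, and run the bundled sample job.
wget http://datax-opensource.oss-cn-hangzhou.aliyuncs.com/datax.tar.gz
mkdir -p /app
tar -zxvf datax.tar.gz -C /app
cd /app/datax/bin
python datax.py ../job/job.json    # prints a job summary if the installation is healthy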

  4. MySQL-to-Hive Data Migration Test

Place the dim_area2.json script in the datax/bin directory.

Run: [root@BIGDATA bin]# python datax.py dim_area2.json

A successful migration produces output like the following:

DataX (DATAX-OPENSOURCE-3.0), From Alibaba !

Copyright (C) 2010-2017, Alibaba Group. All Rights Reserved.

 

 

2019-08-24 02:46:17.454 [main] INFO  VMInfo - VMInfo# operatingSystem class => sun.management.OperatingSystemImpl

2019-08-24 02:46:17.465 [main] INFO  Engine - the machine info  =>

 

        osInfo: Oracle Corporation 1.8 25.131-b11

        jvmInfo:        Linux amd64 2.6.32-642.11.1.el6.x86_64

        cpu num:        1

 

        totalPhysicalMemory:    -0.00G

        freePhysicalMemory:     -0.00G

        maxFileDescriptorCount: -1

        currentOpenFileDescriptorCount: -1

 

        GC Names        [Copy, MarkSweepCompact]

 

        MEMORY_NAME                    | allocation_size                | init_size                     

        Eden Space                     | 273.06MB                       | 273.06MB                      

        Code Cache                     | 240.00MB                       | 2.44MB                        

        Survivor Space                 | 34.13MB                        | 34.13MB                        

        Compressed Class Space         | 1,024.00MB                     | 0.00MB                        

        Metaspace                      | -0.00MB                        | 0.00MB                        

        Tenured Gen                    | 682.69MB                       | 682.69MB                      

 

 

2019-08-24 02:46:17.487 [main] INFO  Engine -

{

        "content":[

                {

                        "reader":{

                                "name":"mysqlreader",

                                "parameter":{

                                        "connection":[

                                                {

                                                        "jdbcUrl":[

                                                                "jdbc:mysql://192.168.10.107:3306/zmdwdb"

                                                        ],

                                                        "querySql":[

                                                                "SELECT id, name, parent_id FROM dim_area;"

                                                        ]

                                                }

                                        ],

                                        "password":"****",

                                        "username":"root"

                                }

                        },

                        "writer":{

                                "name":"hdfswriter",

                                "parameter":{

                                        "column":[

                                                {

                                                        "name":"id",

                                                        "type":"bigint"

                                                },

                                                {

                                                        "name":"name",

                                                        "type":"string"

                                                },

                                                {

                                                        "name":"parent_id",

                                                        "type":"bigint"

                                                }

                                        ],

                                        "compress":"",

                                        "defaultFS":"hdfs://192.168.1.181:9000/",

                                        "fieldDelimiter":"\t",

                                        "fileName":"tmp",

                                        "fileType":"text",

                                        "path":"/user/hive/warehouse/zmdw.db/dim_area/",

                                        "writeMode":"append"

                                }

                        }

                }

        ],

        "setting":{

                "speed":{

                        "channel":10

                }

        }

}

 

2019-08-24 02:46:17.512 [main] WARN  Engine - prioriy set to 0, because NumberFormatException, the value is: null

2019-08-24 02:46:17.514 [main] INFO  PerfTrace - PerfTrace traceId=job_-1, isEnable=false, priority=0

2019-08-24 02:46:17.514 [main] INFO  JobContainer - DataX jobContainer starts job.

2019-08-24 02:46:17.522 [main] INFO  JobContainer - Set jobId = 0

2019-08-24 02:46:18.014 [job-0] INFO  OriginalConfPretreatmentUtil - Available jdbcUrl:jdbc:mysql://192.168.10.107:3306/zmdwdb?yearIsDateType=false&zeroDateTimeBehavior=convertToNull&tinyInt1isBit=false&rewriteBatchedStatements=true.

Aug 24, 2019 2:46:18 AM org.apache.hadoop.util.NativeCodeLoader <clinit>

WARNING: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable

2019-08-24 02:46:19.441 [job-0] INFO  JobContainer - jobContainer starts to do prepare ...

2019-08-24 02:46:19.446 [job-0] INFO  JobContainer - DataX Reader.Job [mysqlreader] do prepare work .

2019-08-24 02:46:19.447 [job-0] INFO  JobContainer - DataX Writer.Job [hdfswriter] do prepare work .

2019-08-24 02:46:19.618 [job-0] INFO  HdfsWriter$Job - 由于您配置了writeMode append, 写入前不做清理工作, [/user/hive/warehouse/zmdw.db/dim_area/] 目录下写入相应文件名前缀  [tmp] 的文件

2019-08-24 02:46:19.618 [job-0] INFO  JobContainer - jobContainer starts to do split ...

2019-08-24 02:46:19.618 [job-0] INFO  JobContainer - Job set Channel-Number to 10 channels.

2019-08-24 02:46:19.622 [job-0] INFO  JobContainer - DataX Reader.Job [mysqlreader] splits to [1] tasks.

2019-08-24 02:46:19.623 [job-0] INFO  HdfsWriter$Job - begin do split...

2019-08-24 02:46:19.628 [job-0] INFO  HdfsWriter$Job - splited write file name:[hdfs://192.168.1.181:9000//user/hive/warehouse/zmdw.db/dim_area__0ee4362b_a3b7_43ae_8256_ef9e4449e1b9/tmp__868c08e3_cc1a_46d4_996a_1591d25a6f0c]

2019-08-24 02:46:19.629 [job-0] INFO  HdfsWriter$Job - end do split.

2019-08-24 02:46:19.629 [job-0] INFO  JobContainer - DataX Writer.Job [hdfswriter] splits to [1] tasks.

2019-08-24 02:46:19.725 [job-0] INFO  JobContainer - jobContainer starts to do schedule ...

2019-08-24 02:46:19.741 [job-0] INFO  JobContainer - Scheduler starts [1] taskGroups.

2019-08-24 02:46:19.748 [job-0] INFO  JobContainer - Running by standalone Mode.

2019-08-24 02:46:19.765 [taskGroup-0] INFO  TaskGroupContainer - taskGroupId=[0] start [1] channels for [1] tasks.

2019-08-24 02:46:19.778 [taskGroup-0] INFO  Channel - Channel set byte_speed_limit to -1, No bps activated.

2019-08-24 02:46:19.778 [taskGroup-0] INFO  Channel - Channel set record_speed_limit to -1, No tps activated.

2019-08-24 02:46:19.820 [taskGroup-0] INFO  TaskGroupContainer - taskGroup[0] taskId[0] attemptCount[1] is started

2019-08-24 02:46:19.826 [0-0-0-reader] INFO  CommonRdbmsReader$Task - Begin to read record by Sql: [SELECT id, name, parent_id FROM dim_area;

] jdbcUrl:[jdbc:mysql://192.168.10.107:3306/zmdwdb?yearIsDateType=false&zeroDateTimeBehavior=convertToNull&tinyInt1isBit=false&rewriteBatchedStatements=true].

2019-08-24 02:46:19.915 [0-0-0-writer] INFO  HdfsWriter$Task - begin do write...

2019-08-24 02:46:19.916 [0-0-0-writer] INFO  HdfsWriter$Task - write to file : [hdfs://192.168.1.181:9000//user/hive/warehouse/zmdw.db/dim_area__0ee4362b_a3b7_43ae_8256_ef9e4449e1b9/tmp__868c08e3_cc1a_46d4_996a_1591d25a6f0c]

2019-08-24 02:46:20.179 [0-0-0-reader] INFO  CommonRdbmsReader$Task - Finished read record by Sql: [SELECT id, name, parent_id FROM dim_area;

] jdbcUrl:[jdbc:mysql://192.168.10.107:3306/zmdwdb?yearIsDateType=false&zeroDateTimeBehavior=convertToNull&tinyInt1isBit=false&rewriteBatchedStatements=true].

2019-08-24 02:46:20.431 [0-0-0-writer] INFO  HdfsWriter$Task - end do write

2019-08-24 02:46:20.526 [taskGroup-0] INFO  TaskGroupContainer - taskGroup[0] taskId[0] is successed, used[711]ms

2019-08-24 02:46:20.526 [taskGroup-0] INFO  TaskGroupContainer - taskGroup[0] completed it's tasks.

2019-08-24 02:46:29.871 [job-0] INFO  StandAloneJobContainerCommunicator - Total 3258 records, 49722 bytes | Speed 4.86KB/s, 325 records/s | Error 0 records, 0 bytes |  All Task WaitWriterTime 0.229s |  All Task WaitReaderTime 0.000s | Percentage 100.00%

2019-08-24 02:46:29.873 [job-0] INFO  AbstractScheduler - Scheduler accomplished all tasks.

2019-08-24 02:46:29.873 [job-0] INFO  JobContainer - DataX Writer.Job [hdfswriter] do post work.

2019-08-24 02:46:29.875 [job-0] INFO  HdfsWriter$Job - start rename file [hdfs://192.168.1.181:9000//user/hive/warehouse/zmdw.db/dim_area__0ee4362b_a3b7_43ae_8256_ef9e4449e1b9/tmp__868c08e3_cc1a_46d4_996a_1591d25a6f0c] to file [hdfs://192.168.1.181:9000//user/hive/warehouse/zmdw.db/dim_area/tmp__868c08e3_cc1a_46d4_996a_1591d25a6f0c].

2019-08-24 02:46:29.887 [job-0] INFO  HdfsWriter$Job - finish rename file [hdfs://192.168.1.181:9000//user/hive/warehouse/zmdw.db/dim_area__0ee4362b_a3b7_43ae_8256_ef9e4449e1b9/tmp__868c08e3_cc1a_46d4_996a_1591d25a6f0c] to file [hdfs://192.168.1.181:9000//user/hive/warehouse/zmdw.db/dim_area/tmp__868c08e3_cc1a_46d4_996a_1591d25a6f0c].

2019-08-24 02:46:29.888 [job-0] INFO  HdfsWriter$Job - start delete tmp dir [hdfs://192.168.1.181:9000/user/hive/warehouse/zmdw.db/dim_area__0ee4362b_a3b7_43ae_8256_ef9e4449e1b9] .

2019-08-24 02:46:29.895 [job-0] INFO  HdfsWriter$Job - finish delete tmp dir [hdfs://192.168.1.181:9000/user/hive/warehouse/zmdw.db/dim_area__0ee4362b_a3b7_43ae_8256_ef9e4449e1b9] .

2019-08-24 02:46:29.896 [job-0] INFO  JobContainer - DataX Reader.Job [mysqlreader] do post work.

2019-08-24 02:46:29.896 [job-0] INFO  JobContainer - DataX jobId [0] completed successfully.

2019-08-24 02:46:29.897 [job-0] INFO  HookInvoker - No hook invoked, because base dir not exists or is a file: /app/datax/hook

2019-08-24 02:46:29.904 [job-0] INFO  JobContainer -

         [total cpu info] =>

                averageCpu                     | maxDeltaCpu                    | minDeltaCpu                   

                -1.00%                         | -1.00%                         | -1.00%

                       

 

         [total gc info] =>

                 NAME                 | totalGCCount       | maxDeltaGCCount    | minDeltaGCCount    | totalGCTime        | maxDeltaGCTime     | minDeltaGCTime    

                 Copy                 | 0                  | 0                  | 0                  | 0.000s             | 0.000s             | 0.000s            

                 MarkSweepCompact     | 1                  | 1                  | 1                  | 0.035s             | 0.035s             | 0.035s            

 

2019-08-24 02:46:29.904 [job-0] INFO  JobContainer - PerfTrace not enable!

2019-08-24 02:46:29.904 [job-0] INFO  StandAloneJobContainerCommunicator - Total 3258 records, 49722 bytes | Speed 4.86KB/s, 325 records/s | Error 0 records, 0 bytes |  All Task WaitWriterTime 0.229s |  All Task WaitReaderTime 0.000s | Percentage 100.00%

2019-08-24 02:46:29.909 [job-0] INFO  JobContainer -

任务启动时刻                    : 2019-08-24 02:46:17

任务结束时刻                    : 2019-08-24 02:46:29

任务总计耗时                    :                 12s

任务平均流量                    :            4.86KB/s

记录写入速度                    :            325rec/s

读出记录总数                    :                3258

读写失败总数                    :                   0
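To cross-check the result, the Hive-side row count should match the 3258 records reported in the summary above. A quick sketch, using the zmdw database name inferred earlier:

# Expect 3258 rows for this run.
hive -e "SELECT COUNT(*) FROM zmdw.dim_area;"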

 

  5. Hive-to-MySQL Data Migration Test

Notes on migrating data back from Hive to MySQL:

Hive column types such as bigint can be read back as string. The underlying HDFS files are plain text, so strict numeric reader types easily trigger dirty-data errors; if DataX reports dirty data, switch the hdfsreader column types to string (the most compatible type) and use varchar for the corresponding MySQL columns.
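If the MySQL side also needs to be made more forgiving, the numeric columns can be widened to varchar before the reverse load. A hedged sketch (the varchar lengths are arbitrary):

# Switch the MySQL columns to VARCHAR so values read from the text files in HDFS
# are accepted without dirty-data errors.
mysql -h 192.168.10.107 -u root -proot zmdwdb -e "ALTER TABLE dim_area MODIFY COLUMN id VARCHAR(20), MODIFY COLUMN name VARCHAR(100), MODIFY COLUMN parent_id VARCHAR(20);"

Then run the job: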

[root@BIGDATA bin]# python datax.py dim_area3.json

 

DataX (DATAX-OPENSOURCE-3.0), From Alibaba !

Copyright (C) 2010-2017, Alibaba Group. All Rights Reserved.

 

 

2019-08-24 03:25:03.592 [main] INFO  VMInfo - VMInfo# operatingSystem class => sun.management.OperatingSystemImpl

2019-08-24 03:25:03.606 [main] INFO  Engine - the machine info  =>

 

        osInfo: Oracle Corporation 1.8 25.131-b11

        jvmInfo:        Linux amd64 2.6.32-642.11.1.el6.x86_64

        cpu num:        1

 

        totalPhysicalMemory:    -0.00G

        freePhysicalMemory:     -0.00G

        maxFileDescriptorCount: -1

        currentOpenFileDescriptorCount: -1

 

        GC Names        [Copy, MarkSweepCompact]

 

        MEMORY_NAME                    | allocation_size                | init_size                     

        Eden Space                     | 273.06MB                       | 273.06MB                       

        Code Cache                     | 240.00MB                       | 2.44MB                        

        Survivor Space                 | 34.13MB                        | 34.13MB                       

        Compressed Class Space         | 1,024.00MB                     | 0.00MB                        

        Metaspace                      | -0.00MB                        | 0.00MB                        

        Tenured Gen                    | 682.69MB                       | 682.69MB                      

 

 

2019-08-24 03:25:03.630 [main] INFO  Engine -

{

        "content":[

                {

                        "reader":{

                                "name":"hdfsreader",

                                "parameter":{

                                        "column":[

                                                {

                                                        "index":0,

                                                        "type":"string"

                                                },

                                                {

                                                        "index":1,

                                                        "type":"string"

                                                },

                                                {

                                                        "index":2,

                                                        "type":"string"

                                                }

                                        ],

                                        "defaultFS":"hdfs://192.168.1.181:9000/",

                                        "encoding":"UTF-8",

                                        "fieldDelimiter":"\t",

                                        "fileType":"text",

                                        "path":"/user/hive/warehouse/zmdw.db/dim_area/"

                                }

                        },

                        "writer":{

                                "name":"mysqlwriter",

                                "parameter":{

                                        "column":[

                                                "id",

                                                "name",

                                                "parent_id"

                                        ],

                                        "connection":[

                                                {

                                                        "jdbcUrl":"jdbc:mysql://192.168.10.107:3306/zmdwdb",

                                                        "table":[

                                                                "dim_area"

                                                        ]

                                                }

                                        ],

                                        "password":"****",

                                        "preSql":[],

                                        "session":[],

                                        "username":"root",

                                        "writeMode":"insert"

                                }

                        }

                }

        ],

        "setting":{

                "speed":{

                        "channel":"1"

                }

        }

}

 

2019-08-24 03:25:03.651 [main] WARN  Engine - prioriy set to 0, because NumberFormatException, the value is: null

2019-08-24 03:25:03.655 [main] INFO  PerfTrace - PerfTrace traceId=job_-1, isEnable=false, priority=0

2019-08-24 03:25:03.655 [main] INFO  JobContainer - DataX jobContainer starts job.

2019-08-24 03:25:03.662 [main] INFO  JobContainer - Set jobId = 0

2019-08-24 03:25:03.694 [job-0] INFO  HdfsReader$Job - init() begin...

2019-08-24 03:25:04.109 [job-0] INFO  HdfsReader$Job - hadoopConfig details:{"finalParameters":[]}

2019-08-24 03:25:04.109 [job-0] INFO  HdfsReader$Job - init() ok and end...

2019-08-24 03:25:04.500 [job-0] INFO  OriginalConfPretreatmentUtil - table:[dim_area] all columns:[

id,name,parent_id

].

2019-08-24 03:25:04.519 [job-0] INFO  OriginalConfPretreatmentUtil - Write data [

insert INTO %s (id,name,parent_id) VALUES(?,?,?)

], which jdbcUrl like:[jdbc:mysql://192.168.10.107:3306/zmdwdb?yearIsDateType=false&zeroDateTimeBehavior=convertToNull&tinyInt1isBit=false&rewriteBatchedStatements=true]

2019-08-24 03:25:04.520 [job-0] INFO  JobContainer - jobContainer starts to do prepare ...

2019-08-24 03:25:04.521 [job-0] INFO  JobContainer - DataX Reader.Job [hdfsreader] do prepare work .

2019-08-24 03:25:04.522 [job-0] INFO  HdfsReader$Job - prepare(), start to getAllFiles...

2019-08-24 03:25:04.522 [job-0] INFO  HdfsReader$Job - get HDFS all files in path = [/user/hive/warehouse/zmdw.db/dim_area/]

Aug 24, 2019 3:25:04 AM org.apache.hadoop.util.NativeCodeLoader <clinit>

WARNING: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable

2019-08-24 03:25:05.861 [job-0] INFO  HdfsReader$Job - [hdfs://192.168.1.181:9000/user/hive/warehouse/zmdw.db/dim_area/tmp__868c08e3_cc1a_46d4_996a_1591d25a6f0c]是[text]类型的文件, 将该文件加入source files列表

2019-08-24 03:25:05.868 [job-0] INFO  HdfsReader$Job - 您即将读取的文件数为: [1], 列表为: [hdfs://192.168.1.181:9000/user/hive/warehouse/zmdw.db/dim_area/tmp__868c08e3_cc1a_46d4_996a_1591d25a6f0c]

2019-08-24 03:25:05.869 [job-0] INFO  JobContainer - DataX Writer.Job [mysqlwriter] do prepare work .

2019-08-24 03:25:05.871 [job-0] INFO  JobContainer - jobContainer starts to do split ...

2019-08-24 03:25:05.871 [job-0] INFO  JobContainer - Job set Channel-Number to 1 channels.

2019-08-24 03:25:05.872 [job-0] INFO  HdfsReader$Job - split() begin...

2019-08-24 03:25:05.877 [job-0] INFO  JobContainer - DataX Reader.Job [hdfsreader] splits to [1] tasks.

2019-08-24 03:25:05.877 [job-0] INFO  JobContainer - DataX Writer.Job [mysqlwriter] splits to [1] tasks.

2019-08-24 03:25:05.899 [job-0] INFO  JobContainer - jobContainer starts to do schedule ...

2019-08-24 03:25:05.908 [job-0] INFO  JobContainer - Scheduler starts [1] taskGroups.

2019-08-24 03:25:05.914 [job-0] INFO  JobContainer - Running by standalone Mode.

2019-08-24 03:25:05.936 [taskGroup-0] INFO  TaskGroupContainer - taskGroupId=[0] start [1] channels for [1] tasks.

2019-08-24 03:25:05.948 [taskGroup-0] INFO  Channel - Channel set byte_speed_limit to -1, No bps activated.

2019-08-24 03:25:05.948 [taskGroup-0] INFO  Channel - Channel set record_speed_limit to -1, No tps activated.

2019-08-24 03:25:05.980 [taskGroup-0] INFO  TaskGroupContainer - taskGroup[0] taskId[0] attemptCount[1] is started

2019-08-24 03:25:06.051 [0-0-0-reader] INFO  HdfsReader$Job - hadoopConfig details:{"finalParameters":["mapreduce.job.end-notification.max.retry.interval","mapreduce.job.end-notification.max.attempts"]}

2019-08-24 03:25:06.053 [0-0-0-reader] INFO  Reader$Task - read start

2019-08-24 03:25:06.059 [0-0-0-reader] INFO  Reader$Task - reading file : [hdfs://192.168.1.181:9000/user/hive/warehouse/zmdw.db/dim_area/tmp__868c08e3_cc1a_46d4_996a_1591d25a6f0c]

2019-08-24 03:25:06.103 [0-0-0-reader] INFO  UnstructuredStorageReaderUtil - CsvReader使用默认值[{"captureRawRecord":true,"columnCount":0,"comment":"#","currentRecord":-1,"delimiter":"\t","escapeMode":1,"headerCount":0,"rawRecord":"","recordDelimiter":"\u0000","safetySwitch":false,"skipEmptyRecords":true,"textQualifier":"\"","trimWhitespace":true,"useComments":false,"useTextQualifier":true,"values":[]}],csvReaderConfig值为[null]

2019-08-24 03:25:06.285 [0-0-0-reader] INFO  Reader$Task - end read source files...

2019-08-24 03:25:06.382 [taskGroup-0] INFO  TaskGroupContainer - taskGroup[0] taskId[0] is successed, used[411]ms

2019-08-24 03:25:06.382 [taskGroup-0] INFO  TaskGroupContainer - taskGroup[0] completed it's tasks.

2019-08-24 03:25:15.961 [job-0] INFO  StandAloneJobContainerCommunicator - Total 3258 records, 49722 bytes | Speed 4.86KB/s, 325 records/s | Error 0 records, 0 bytes |  All Task WaitWriterTime 0.129s |  All Task WaitReaderTime 0.137s | Percentage 100.00%

2019-08-24 03:25:15.961 [job-0] INFO  AbstractScheduler - Scheduler accomplished all tasks.

2019-08-24 03:25:15.961 [job-0] INFO  JobContainer - DataX Writer.Job [mysqlwriter] do post work.

2019-08-24 03:25:15.962 [job-0] INFO  JobContainer - DataX Reader.Job [hdfsreader] do post work.

2019-08-24 03:25:15.962 [job-0] INFO  JobContainer - DataX jobId [0] completed successfully.

2019-08-24 03:25:15.964 [job-0] INFO  HookInvoker - No hook invoked, because base dir not exists or is a file: /app/datax/hook

2019-08-24 03:25:15.967 [job-0] INFO  JobContainer -

         [total cpu info] =>

                averageCpu                     | maxDeltaCpu                    | minDeltaCpu                   

                -1.00%                         | -1.00%                         | -1.00%

                       

 

         [total gc info] =>

                 NAME                 | totalGCCount       | maxDeltaGCCount    | minDeltaGCCount    | totalGCTime        | maxDeltaGCTime     | minDeltaGCTime    

                 Copy                 | 0                  | 0                  | 0                  | 0.000s             | 0.000s             | 0.000s             

                 MarkSweepCompact     | 1                  | 1                  | 1                  | 0.036s             | 0.036s             | 0.036s            

 

2019-08-24 03:25:15.967 [job-0] INFO  JobContainer - PerfTrace not enable!

2019-08-24 03:25:15.968 [job-0] INFO  StandAloneJobContainerCommunicator - Total 3258 records, 49722 bytes | Speed 4.86KB/s, 325 records/s | Error 0 records, 0 bytes |  All Task WaitWriterTime 0.129s |  All Task WaitReaderTime 0.137s | Percentage 100.00%

2019-08-24 03:25:15.973 [job-0] INFO  JobContainer -

任务启动时刻                    : 2019-08-24 03:25:03

任务结束时刻                    : 2019-08-24 03:25:15

任务总计耗时                    :                 12s

任务平均流量                    :            4.86KB/s

记录写入速度                    :            325rec/s

读出记录总数                    :                3258

读写失败总数                    :                   0
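As in the forward direction, the reverse load can be verified with a row count. Note that mysqlwriter is configured with writeMode "insert" and an empty preSql in dim_area3.json, so the 3258 rows are appended to whatever dim_area already contains; add a preSql such as "delete from dim_area" to the job if a clean reload is required.

# Row count after the reverse load; remember that this job appends rather than replaces.
mysql -h 192.168.10.107 -u root -proot zmdwdb -e "SELECT COUNT(*) FROM dim_area;"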

 
