- Environment
  - Hadoop 2.7.3
  - Hive 1.2.1
  - JDK 1.8
  - Python 2.7 (ships with CentOS by default)
  - MySQL 5.7
  - DataX 3.0 (download: http://datax-opensource.oss-cn-hangzhou.aliyuncs.com/datax.tar.gz)
- Test table and data
Hive DDL for the dim_area target table (tab-delimited, which the job configurations below must match):

```sql
DROP TABLE IF EXISTS dim_area;
CREATE TABLE IF NOT EXISTS dim_area (
  id        BIGINT COMMENT '',
  name      STRING COMMENT '地区名称',
  parent_id BIGINT COMMENT ''
) COMMENT ''
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';
```
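A minimal sketch of applying this DDL from the shell, assuming it is saved locally as dim_area.sql; the zmdw database name is inferred from the warehouse path /user/hive/warehouse/zmdw.db/ used in the job configurations below:

```bash
# Assumption: the DDL above is saved as dim_area.sql and the zmdw database
# may not exist yet (its warehouse dir appears in the DataX job configs).
hive -e "CREATE DATABASE IF NOT EXISTS zmdw;"
hive --database zmdw -f dim_area.sql
```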
Test data for the MySQL source table (a truncated excerpt; the full dataset contains 3258 rows, matching the record counts in the job logs below):

```sql
INSERT INTO dim_area (id, name, parent_id) VALUES (110000, '北京市', null);
INSERT INTO dim_area (id, name, parent_id) VALUES (110100, '北京市', 110000);
INSERT INTO dim_area (id, name, parent_id) VALUES (110101, '东城区', 110100);
INSERT INTO dim_area (id, name, parent_id) VALUES (110102, '西城区', 110100);
INSERT INTO dim_area (id, name, parent_id) VALUES (110103, '朝阳区', 110100);
INSERT INTO dim_area (id, name, parent_id) VALUES (110104, '丰台区', 110100);
INSERT INTO dim_area (id, name, parent_id) VALUES (110105, '石景山区', 110100);
INSERT INTO dim_area (id, name, parent_id) VALUES (110106, '海淀区', 110100);
INSERT INTO dim_area (id, name, parent_id) VALUES (110107, '门头沟区', 110100);
INSERT INTO dim_area (id, name, parent_id) VALUES (110108, '房山区', 110100);
INSERT INTO dim_area (id, name, parent_id) VALUES (110109, '通州区', 110100);
INSERT INTO dim_area (id, name, parent_id) VALUES (110110, '顺义区', 110100);
INSERT INTO dim_area (id, name, parent_id) VALUES (110111, '昌平区', 110100);
INSERT INTO dim_area (id, name, parent_id) VALUES (110112, '大兴区', 110100);
INSERT INTO dim_area (id, name, parent_id) VALUES (110113, '怀柔区', 110100);
INSERT INTO dim_area (id, name, parent_id) VALUES (110114, '平谷区', 110100);
INSERT INTO dim_area (id, name, parent_id) VALUES (110115, '密云县', 110100);
INSERT INTO dim_area (id, name, parent_id) VALUES (110116, '延庆县', 110100);
INSERT INTO dim_area (id, name, parent_id) VALUES (120000, '天津市', null);
INSERT INTO dim_area (id, name, parent_id) VALUES (120100, '天津市', 120000);
INSERT INTO dim_area (id, name, parent_id) VALUES (120101, '和平区', 120100);
INSERT INTO dim_area (id, name, parent_id) VALUES (120102, '河东区', 120100);
INSERT INTO dim_area (id, name, parent_id) VALUES (120103, '河西区', 120100);
INSERT INTO dim_area (id, name, parent_id) VALUES (120104, '南开区', 120100);
INSERT INTO dim_area (id, name, parent_id) VALUES (120105, '河北区', 120100);
INSERT INTO dim_area (id, name, parent_id) VALUES (120106, '红桥区', 120100);
INSERT INTO dim_area (id, name, parent_id) VALUES (120107, '滨海新区', 120100);
INSERT INTO dim_area (id, name, parent_id) VALUES (120108, '东丽区', 120100);
INSERT INTO dim_area (id, name, parent_id) VALUES (120109, '西青区', 120100);
INSERT INTO dim_area (id, name, parent_id) VALUES (120110, '津南区', 120100);
INSERT INTO dim_area (id, name, parent_id) VALUES (120111, '北辰区', 120100);
INSERT INTO dim_area (id, name, parent_id) VALUES (120112, '武清区', 120100);
INSERT INTO dim_area (id, name, parent_id) VALUES (120113, '宝坻区', 120100);
INSERT INTO dim_area (id, name, parent_id) VALUES (120114, '宁河县', 120100);
INSERT INTO dim_area (id, name, parent_id) VALUES (120115, '静海县', 120100);
INSERT INTO dim_area (id, name, parent_id) VALUES (120116, '蓟县', 120100);
INSERT INTO dim_area (id, name, parent_id) VALUES (130000, '河北省', null);
INSERT INTO dim_area (id, name, parent_id) VALUES (130100, '石家庄市', 130000);
INSERT INTO dim_area (id, name, parent_id) VALUES (130102, '长安区', 130100);
INSERT INTO dim_area (id, name, parent_id) VALUES (130103, '桥东区', 130100);
INSERT INTO dim_area (id, name, parent_id) VALUES (130104, '桥西区', 130100);
INSERT INTO dim_area (id, name, parent_id) VALUES (130105, '新华区', 130100);
INSERT INTO dim_area (id, name, parent_id) VALUES (130107, '井陉矿区', 130100);
INSERT INTO dim_area (id, name, parent_id) VALUES (130108, '裕华区', 130100);
INSERT INTO dim_area (id, name, parent_id) VALUES (130121, '井陉县', 130100);
INSERT INTO dim_area (id, name, parent_id) VALUES (130123, '正定县', 130100);
INSERT INTO dim_area (id, name, parent_id) VALUES (130124, '栾城县', 130100);
INSERT INTO dim_area (id, name, parent_id) VALUES (130125, '行唐县', 130100);
INSERT INTO dim_area (id, name, parent_id) VALUES (130126, '灵寿县', 130100);
INSERT INTO dim_area (id, name, parent_id) VALUES (130127, '高邑县', 130100);
```
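Before running the migration it is worth confirming the source row count; a sketch using the connection values from the job configuration below:

```bash
# Count rows in the MySQL source table; host/user/password are the values
# from the dim_area2.json job config below.
mysql -h192.168.10.107 -uroot -proot zmdwdb \
  -e "SELECT COUNT(*) FROM dim_area;"
# The DataX job logs below report 3258 records read in total.
```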
{ "job": { "setting": { "speed": { "channel": 10 } }, "content": [ { "reader": { "name": "mysqlreader", "parameter": { "username": "root", "password": "root", "connection": [ { "querySql": [ "SELECT id, name, parent_id FROM dim_area;" ], "jdbcUrl": [ "jdbc:mysql://192.168.10.107:3306/zmdwdb" ] } ] } }, "writer": { "name": "hdfswriter", "parameter": { "defaultFS": "hdfs://192.168.1.181:9000/", "fileType": "text", "path": "/user/hive/warehouse/zmdw.db/dim_area/", "fileName": "tmp", "column": [ { "name": "id", "type": "bigint" }, { "name": "name", "type": "string" }, { "name": "parent_id", "type": "bigint" } ], "writeMode": "append", "fieldDelimiter": "\t", "compress":"" } } } ] } } |
{ "job": { "content": [ { "reader": { "name": "hdfsreader", "parameter": { "column": [ { "index": 0, "type": "string" }, { "index": 1, "type": "string" }, { "index": 2, "type": "string" } ], "defaultFS": "hdfs://192.168.1.181:9000/", "encoding": "UTF-8", "fieldDelimiter": "\t", "fileType": "text", "path": "/user/hive/warehouse/zmdw.db/dim_area/" } }, "writer": { "name": "mysqlwriter", "parameter": { "column": ["id","name","parent_id"], "connection": [ { "jdbcUrl": "jdbc:mysql://192.168.10.107:3306/zmdwdb", "table": ["dim_area"] } ], "password": "root", "preSql": [], "session": [], "username": "root", "writeMode": "insert" } } } ], "setting": { "speed": { "channel": "1" } } } }
|
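Rather than writing job files from scratch, datax.py can print a skeleton configuration for any reader/writer pair, which is a convenient starting point for files like the two above:

```bash
# Print job config templates for both directions used in this article.
cd datax/bin
python datax.py -r mysqlreader -w hdfswriter   # MySQL -> HDFS template
python datax.py -r hdfsreader -w mysqlwriter   # HDFS -> MySQL template
```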
- DataX installation
Download datax.tar.gz, upload it to the server, and extract it; DataX then runs in place with no further installation, as in the sketch below.
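A minimal sketch, assuming the tarball is unpacked under /app (the hook path /app/datax/hook in the logs below suggests that layout):

```bash
# Download and extract DataX; no build or install step is required.
wget http://datax-opensource.oss-cn-hangzhou.aliyuncs.com/datax.tar.gz
tar -zxvf datax.tar.gz -C /app        # assumption: /app as the install root
cd /app/datax/bin
# Smoke test with the sample job bundled in the tarball.
python datax.py ../job/job.json
```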
- MySQL-to-Hive migration test
Place the dim_area2.json job file in the dataX/bin directory and run it with the datax.py launcher:

```
[root@BIGDATA bin]# python datax.py dim_area2.json
```

A successful migration produces output like the following:
```
DataX (DATAX-OPENSOURCE-3.0), From Alibaba !
Copyright (C) 2010-2017, Alibaba Group. All Rights Reserved.

2019-08-24 02:46:17.454 [main] INFO  VMInfo - VMInfo# operatingSystem class => sun.management.OperatingSystemImpl
2019-08-24 02:46:17.465 [main] INFO  Engine - the machine info  =>

	osInfo:	Oracle Corporation 1.8 25.131-b11
	jvmInfo:	Linux amd64 2.6.32-642.11.1.el6.x86_64
	cpu num:	1

	totalPhysicalMemory:	-0.00G
	freePhysicalMemory:	-0.00G
	maxFileDescriptorCount:	-1
	currentOpenFileDescriptorCount:	-1

	GC Names	[Copy, MarkSweepCompact]

	MEMORY_NAME            | allocation_size | init_size
	Eden Space             | 273.06MB        | 273.06MB
	Code Cache             | 240.00MB        | 2.44MB
	Survivor Space         | 34.13MB         | 34.13MB
	Compressed Class Space | 1,024.00MB      | 0.00MB
	Metaspace              | -0.00MB         | 0.00MB
	Tenured Gen            | 682.69MB        | 682.69MB

2019-08-24 02:46:17.487 [main] INFO  Engine - { "content":[ { "reader":{ "name":"mysqlreader", "parameter":{ "connection":[ { "jdbcUrl":[ "jdbc:mysql://192.168.10.107:3306/zmdwdb" ], "querySql":[ "SELECT id, name, parent_id FROM dim_area;" ] } ], "password":"****", "username":"root" } }, "writer":{ "name":"hdfswriter", "parameter":{ "column":[ { "name":"id", "type":"bigint" }, { "name":"name", "type":"string" }, { "name":"parent_id", "type":"bigint" } ], "compress":"", "defaultFS":"hdfs://192.168.1.181:9000/", "fieldDelimiter":"\t", "fileName":"tmp", "fileType":"text", "path":"/user/hive/warehouse/zmdw.db/dim_area/", "writeMode":"append" } } } ], "setting":{ "speed":{ "channel":10 } } }
2019-08-24 02:46:17.512 [main] WARN  Engine - prioriy set to 0, because NumberFormatException, the value is: null
2019-08-24 02:46:17.514 [main] INFO  PerfTrace - PerfTrace traceId=job_-1, isEnable=false, priority=0
2019-08-24 02:46:17.514 [main] INFO  JobContainer - DataX jobContainer starts job.
2019-08-24 02:46:17.522 [main] INFO  JobContainer - Set jobId = 0
2019-08-24 02:46:18.014 [job-0] INFO  OriginalConfPretreatmentUtil - Available jdbcUrl:jdbc:mysql://192.168.10.107:3306/zmdwdb?yearIsDateType=false&zeroDateTimeBehavior=convertToNull&tinyInt1isBit=false&rewriteBatchedStatements=true.
Aug 24, 2019 2:46:18 AM org.apache.hadoop.util.NativeCodeLoader <clinit>
WARNING: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
2019-08-24 02:46:19.441 [job-0] INFO  JobContainer - jobContainer starts to do prepare ...
2019-08-24 02:46:19.446 [job-0] INFO  JobContainer - DataX Reader.Job [mysqlreader] do prepare work .
2019-08-24 02:46:19.447 [job-0] INFO  JobContainer - DataX Writer.Job [hdfswriter] do prepare work .
2019-08-24 02:46:19.618 [job-0] INFO  HdfsWriter$Job - 由于您配置了writeMode append, 写入前不做清理工作, [/user/hive/warehouse/zmdw.db/dim_area/] 目录下写入相应文件名前缀 [tmp] 的文件
2019-08-24 02:46:19.618 [job-0] INFO  JobContainer - jobContainer starts to do split ...
2019-08-24 02:46:19.618 [job-0] INFO  JobContainer - Job set Channel-Number to 10 channels.
2019-08-24 02:46:19.622 [job-0] INFO  JobContainer - DataX Reader.Job [mysqlreader] splits to [1] tasks.
2019-08-24 02:46:19.623 [job-0] INFO  HdfsWriter$Job - begin do split...
2019-08-24 02:46:19.628 [job-0] INFO  HdfsWriter$Job - splited write file name:[hdfs://192.168.1.181:9000//user/hive/warehouse/zmdw.db/dim_area__0ee4362b_a3b7_43ae_8256_ef9e4449e1b9/tmp__868c08e3_cc1a_46d4_996a_1591d25a6f0c]
2019-08-24 02:46:19.629 [job-0] INFO  HdfsWriter$Job - end do split.
2019-08-24 02:46:19.629 [job-0] INFO  JobContainer - DataX Writer.Job [hdfswriter] splits to [1] tasks.
2019-08-24 02:46:19.725 [job-0] INFO  JobContainer - jobContainer starts to do schedule ...
2019-08-24 02:46:19.741 [job-0] INFO  JobContainer - Scheduler starts [1] taskGroups.
2019-08-24 02:46:19.748 [job-0] INFO  JobContainer - Running by standalone Mode.
2019-08-24 02:46:19.765 [taskGroup-0] INFO  TaskGroupContainer - taskGroupId=[0] start [1] channels for [1] tasks.
2019-08-24 02:46:19.778 [taskGroup-0] INFO  Channel - Channel set byte_speed_limit to -1, No bps activated.
2019-08-24 02:46:19.778 [taskGroup-0] INFO  Channel - Channel set record_speed_limit to -1, No tps activated.
2019-08-24 02:46:19.820 [taskGroup-0] INFO  TaskGroupContainer - taskGroup[0] taskId[0] attemptCount[1] is started
2019-08-24 02:46:19.826 [0-0-0-reader] INFO  CommonRdbmsReader$Task - Begin to read record by Sql: [SELECT id, name, parent_id FROM dim_area; ] jdbcUrl:[jdbc:mysql://192.168.10.107:3306/zmdwdb?yearIsDateType=false&zeroDateTimeBehavior=convertToNull&tinyInt1isBit=false&rewriteBatchedStatements=true].
2019-08-24 02:46:19.915 [0-0-0-writer] INFO  HdfsWriter$Task - begin do write...
2019-08-24 02:46:19.916 [0-0-0-writer] INFO  HdfsWriter$Task - write to file : [hdfs://192.168.1.181:9000//user/hive/warehouse/zmdw.db/dim_area__0ee4362b_a3b7_43ae_8256_ef9e4449e1b9/tmp__868c08e3_cc1a_46d4_996a_1591d25a6f0c]
2019-08-24 02:46:20.179 [0-0-0-reader] INFO  CommonRdbmsReader$Task - Finished read record by Sql: [SELECT id, name, parent_id FROM dim_area; ] jdbcUrl:[jdbc:mysql://192.168.10.107:3306/zmdwdb?yearIsDateType=false&zeroDateTimeBehavior=convertToNull&tinyInt1isBit=false&rewriteBatchedStatements=true].
2019-08-24 02:46:20.431 [0-0-0-writer] INFO  HdfsWriter$Task - end do write
2019-08-24 02:46:20.526 [taskGroup-0] INFO  TaskGroupContainer - taskGroup[0] taskId[0] is successed, used[711]ms
2019-08-24 02:46:20.526 [taskGroup-0] INFO  TaskGroupContainer - taskGroup[0] completed it's tasks.
2019-08-24 02:46:29.871 [job-0] INFO  StandAloneJobContainerCommunicator - Total 3258 records, 49722 bytes | Speed 4.86KB/s, 325 records/s | Error 0 records, 0 bytes | All Task WaitWriterTime 0.229s | All Task WaitReaderTime 0.000s | Percentage 100.00%
2019-08-24 02:46:29.873 [job-0] INFO  AbstractScheduler - Scheduler accomplished all tasks.
2019-08-24 02:46:29.873 [job-0] INFO  JobContainer - DataX Writer.Job [hdfswriter] do post work.
2019-08-24 02:46:29.875 [job-0] INFO  HdfsWriter$Job - start rename file [hdfs://192.168.1.181:9000//user/hive/warehouse/zmdw.db/dim_area__0ee4362b_a3b7_43ae_8256_ef9e4449e1b9/tmp__868c08e3_cc1a_46d4_996a_1591d25a6f0c] to file [hdfs://192.168.1.181:9000//user/hive/warehouse/zmdw.db/dim_area/tmp__868c08e3_cc1a_46d4_996a_1591d25a6f0c].
2019-08-24 02:46:29.887 [job-0] INFO  HdfsWriter$Job - finish rename file [hdfs://192.168.1.181:9000//user/hive/warehouse/zmdw.db/dim_area__0ee4362b_a3b7_43ae_8256_ef9e4449e1b9/tmp__868c08e3_cc1a_46d4_996a_1591d25a6f0c] to file [hdfs://192.168.1.181:9000//user/hive/warehouse/zmdw.db/dim_area/tmp__868c08e3_cc1a_46d4_996a_1591d25a6f0c].
2019-08-24 02:46:29.888 [job-0] INFO  HdfsWriter$Job - start delete tmp dir [hdfs://192.168.1.181:9000/user/hive/warehouse/zmdw.db/dim_area__0ee4362b_a3b7_43ae_8256_ef9e4449e1b9] .
2019-08-24 02:46:29.895 [job-0] INFO  HdfsWriter$Job - finish delete tmp dir [hdfs://192.168.1.181:9000/user/hive/warehouse/zmdw.db/dim_area__0ee4362b_a3b7_43ae_8256_ef9e4449e1b9] .
2019-08-24 02:46:29.896 [job-0] INFO  JobContainer - DataX Reader.Job [mysqlreader] do post work.
2019-08-24 02:46:29.896 [job-0] INFO  JobContainer - DataX jobId [0] completed successfully.
2019-08-24 02:46:29.897 [job-0] INFO  HookInvoker - No hook invoked, because base dir not exists or is a file: /app/datax/hook
2019-08-24 02:46:29.904 [job-0] INFO  JobContainer -
	 [total cpu info] =>
		averageCpu | maxDeltaCpu | minDeltaCpu
		-1.00%     | -1.00%      | -1.00%

	 [total gc info] =>
		 NAME             | totalGCCount | maxDeltaGCCount | minDeltaGCCount | totalGCTime | maxDeltaGCTime | minDeltaGCTime
		 Copy             | 0            | 0               | 0               | 0.000s      | 0.000s         | 0.000s
		 MarkSweepCompact | 1            | 1               | 1               | 0.035s      | 0.035s         | 0.035s

2019-08-24 02:46:29.904 [job-0] INFO  JobContainer - PerfTrace not enable!
2019-08-24 02:46:29.904 [job-0] INFO  StandAloneJobContainerCommunicator - Total 3258 records, 49722 bytes | Speed 4.86KB/s, 325 records/s | Error 0 records, 0 bytes | All Task WaitWriterTime 0.229s | All Task WaitReaderTime 0.000s | Percentage 100.00%
2019-08-24 02:46:29.909 [job-0] INFO  JobContainer -
任务启动时刻 : 2019-08-24 02:46:17
任务结束时刻 : 2019-08-24 02:46:29
任务总计耗时 : 12s
任务平均流量 : 4.86KB/s
记录写入速度 : 325rec/s
读出记录总数 : 3258
读写失败总数 : 0
```
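After the job finishes, the result can be verified on the Hive side; a sketch, assuming the table from the DDL above:

```bash
# Confirm that DataX produced a file under the table's warehouse directory...
hdfs dfs -ls /user/hive/warehouse/zmdw.db/dim_area/
# ...and that Hive sees the same 3258 rows the job log reports.
hive -e "SELECT COUNT(*) FROM zmdw.dim_area;"
```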
- Hive-to-MySQL migration test
Notes on migrating data from Hive back to MySQL: declare Hive column types such as bigint as string in the hdfsreader column list. Because HDFS files are plain text, strict numeric types easily trigger dirty-data errors; if DataX reports dirty data, switch the affected reader columns to string, the most compatible type, and use varchar for the corresponding MySQL columns (see the sketch below).
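To illustrate that advice, a hypothetical all-varchar variant of the MySQL target table; the table name dim_area_str and the column widths are illustrative assumptions, not from the original article:

```bash
# Hypothetical all-varchar target table for maximum type compatibility.
mysql -h192.168.10.107 -uroot -proot zmdwdb -e "
CREATE TABLE IF NOT EXISTS dim_area_str (
  id        VARCHAR(20),
  name      VARCHAR(100),
  parent_id VARCHAR(20)
);"
```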
Run the Hive-to-MySQL job from the same directory:

```
[root@BIGDATA bin]# python datax.py dim_area3.json
```

The log output:

```
DataX (DATAX-OPENSOURCE-3.0), From Alibaba !
Copyright (C) 2010-2017, Alibaba Group. All Rights Reserved.

2019-08-24 03:25:03.592 [main] INFO  VMInfo - VMInfo# operatingSystem class => sun.management.OperatingSystemImpl
2019-08-24 03:25:03.606 [main] INFO  Engine - the machine info  =>

	osInfo:	Oracle Corporation 1.8 25.131-b11
	jvmInfo:	Linux amd64 2.6.32-642.11.1.el6.x86_64
	cpu num:	1

	totalPhysicalMemory:	-0.00G
	freePhysicalMemory:	-0.00G
	maxFileDescriptorCount:	-1
	currentOpenFileDescriptorCount:	-1

	GC Names	[Copy, MarkSweepCompact]

	MEMORY_NAME            | allocation_size | init_size
	Eden Space             | 273.06MB        | 273.06MB
	Code Cache             | 240.00MB        | 2.44MB
	Survivor Space         | 34.13MB         | 34.13MB
	Compressed Class Space | 1,024.00MB      | 0.00MB
	Metaspace              | -0.00MB         | 0.00MB
	Tenured Gen            | 682.69MB        | 682.69MB

2019-08-24 03:25:03.630 [main] INFO  Engine - { "content":[ { "reader":{ "name":"hdfsreader", "parameter":{ "column":[ { "index":0, "type":"string" }, { "index":1, "type":"string" }, { "index":2, "type":"string" } ], "defaultFS":"hdfs://192.168.1.181:9000/", "encoding":"UTF-8", "fieldDelimiter":"\t", "fileType":"text", "path":"/user/hive/warehouse/zmdw.db/dim_area/" } }, "writer":{ "name":"mysqlwriter", "parameter":{ "column":[ "id", "name", "parent_id" ], "connection":[ { "jdbcUrl":"jdbc:mysql://192.168.10.107:3306/zmdwdb", "table":[ "dim_area" ] } ], "password":"****", "preSql":[], "session":[], "username":"root", "writeMode":"insert" } } } ], "setting":{ "speed":{ "channel":"1" } } }
2019-08-24 03:25:03.651 [main] WARN  Engine - prioriy set to 0, because NumberFormatException, the value is: null
2019-08-24 03:25:03.655 [main] INFO  PerfTrace - PerfTrace traceId=job_-1, isEnable=false, priority=0
2019-08-24 03:25:03.655 [main] INFO  JobContainer - DataX jobContainer starts job.
2019-08-24 03:25:03.662 [main] INFO  JobContainer - Set jobId = 0
2019-08-24 03:25:03.694 [job-0] INFO  HdfsReader$Job - init() begin...
2019-08-24 03:25:04.109 [job-0] INFO  HdfsReader$Job - hadoopConfig details:{"finalParameters":[]}
2019-08-24 03:25:04.109 [job-0] INFO  HdfsReader$Job - init() ok and end...
2019-08-24 03:25:04.500 [job-0] INFO  OriginalConfPretreatmentUtil - table:[dim_area] all columns:[ id,name,parent_id ].
2019-08-24 03:25:04.519 [job-0] INFO  OriginalConfPretreatmentUtil - Write data [ insert INTO %s (id,name,parent_id) VALUES(?,?,?) ], which jdbcUrl like:[jdbc:mysql://192.168.10.107:3306/zmdwdb?yearIsDateType=false&zeroDateTimeBehavior=convertToNull&tinyInt1isBit=false&rewriteBatchedStatements=true]
2019-08-24 03:25:04.520 [job-0] INFO  JobContainer - jobContainer starts to do prepare ...
2019-08-24 03:25:04.521 [job-0] INFO  JobContainer - DataX Reader.Job [hdfsreader] do prepare work .
2019-08-24 03:25:04.522 [job-0] INFO  HdfsReader$Job - prepare(), start to getAllFiles...
2019-08-24 03:25:04.522 [job-0] INFO  HdfsReader$Job - get HDFS all files in path = [/user/hive/warehouse/zmdw.db/dim_area/]
Aug 24, 2019 3:25:04 AM org.apache.hadoop.util.NativeCodeLoader <clinit>
WARNING: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
2019-08-24 03:25:05.861 [job-0] INFO  HdfsReader$Job - [hdfs://192.168.1.181:9000/user/hive/warehouse/zmdw.db/dim_area/tmp__868c08e3_cc1a_46d4_996a_1591d25a6f0c]是[text]类型的文件, 将该文件加入source files列表
2019-08-24 03:25:05.868 [job-0] INFO  HdfsReader$Job - 您即将读取的文件数为: [1], 列表为: [hdfs://192.168.1.181:9000/user/hive/warehouse/zmdw.db/dim_area/tmp__868c08e3_cc1a_46d4_996a_1591d25a6f0c]
2019-08-24 03:25:05.869 [job-0] INFO  JobContainer - DataX Writer.Job [mysqlwriter] do prepare work .
2019-08-24 03:25:05.871 [job-0] INFO  JobContainer - jobContainer starts to do split ...
2019-08-24 03:25:05.871 [job-0] INFO  JobContainer - Job set Channel-Number to 1 channels.
2019-08-24 03:25:05.872 [job-0] INFO  HdfsReader$Job - split() begin...
2019-08-24 03:25:05.877 [job-0] INFO  JobContainer - DataX Reader.Job [hdfsreader] splits to [1] tasks.
2019-08-24 03:25:05.877 [job-0] INFO  JobContainer - DataX Writer.Job [mysqlwriter] splits to [1] tasks.
2019-08-24 03:25:05.899 [job-0] INFO  JobContainer - jobContainer starts to do schedule ...
2019-08-24 03:25:05.908 [job-0] INFO  JobContainer - Scheduler starts [1] taskGroups.
2019-08-24 03:25:05.914 [job-0] INFO  JobContainer - Running by standalone Mode.
2019-08-24 03:25:05.936 [taskGroup-0] INFO  TaskGroupContainer - taskGroupId=[0] start [1] channels for [1] tasks.
2019-08-24 03:25:05.948 [taskGroup-0] INFO  Channel - Channel set byte_speed_limit to -1, No bps activated.
2019-08-24 03:25:05.948 [taskGroup-0] INFO  Channel - Channel set record_speed_limit to -1, No tps activated.
2019-08-24 03:25:05.980 [taskGroup-0] INFO  TaskGroupContainer - taskGroup[0] taskId[0] attemptCount[1] is started
2019-08-24 03:25:06.051 [0-0-0-reader] INFO  HdfsReader$Job - hadoopConfig details:{"finalParameters":["mapreduce.job.end-notification.max.retry.interval","mapreduce.job.end-notification.max.attempts"]}
2019-08-24 03:25:06.053 [0-0-0-reader] INFO  Reader$Task - read start
2019-08-24 03:25:06.059 [0-0-0-reader] INFO  Reader$Task - reading file : [hdfs://192.168.1.181:9000/user/hive/warehouse/zmdw.db/dim_area/tmp__868c08e3_cc1a_46d4_996a_1591d25a6f0c]
2019-08-24 03:25:06.103 [0-0-0-reader] INFO  UnstructuredStorageReaderUtil - CsvReader使用默认值[{"captureRawRecord":true,"columnCount":0,"comment":"#","currentRecord":-1,"delimiter":"\t","escapeMode":1,"headerCount":0,"rawRecord":"","recordDelimiter":"\u0000","safetySwitch":false,"skipEmptyRecords":true,"textQualifier":"\"","trimWhitespace":true,"useComments":false,"useTextQualifier":true,"values":[]}],csvReaderConfig值为[null]
2019-08-24 03:25:06.285 [0-0-0-reader] INFO  Reader$Task - end read source files...
2019-08-24 03:25:06.382 [taskGroup-0] INFO  TaskGroupContainer - taskGroup[0] taskId[0] is successed, used[411]ms
2019-08-24 03:25:06.382 [taskGroup-0] INFO  TaskGroupContainer - taskGroup[0] completed it's tasks.
2019-08-24 03:25:15.961 [job-0] INFO  StandAloneJobContainerCommunicator - Total 3258 records, 49722 bytes | Speed 4.86KB/s, 325 records/s | Error 0 records, 0 bytes | All Task WaitWriterTime 0.129s | All Task WaitReaderTime 0.137s | Percentage 100.00%
2019-08-24 03:25:15.961 [job-0] INFO  AbstractScheduler - Scheduler accomplished all tasks.
2019-08-24 03:25:15.961 [job-0] INFO  JobContainer - DataX Writer.Job [mysqlwriter] do post work.
2019-08-24 03:25:15.962 [job-0] INFO  JobContainer - DataX Reader.Job [hdfsreader] do post work.
2019-08-24 03:25:15.962 [job-0] INFO  JobContainer - DataX jobId [0] completed successfully.
2019-08-24 03:25:15.964 [job-0] INFO  HookInvoker - No hook invoked, because base dir not exists or is a file: /app/datax/hook
2019-08-24 03:25:15.967 [job-0] INFO  JobContainer -
	 [total cpu info] =>
		averageCpu | maxDeltaCpu | minDeltaCpu
		-1.00%     | -1.00%      | -1.00%

	 [total gc info] =>
		 NAME             | totalGCCount | maxDeltaGCCount | minDeltaGCCount | totalGCTime | maxDeltaGCTime | minDeltaGCTime
		 Copy             | 0            | 0               | 0               | 0.000s      | 0.000s         | 0.000s
		 MarkSweepCompact | 1            | 1               | 1               | 0.036s      | 0.036s         | 0.036s

2019-08-24 03:25:15.967 [job-0] INFO  JobContainer - PerfTrace not enable!
2019-08-24 03:25:15.968 [job-0] INFO  StandAloneJobContainerCommunicator - Total 3258 records, 49722 bytes | Speed 4.86KB/s, 325 records/s | Error 0 records, 0 bytes | All Task WaitWriterTime 0.129s | All Task WaitReaderTime 0.137s | Percentage 100.00%
2019-08-24 03:25:15.973 [job-0] INFO  JobContainer -
任务启动时刻 : 2019-08-24 03:25:03
任务结束时刻 : 2019-08-24 03:25:15
任务总计耗时 : 12s
任务平均流量 : 4.86KB/s
记录写入速度 : 325rec/s
读出记录总数 : 3258
读写失败总数 : 0
```
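Finally, the write-back can be checked on the MySQL side; a sketch, keeping in mind that writeMode "insert" appends, so the count should equal the table's previous contents plus the 3258 records the job log reports:

```bash
# Verify the round trip on the MySQL target table.
mysql -h192.168.10.107 -uroot -proot zmdwdb \
  -e "SELECT COUNT(*) FROM dim_area;"
```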