
Commit ec6819f (merge commit, 2 parents: d261881 + f056fad)

for code scan: merge from gitlab's master -> github's master

File tree: 48 files changed, +719 -547 lines


.gitignore

+8
@@ -13,3 +13,11 @@ hsf/
 *.pyc
 *.DS_Store
 .git
+/simulator/
+zsearchwriter/
+tairwriter/
+/tddlwriter/
+/mysqlwriter/
+/oceanbasereader/
+/oceanbasewriter/
+/otswriter-internal/

core/src/main/conf/.secret.properties

+6
@@ -1,3 +1,9 @@
 #ds basicAuth config
 auth.user=
 auth.pass=
+current.keyVersion=
+current.publicKey=
+current.privateKey=
+current.service.username=
+current.service.password=
+

core/src/main/java/com/alibaba/datax/core/job/JobContainer.java

+1
@@ -603,6 +603,7 @@ private void logStatistics() {
 
         super.getContainerCommunicator().report(reportCommunication);
 
+
         LOG.info(String.format(
                 "\n" + "%-26s: %-18s\n" + "%-26s: %-18s\n" + "%-26s: %19s\n"
                         + "%-26s: %19s\n" + "%-26s: %19s\n" + "%-26s: %19s\n"

core/src/main/java/com/alibaba/datax/core/taskgroup/TaskGroupContainer.java

-4
@@ -21,13 +21,11 @@
 import com.alibaba.datax.core.transport.exchanger.BufferedRecordExchanger;
 import com.alibaba.datax.core.transport.exchanger.BufferedRecordTransformerExchanger;
 import com.alibaba.datax.core.transport.transformer.TransformerExecution;
-import com.alibaba.datax.core.transport.transformer.TransformerInfo;
 import com.alibaba.datax.core.util.ClassUtil;
 import com.alibaba.datax.core.util.FrameworkErrorCode;
 import com.alibaba.datax.core.util.TransformerUtil;
 import com.alibaba.datax.core.util.container.CoreConstant;
 import com.alibaba.datax.core.util.container.LoadUtil;
-import com.alibaba.datax.dataxservice.face.domain.enums.ExecuteMode;
 import com.alibaba.datax.dataxservice.face.domain.enums.State;
 import com.alibaba.fastjson.JSON;
 import org.apache.commons.lang3.Validate;
@@ -108,7 +106,6 @@ public void start() {
         /**
          * report performance statistics once every 2 minutes
          */
-        long perfReportIntervalMultiple = 12;
 
         // get the number of channels
         int channelNumber = this.configuration.getInt(
@@ -144,7 +141,6 @@ public void start() {
         Map<Integer, Long> taskStartTimeMap = new HashMap<Integer, Long>(); // task start time
 
         long lastReportTimeStamp = 0;
-        long lastPerfReportTimeStamp = 0;
         Communication lastTaskGroupContainerCommunication = new Communication();
 
         while (true) {
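
Note on the removal above: perfReportIntervalMultiple and lastPerfReportTimeStamp look like scaffolding for a second, slower reporting cadence inside the start() loop; the comment above them reads "report performance statistics once every 2 minutes", i.e. 12 times a roughly 10-second base interval. Below is a minimal sketch of that timestamp-gated pattern, for illustration only; the name reportIntervalInMillSec and the 10-second base value are assumptions, not taken from this commit.

    // Sketch of the periodic-report pattern implied by the removed fields.
    public class ReportLoopSketch {
        public static void main(String[] args) throws InterruptedException {
            long reportIntervalInMillSec = 10_000L;   // assumed base reporting interval
            long perfReportIntervalMultiple = 12;     // 12 x 10s ~= 2 minutes
            long lastReportTimeStamp = 0;
            long lastPerfReportTimeStamp = 0;

            while (true) {
                long now = System.currentTimeMillis();

                if (now - lastReportTimeStamp > reportIntervalInMillSec) {
                    System.out.println("regular status report");
                    lastReportTimeStamp = now;
                }

                if (now - lastPerfReportTimeStamp > perfReportIntervalMultiple * reportIntervalInMillSec) {
                    System.out.println("performance report (the cadence this commit drops)");
                    lastPerfReportTimeStamp = now;
                }

                Thread.sleep(1_000L); // poll once per second
            }
        }
    }

With those two fields gone, only the ordinary lastReportTimeStamp-based report remains in TaskGroupContainer.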

core/src/main/java/com/alibaba/datax/core/transport/channel/Channel.java

+1
@@ -244,4 +244,5 @@ private void statPull(long recordSize, long byteSize) {
         currentCommunication.increaseCounter(
                 CommunicationTool.WRITE_RECEIVED_BYTES, byteSize);
     }
+
 }

ftpreader/src/main/java/com/alibaba/datax/plugin/reader/ftpreader/FtpReader.java

-6
@@ -14,12 +14,6 @@
 import com.alibaba.datax.common.util.Configuration;
 import com.alibaba.datax.plugin.unstructuredstorage.reader.UnstructuredStorageReaderUtil;
 
-/**
- *
- * @ClassName: FtpFileReader
- * @date 2015-07-06 09:24:57
- *
- */
 public class FtpReader extends Reader {
     public static class Job extends Reader.Job {
         private static final Logger LOG = LoggerFactory.getLogger(Job.class);
ftpreader/src/main/java/com/alibaba/datax/plugin/reader/ftpreader/Key.java

+13-13 (content identical; whitespace/line-ending normalization only)
@@ -1,13 +1,13 @@
-package com.alibaba.datax.plugin.reader.ftpreader;
-
-public class Key {
-    public static final String PROTOCOL = "protocol";
-    public static final String HOST = "host";
-    public static final String USERNAME = "username";
-    public static final String PASSWORD = "password";
-    public static final String PORT = "port";
-    public static final String TIMEOUT = "timeout";
-    public static final String CONNECTPATTERN = "connectPattern";
-    public static final String PATH = "path";
-    public static final String MAXTRAVERSALLEVEL = "maxTraversalLevel";
-}
+package com.alibaba.datax.plugin.reader.ftpreader;
+
+public class Key {
+    public static final String PROTOCOL = "protocol";
+    public static final String HOST = "host";
+    public static final String USERNAME = "username";
+    public static final String PASSWORD = "password";
+    public static final String PORT = "port";
+    public static final String TIMEOUT = "timeout";
+    public static final String CONNECTPATTERN = "connectPattern";
+    public static final String PATH = "path";
+    public static final String MAXTRAVERSALLEVEL = "maxTraversalLevel";
+}

ftpreader/src/main/resources/plugin.json

+2-1
@@ -3,4 +3,5 @@
     "class": "com.alibaba.datax.plugin.reader.ftpreader.FtpReader",
     "description": "useScene: test. mechanism: use datax framework to transport data from txt file. warn: The more you know about the data, the less problems you encounter.",
     "developer": "alibaba"
-}
+}
+

hdfsreader/src/main/java/com/alibaba/datax/plugin/reader/hdfsreader/DFSUtil.java

+47-32
@@ -13,7 +13,12 @@
 import com.alibaba.datax.common.plugin.RecordSender;
 import com.alibaba.datax.common.plugin.TaskPluginCollector;
 import com.alibaba.datax.common.util.Configuration;
+import com.alibaba.datax.plugin.unstructuredstorage.reader.ColumnEntry;
 import com.alibaba.datax.plugin.unstructuredstorage.reader.UnstructuredStorageReaderErrorCode;
+import com.alibaba.datax.plugin.unstructuredstorage.reader.UnstructuredStorageReaderUtil;
+import com.alibaba.fastjson.JSON;
+import com.alibaba.fastjson.JSONObject;
+
 import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.fs.*;
 import org.apache.hadoop.hive.ql.io.orc.*;
@@ -28,9 +33,6 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-/**
- * Created by mingya.wmy on 2015/8/12.
- */
 public class DFSUtil {
     private static final Logger LOG = LoggerFactory.getLogger(HdfsReader.Job.class);
 
@@ -39,10 +41,21 @@ public class DFSUtil {
     private static final int DIRECTORY_SIZE_GUESS = 16 * 1024;
 
     private String specifiedFileType = null;
-
-    public DFSUtil(String defaultFS){
+
+    public DFSUtil(Configuration taskConfig){
         hadoopConf = new org.apache.hadoop.conf.Configuration();
-        hadoopConf.set("fs.defaultFS", defaultFS);
+        // io.file.buffer.size is a performance parameter
+        // http://blog.csdn.net/yangjl38/article/details/7583374
+        Configuration hadoopSiteParams = taskConfig.getConfiguration(Key.HADOOP_CONFIG);
+        JSONObject hadoopSiteParamsAsJsonObject = JSON.parseObject(taskConfig.getString(Key.HADOOP_CONFIG));
+        if (null != hadoopSiteParams) {
+            Set<String> paramKeys = hadoopSiteParams.getKeys();
+            for (String each : paramKeys) {
+                hadoopConf.set(each, hadoopSiteParamsAsJsonObject.getString(each));
+            }
+        }
+        hadoopConf.set("fs.defaultFS", taskConfig.getString(Key.DEFAULT_FS));
+        LOG.info(String.format("hadoopConfig details:%s", JSON.toJSONString(hadoopConf)));
     }
 
 
@@ -159,7 +172,7 @@ public InputStream getInputStream(String filepath){
         return null;
     }
 
-    public BufferedReader getBufferedReader(String filepath, HdfsFileType fileType, String encoding){
+    public BufferedReader getBufferedReader(String filepath, HdfsFileType fileType, String encoding, int bufferSize){
         try {
             FileSystem fs = FileSystem.get(hadoopConf);
             Path path = new Path(filepath);
@@ -181,12 +194,12 @@ public BufferedReader getBufferedReader(String filepath, HdfsFileType fileType,
                 //each time the retry interval for 20 seconds
                 in = fs.open(path);
                 cin = codec.createInputStream(in);
-                br = new BufferedReader(new InputStreamReader(cin, encoding));
+                br = new BufferedReader(new InputStreamReader(cin, encoding), bufferSize);
             } else {
                 //If the network disconnected, this method will retry 45 times
                 // each time the retry interval for 20 seconds
                 in = fs.open(path);
-                br = new BufferedReader(new InputStreamReader(in, encoding));
+                br = new BufferedReader(new InputStreamReader(in, encoding), bufferSize);
             }
             return br;
         }catch (Exception e){
@@ -198,35 +211,37 @@
     public void orcFileStartRead(String sourceOrcFilePath, Configuration readerSliceConfig,
                                  RecordSender recordSender, TaskPluginCollector taskPluginCollector){
 
-        List<Configuration> columnConfigs = readerSliceConfig.getListConfiguration(Key.COLUMN);
-        String nullFormat = readerSliceConfig.getString(Key.NULL_FORMAT);
-        String allColumns = "";
-        String allColumnTypes = "";
+        //List<Configuration> columnConfigs = readerSliceConfig.getListConfiguration(Key.COLUMN);
+        List<ColumnEntry> column = UnstructuredStorageReaderUtil
+                .getListColumnEntry(readerSliceConfig, com.alibaba.datax.plugin.unstructuredstorage.reader.Key.COLUMN);
+        String nullFormat = readerSliceConfig.getString(com.alibaba.datax.plugin.unstructuredstorage.reader.Key.NULL_FORMAT);
+        StringBuilder allColumns = new StringBuilder();
+        StringBuilder allColumnTypes = new StringBuilder();
         boolean isReadAllColumns = false;
         int columnIndexMax = -1;
         // determine whether to read all columns
-        if (null == columnConfigs || columnConfigs.size() == 0) {
+        if (null == column || column.size() == 0) {
            int allColumnsCount = getAllColumnsCount(sourceOrcFilePath);
            columnIndexMax = allColumnsCount-1;
            isReadAllColumns = true;
         }
         else {
-            columnIndexMax = getMaxIndex(columnConfigs);
+            columnIndexMax = getMaxIndex(column);
         }
         for(int i=0; i<=columnIndexMax; i++){
-            allColumns += "col";
-            allColumnTypes += "string";
+            allColumns.append("col");
+            allColumnTypes.append("string");
            if(i!=columnIndexMax){
-                allColumns += ",";
-                allColumnTypes += ":";
+                allColumns.append(",");
+                allColumnTypes.append(":");
            }
         }
         if(columnIndexMax>=0) {
            JobConf conf = new JobConf(hadoopConf);
            Path orcFilePath = new Path(sourceOrcFilePath);
            Properties p = new Properties();
-            p.setProperty("columns", allColumns);
-            p.setProperty("columns.types", allColumnTypes);
+            p.setProperty("columns", allColumns.toString());
+            p.setProperty("columns.types", allColumnTypes.toString());
            try {
                OrcSerde serde = new OrcSerde();
                serde.initialize(conf, p);
@@ -236,6 +251,7 @@ public void orcFileStartRead(String sourceOrcFilePath, Configuration readerSlice
 
                //If the network disconnected, will retry 45 times, each time the retry interval for 20 seconds
                //Each file as a split
+               //TODO multy threads
                InputSplit[] splits = in.getSplits(conf, 1);
 
                RecordReader reader = in.getRecordReader(splits[0], conf, Reporter.NULL);
@@ -252,7 +268,7 @@ public void orcFileStartRead(String sourceOrcFilePath, Configuration readerSlice
                        Object field = inspector.getStructFieldData(value, fields.get(i));
                        recordFields.add(field);
                    }
-                   transportOneRecord(columnConfigs, recordFields, recordSender,
+                   transportOneRecord(column, recordFields, recordSender,
                            taskPluginCollector, isReadAllColumns,nullFormat);
                }
                reader.close();
@@ -269,7 +285,7 @@ public void orcFileStartRead(String sourceOrcFilePath, Configuration readerSlice
        }
     }
 
-    private Record transportOneRecord(List<Configuration> columnConfigs, List<Object> recordFields
+    private Record transportOneRecord(List<ColumnEntry> columnConfigs, List<Object> recordFields
            , RecordSender recordSender, TaskPluginCollector taskPluginCollector, boolean isReadAllColumns, String nullFormat){
        Record record = recordSender.createRecord();
        Column columnGenerated = null;
@@ -286,11 +302,10 @@ private Record transportOneRecord(List<Configuration> columnConfigs, List<Object
            }
        }
        else {
-            for (Configuration columnConfig : columnConfigs) {
-                String columnType = columnConfig
-                        .getNecessaryValue(Key.TYPE, HdfsReaderErrorCode.CONFIG_INVALID_EXCEPTION);
-                Integer columnIndex = columnConfig.getInt(Key.INDEX);
-                String columnConst = columnConfig.getString(Key.VALUE);
+            for (ColumnEntry columnConfig : columnConfigs) {
+                String columnType = columnConfig.getType();
+                Integer columnIndex = columnConfig.getIndex();
+                String columnConst = columnConfig.getValue();
 
                String columnValue = null;
 
@@ -343,7 +358,7 @@ private Record transportOneRecord(List<Configuration> columnConfigs, List<Object
                        Date date = null;
                        columnGenerated = new DateColumn(date);
                    } else {
-                        String formatString = columnConfig.getString(Key.FORMAT);
+                        String formatString = columnConfig.getFormat();
                        if (StringUtils.isNotBlank(formatString)) {
                            // format conversion configured by the user
                            SimpleDateFormat format = new SimpleDateFormat(
@@ -410,10 +425,10 @@ private int getAllColumnsCount(String filePath){
        }
     }
 
-    private int getMaxIndex(List<Configuration> columnConfigs){
+    private int getMaxIndex(List<ColumnEntry> columnConfigs){
        int maxIndex = -1;
-        for (Configuration columnConfig : columnConfigs) {
-            Integer columnIndex = columnConfig.getInt(Key.INDEX);
+        for (ColumnEntry columnConfig : columnConfigs) {
+            Integer columnIndex = columnConfig.getIndex();
            if (columnIndex != null && columnIndex < 0) {
                String message = String.format("您column中配置的index不能小于0,请修改为正确的index");
                LOG.error(message);
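
The most substantial change in this commit is the hdfsreader one: DFSUtil's constructor now takes the whole task Configuration instead of a bare defaultFS string, copies every key/value found under the hadoopConfig entry into the Hadoop Configuration (so site parameters such as io.file.buffer.size can be passed straight through the job JSON), and only then sets fs.defaultFS; getBufferedReader additionally gains a bufferSize argument, and the ORC path switches from raw Configuration lookups to the shared ColumnEntry type. Below is a self-contained sketch of the hadoopConfig merge pattern using plain fastjson instead of DataX's Configuration wrapper; the literal key names "hadoopConfig" and "defaultFS" stand in for Key.HADOOP_CONFIG and Key.DEFAULT_FS, and the job fragment is a hypothetical example, not taken from this commit.

    import com.alibaba.fastjson.JSON;
    import com.alibaba.fastjson.JSONObject;

    public class HadoopConfMergeSketch {
        public static void main(String[] args) {
            // Hypothetical task fragment: everything under "hadoopConfig" is applied
            // verbatim to the Hadoop Configuration before fs.defaultFS is set.
            String taskJson = "{"
                    + "\"defaultFS\":\"hdfs://nn1:8020\","
                    + "\"hadoopConfig\":{"
                    + "\"io.file.buffer.size\":\"131072\","
                    + "\"dfs.client.use.datanode.hostname\":\"true\"}"
                    + "}";

            JSONObject task = JSON.parseObject(taskJson);
            org.apache.hadoop.conf.Configuration hadoopConf =
                    new org.apache.hadoop.conf.Configuration();

            // Merge user-supplied hadoop-site parameters, if any.
            JSONObject hadoopSiteParams = task.getJSONObject("hadoopConfig");
            if (null != hadoopSiteParams) {
                for (String key : hadoopSiteParams.keySet()) {
                    hadoopConf.set(key, hadoopSiteParams.getString(key));
                }
            }
            // fs.defaultFS is applied last, mirroring the order in the new constructor.
            hadoopConf.set("fs.defaultFS", task.getString("defaultFS"));

            System.out.println(hadoopConf.get("io.file.buffer.size")); // 131072
            System.out.println(hadoopConf.get("fs.defaultFS"));        // hdfs://nn1:8020
        }
    }

Applying fs.defaultFS after the user-supplied map means a stray fs.defaultFS inside hadoopConfig cannot silently override the reader's own defaultFS setting, which matches the ordering visible in the diff.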
