Skip to content

Commit

Permalink
Merge pull request #37 from epam/issue_34/file_label
Browse files Browse the repository at this point in the history
added ability to get file label
  • Loading branch information
Yana-Guseva authored Mar 13, 2019
2 parents 8fb1b45 + a9e6e2f commit 4b114cd
Show file tree
Hide file tree
Showing 92 changed files with 1,820 additions and 30 deletions.
8 changes: 8 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,14 @@
</execution>
</executions>
</plugin>
<!--
Test runner configuration: adds -Dfile.encoding=${project.build.sourceEncoding} to the argLine
that Surefire uses for the test JVM. Presumably this keeps encoding-sensitive tests independent
of the platform default charset; confirm against the tests that rely on it.
-->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.12.4</version>
<configuration>
<argLine>-Dfile.encoding=${project.build.sourceEncoding}</argLine>
</configuration>
</plugin>
</plugins>
<resources>
<resource>
Expand Down
11 changes: 10 additions & 1 deletion src/main/java/com/epam/parso/DataWriterUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,16 @@
import java.text.DecimalFormatSymbols;
import java.text.Format;
import java.text.SimpleDateFormat;
import java.util.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.TimeZone;

/**
* A helper class to allow re-use formatted values from sas7bdat file.
Expand Down
31 changes: 27 additions & 4 deletions src/main/java/com/epam/parso/SasFileProperties.java
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
/**
* *************************************************************************
* Copyright (C) 2015 EPAM
* <p>
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
*
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* <p>
* *************************************************************************
*/

Expand Down Expand Up @@ -61,6 +61,11 @@ public class SasFileProperties {
*/
private String fileType;

/**
* The label of the sas7bdat file.
*/
private String fileLabel;

/**
* The date of the sas7bdat file creation.
*/
Expand Down Expand Up @@ -169,6 +174,24 @@ public void setFileType(String fileType) {
this.fileType = fileType;
}

/**
* Returns the file label held by this properties object.
*
* @return the label of the sas7bdat file, exactly as it was last passed to
* {@link #setFileLabel(String)}.
*/
public String getFileLabel() {
    return this.fileLabel;
}

/**
* Stores the file label in this properties object.
*
* @param fileLabel the label of the sas7bdat file; the value is kept as given, without any
*                  validation or normalization.
*/
public void setFileLabel(String fileLabel) {
    this.fileLabel = fileLabel;
}

/**
* The function to get dateCreated.
*
Expand Down
9 changes: 5 additions & 4 deletions src/main/java/com/epam/parso/impl/CSVMetadataWriterImpl.java
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
/**
* *************************************************************************
* Copyright (C) 2015 EPAM
* <p>
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
*
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* <p>
* *************************************************************************
*/

Expand Down Expand Up @@ -171,6 +171,7 @@ public void writeSasFileProperties(SasFileProperties sasFileProperties) throws I
constructPropertiesString("Encoding: ", sasFileProperties.getEncoding());
constructPropertiesString("Name: ", sasFileProperties.getName());
constructPropertiesString("File type: ", sasFileProperties.getFileType());
constructPropertiesString("File label: ", sasFileProperties.getFileLabel());
constructPropertiesString("Date created: ", sasFileProperties.getDateCreated());
constructPropertiesString("Date modified: ", sasFileProperties.getDateModified());
constructPropertiesString("SAS release: ", sasFileProperties.getSasRelease());
Expand Down
52 changes: 46 additions & 6 deletions src/main/java/com/epam/parso/impl/SasFileConstants.java
Original file line number Diff line number Diff line change
@@ -1,30 +1,30 @@
/**
* *************************************************************************
* Copyright (C) 2015 EPAM
* <p>
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
*
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* <p>
* *************************************************************************
*/

package com.epam.parso.impl;

import java.util.Arrays;
import java.util.Collections;
import java.util.Map;
import java.util.HashMap;
import java.util.Set;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

/**
* This is a class to store constants for parsing the sas7bdat file (byte offsets, column formats, accuracy) as well as
Expand Down Expand Up @@ -931,6 +931,46 @@ interface SasFileConstants {
*/
int COLUMN_LABEL_LENGTH_LENGTH = 2;

/**
* Fixed part of the position of the "file label offset" field.
* <p>
* The sas7bdat file stores the offset (in symbols) of the file label from
* the beginning of the text block of the {@link SasFileParser.ColumnTextSubheader} subheader
* where it belongs:
* - with the length of {@link SasFileConstants#FILE_FORMAT_OFFSET_LENGTH} bytes,
* - at an offset calculated as FILE_FORMAT_OFFSET_OFFSET bytes + 82 * the size of value types
* (int or long depending on {@link SasFileConstants#ALIGN_2_VALUE}) from the beginning of
* the {@link SasFileParser.RowSizeSubheader} subheader.
* <p>
* NOTE(review): despite the FILE_FORMAT prefix, the value read at this position is used by the parser
* as the file label offset.
*/
long FILE_FORMAT_OFFSET_OFFSET = 24L;

/**
* Size in bytes of the "file label offset" field.
* <p>
* The sas7bdat file stores the offset (in symbols) of the file label from
* the beginning of the text block of the {@link SasFileParser.ColumnTextSubheader}
* subheader where it belongs:
* - with the length of FILE_FORMAT_OFFSET_LENGTH bytes,
* - at an offset calculated as {@link SasFileConstants#FILE_FORMAT_OFFSET_OFFSET} bytes +
* 82 * the size of value types (int or long depending on {@link SasFileConstants#ALIGN_2_VALUE}) from
* the beginning of the {@link SasFileParser.RowSizeSubheader} subheader.
*/
int FILE_FORMAT_OFFSET_LENGTH = 2;

/**
* Fixed part of the position of the "file label length" field.
* <p>
* The sas7bdat file stores the length of the file label (in symbols):
* - with the length of {@link SasFileConstants#FILE_FORMAT_LENGTH_LENGTH} bytes,
* - at an offset calculated as FILE_FORMAT_LENGTH_OFFSET bytes +
* 82 * the size of value types (int or long depending on {@link SasFileConstants#ALIGN_2_VALUE})
* from the beginning of the {@link SasFileParser.RowSizeSubheader} subheader.
*/
long FILE_FORMAT_LENGTH_OFFSET = 26L;

/**
* Size in bytes of the "file label length" field.
* <p>
* The sas7bdat file stores the length of the file label (in symbols):
* - with the length of FILE_FORMAT_LENGTH_LENGTH bytes,
* - at an offset calculated as {@link SasFileConstants#FILE_FORMAT_LENGTH_OFFSET} bytes +
* 82 * the size of value types (int or long depending on {@link SasFileConstants#ALIGN_2_VALUE})
* from the beginning of the {@link SasFileParser.RowSizeSubheader} subheader.
*/
int FILE_FORMAT_LENGTH_LENGTH = 2;

/**
* Accuracy to define whether the numeric result of {@link SasFileParser#convertByteArrayToNumber(byte[])} is
* a long or double value.
Expand Down
36 changes: 32 additions & 4 deletions src/main/java/com/epam/parso/impl/SasFileParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,20 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.*;
import java.io.DataInputStream;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import static com.epam.parso.impl.ParserMessageConstants.*;
import static com.epam.parso.impl.SasFileConstants.*;
Expand Down Expand Up @@ -180,6 +190,17 @@ public final class SasFileParser {
*/
private boolean eof;

/**
* The offset of the file label from the beginning of the {@link SasFileParser.ColumnTextSubheader} subheader.
* Assigned from a two-byte value read at the position described by
* {@link SasFileConstants#FILE_FORMAT_OFFSET_OFFSET} and later passed as the start position when
* the label text is extracted from the column text bytes.
*/
private int fileLabelOffset;

/**
* The length of file label which is stored in the {@link SasFileParser.ColumnTextSubheader} subheader
* with {@link SasFileParser#fileLabelOffset} offset.
* Assigned from a two-byte value read at the position described by
* {@link SasFileConstants#FILE_FORMAT_LENGTH_OFFSET}.
*/
private int fileLabelLength;

/**
* The list of missing column information.
*/
Expand Down Expand Up @@ -1176,8 +1197,11 @@ public void processSubheader(long subheaderOffset, long subheaderLength) throws
int intOrLongLength = sasFileProperties.isU64() ? BYTES_IN_LONG : BYTES_IN_INT;
Long[] offset = {subheaderOffset + ROW_LENGTH_OFFSET_MULTIPLIER * intOrLongLength,
subheaderOffset + ROW_COUNT_OFFSET_MULTIPLIER * intOrLongLength,
subheaderOffset + ROW_COUNT_ON_MIX_PAGE_OFFSET_MULTIPLIER * intOrLongLength};
Integer[] length = {intOrLongLength, intOrLongLength, intOrLongLength};
subheaderOffset + ROW_COUNT_ON_MIX_PAGE_OFFSET_MULTIPLIER * intOrLongLength,
subheaderOffset + FILE_FORMAT_OFFSET_OFFSET + 82 * intOrLongLength,
subheaderOffset + FILE_FORMAT_LENGTH_OFFSET + 82 * intOrLongLength};
Integer[] length = {intOrLongLength, intOrLongLength, intOrLongLength, FILE_FORMAT_OFFSET_LENGTH,
FILE_FORMAT_LENGTH_LENGTH};
List<byte[]> vars = getBytesFromFile(offset, length);

if (sasFileProperties.getRowLength() == 0) {
Expand All @@ -1189,6 +1213,9 @@ public void processSubheader(long subheaderOffset, long subheaderLength) throws
if (sasFileProperties.getMixPageRowCount() == 0) {
sasFileProperties.setMixPageRowCount(bytesToLong(vars.get(2)));
}

fileLabelOffset = bytesToShort(vars.get(3));
fileLabelLength = bytesToShort(vars.get(4));
}
}

Expand Down Expand Up @@ -1268,6 +1295,7 @@ public void processSubheader(long subheaderOffset, long subheaderLength) throws
byte[] columnName = columnsNamesBytes.get(0);
String compessionLiteral = findCompressionLiteral(bytesToString(columnName));
sasFileProperties.setCompressionMethod(compessionLiteral); //might be null
sasFileProperties.setFileLabel(bytesToString(columnName, fileLabelOffset, fileLabelLength));
}
}
}
Expand Down
2 changes: 0 additions & 2 deletions src/test/java/com/epam/parso/CSVDataWriterUnitTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,4 @@ public void testData() {
}
}
}


}
51 changes: 44 additions & 7 deletions src/test/java/com/epam/parso/SasFileReaderUnitTest.java
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
/**
* *************************************************************************
* Copyright (C) 2015 EPAM
* <p>
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
*
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* <p>
* *************************************************************************
*/

Expand All @@ -27,9 +27,20 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.*;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.io.StringWriter;
import java.io.Writer;
import java.net.URL;
import java.util.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.Locale;

import static com.epam.parso.TestUtils.getResourceAsStream;
import static org.fest.assertions.Assertions.assertThat;
Expand Down Expand Up @@ -73,6 +84,7 @@ public class SasFileReaderUnitTest {
COLON_SAS_FILE_PROPERTIES.setEncoding(null);
COLON_SAS_FILE_PROPERTIES.setName("colon");
COLON_SAS_FILE_PROPERTIES.setFileType("DATA");
COLON_SAS_FILE_PROPERTIES.setFileLabel("");
COLON_SAS_FILE_PROPERTIES.setDateCreated(new Date(854409600000L));
COLON_SAS_FILE_PROPERTIES.setDateModified(new Date(854409600000L));
COLON_SAS_FILE_PROPERTIES.setSasRelease("7.00.00B");
Expand Down Expand Up @@ -131,13 +143,14 @@ public void testMetadata() {
fileName.replace(".sas7bdat", "").replace("sas7bdat", "csv") + "_meta.csv")));
CSVMetadataWriter csvMetadataWriter = new CSVMetadataWriterImpl(writer);
csvMetadataWriter.writeMetadata(sasFileReader.getColumns());
csvMetadataWriter.writeSasFileProperties(sasFileReader.getSasFileProperties());
} catch (IOException e) {
logger.error(e.getMessage(), e);
} finally {
closeWriter(writer);
closeInputStream(fileInputStream);
}
compareResultWithControl(controlReader, writer, 0, sasFileReader.getColumns());
compareResultWithControl(controlReader, writer);
logger.info("Time passed: {} ms", System.currentTimeMillis() - programStart);
}

Expand Down Expand Up @@ -230,6 +243,7 @@ public void testSasFileProperties() throws IOException {
assertThat(sasFileProperties.getEncoding()).isEqualTo(COLON_SAS_FILE_PROPERTIES.getEncoding());
assertThat(sasFileProperties.getName()).isEqualTo(COLON_SAS_FILE_PROPERTIES.getName());
assertThat(sasFileProperties.getFileType()).isEqualTo(COLON_SAS_FILE_PROPERTIES.getFileType());
assertThat(sasFileProperties.getFileLabel()).isEqualTo(COLON_SAS_FILE_PROPERTIES.getFileLabel());
assertThat(sasFileProperties.getDateCreated()).isEqualTo(COLON_SAS_FILE_PROPERTIES.getDateCreated());
assertThat(sasFileProperties.getDateModified()).isEqualTo(COLON_SAS_FILE_PROPERTIES.getDateModified());
assertThat(sasFileProperties.getSasRelease()).isEqualTo(COLON_SAS_FILE_PROPERTIES.getSasRelease());
Expand Down Expand Up @@ -329,6 +343,29 @@ private void compareResultWithControl(CSVReader controlReader, Writer writer, in
}
}

/**
* Compares the CSV text accumulated in {@code writer} with the control CSV, line by line and
* element by element.
* <p>
* Every line produced by the writer must have a counterpart in the control reader with the same
* number of elements and equal element values. A result line without a matching control line
* fails the comparison. As before, control lines remaining after the result is exhausted are
* not checked.
* <p>
* An {@link IOException} raised while reading is logged and does not fail the comparison, which
* matches how the rest of this test class treats I/O errors.
*
* @param controlReader reader positioned at the first control line to compare against.
* @param writer        writer whose {@code toString()} yields the CSV text under test.
*/
private void compareResultWithControl(CSVReader controlReader, Writer writer) {
    CSVReader resultReader = null;
    int lineNumber = 1;
    try {
        resultReader = new CSVReader(new StringReader(writer.toString()));
        String[] resultLine;
        // Drive the loop by the result only: reading both readers in one short-circuit condition
        // would consume a result line and then drop it unnoticed once the control runs out.
        while ((resultLine = resultReader.readNext()) != null) {
            String[] controlLine = controlReader.readNext();
            // The result must not contain more lines than the control.
            assertThat(controlLine).isNotNull();
            assertThat(resultLine.length).isEqualTo(controlLine.length);
            for (int i = 0; i < controlLine.length; i++) {
                assertThat("Element in line number " + lineNumber + " : " + resultLine[i])
                        .isEqualTo("Element in line number " + lineNumber + " : " + controlLine[i]);
            }
            lineNumber++;
        }
    } catch (IOException e) {
        logger.error(e.getMessage(), e);
    } finally {
        closeCSVReader(resultReader);
    }
}

public void setFileName(String fileName) {
this.fileName = fileName;
}
Expand Down
Loading

0 comments on commit 4b114cd

Please sign in to comment.