Skip to content

Commit 634c8a1

Browse files
authored
Merge pull request #10 from nRo/develop
Develop
2 parents 9b7be42 + 2754e9e commit 634c8a1

File tree

5 files changed

+193
-3
lines changed

5 files changed

+193
-3
lines changed

src/main/java/de/unknownreality/dataframe/DataFrame.java

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,23 @@
2222
* Created by algru on 12.06.2017.
2323
*/
2424
public interface DataFrame extends DataContainer<DataFrameHeader, DataRow> {
25+
/**
26+
* Returns the name of this dataframe
27+
* @return name
28+
*/
29+
String getName();
30+
31+
/**
32+
* Sets the name of this dataframe
33+
* @param name dataframe name
34+
*/
35+
void setName(String name);
36+
37+
/**
38+
* Returns the version of this dataframe.
39+
* The version is automatically incremented by every operation that alters the dataframe (e.g. sorting).
40+
* @return version
41+
*/
2542
int getVersion();
2643

2744
/**
@@ -926,6 +943,25 @@ static DataFrame create() {
926943
return new DefaultDataFrame();
927944
}
928945

946+
/**
947+
* Creates a new {@link DefaultDataFrame} instance with a name
948+
*
949+
* @param name dataframe name
950+
* @return new dataframe
951+
*/
952+
static DataFrame create(String name) {
953+
return new DefaultDataFrame(name);
954+
}
955+
956+
957+
/**
958+
* Creates a new {@link DataFrameBuilder}
959+
* @return dataframe builder
960+
*/
961+
static DataFrameBuilder builder(){
962+
return new DataFrameBuilder();
963+
}
964+
929965
/**
930966
* Loads a data frame from a file.
931967
* The matching data frame meta file must be present.

src/main/java/de/unknownreality/dataframe/DataFrameBuilder.java

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ public class DataFrameBuilder {
4545
private GroupUtil groupUtil = null;
4646
private DataIterator<?> dataIterator;
4747
private FilterPredicate filterPredicate = FilterPredicate.EMPTY_FILTER;
48+
private String name;
4849

4950
protected DataFrameBuilder() {
5051
}
@@ -75,6 +76,16 @@ public static DataFrameBuilder create() {
7576
return new DataFrameBuilder();
7677
}
7778

79+
/**
80+
* Defines the name of the resulting dataframe
81+
*
82+
* @param name data frame name
83+
* @return {@code self} for method chaining
84+
*/
85+
public DataFrameBuilder withName(String name){
86+
this.name = name;
87+
return this;
88+
}
7889
/**
7990
* Adds a new column to the builder.
8091
*
@@ -237,7 +248,7 @@ public DataFrame build() {
237248
}
238249
return DataFrameConverter.fromDataIterator(dataIterator, columnInformationList, filterPredicate);
239250
}
240-
DefaultDataFrame dataFrame = new DefaultDataFrame();
251+
DefaultDataFrame dataFrame = new DefaultDataFrame(name);
241252
for (String n : columns.keySet()) {
242253
DataFrameColumn col = columns.get(n);
243254
col.setName(n);

src/main/java/de/unknownreality/dataframe/DefaultDataFrame.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,11 +59,26 @@ public class DefaultDataFrame implements DataFrame {
5959
private JoinUtil joinUtil = new DefaultJoinUtil();
6060
private GroupUtil groupUtil = new TreeGroupUtil();
6161
private AtomicInteger version = new AtomicInteger(0);
62+
private String name;
6263

6364
public DefaultDataFrame() {
6465

6566
}
6667

68+
public DefaultDataFrame(String name){
69+
this.name = name;
70+
}
71+
72+
@Override
73+
public String getName() {
74+
return name;
75+
}
76+
77+
@Override
78+
public void setName(String name) {
79+
this.name = name;
80+
}
81+
6782
@Override
6883
public int getVersion() {
6984
return version.get();

src/main/java/de/unknownreality/dataframe/csv/CSVIterator.java

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ public class CSVIterator extends BufferedStreamIterator<CSVRow> implements DataI
5353
Map<String, Integer> selectedColumnsIndex = new HashMap<>();
5454
private List<ColumnInformation> columnInformations = new ArrayList<>();
5555
private CSVRow bufferedRow = null;
56+
private boolean[] skipIndices;
5657
public CSVIterator(BufferedReader reader, CSVSettings csvSettings, ColumnSettings columnSettings) {
5758
super(reader);
5859
this.csvSettings = csvSettings;
@@ -64,7 +65,7 @@ public CSVIterator(BufferedReader reader, CSVSettings csvSettings, ColumnSetting
6465
for (String col : columnSettings.getSelectColumns()) {
6566
selectedColumnsIndex.put(col, j++);
6667
}
67-
loadNext();
68+
//loadNext();
6869
initHeader();
6970

7071
}
@@ -84,7 +85,10 @@ public DataFrameBuilder toDataFrame(){
8485
*/
8586
public void initHeader() {
8687
try {
87-
CSVRow row = next();
88+
CSVRow row = getNext();
89+
skipIndices = new boolean[row.size()];
90+
91+
int skipIndex = 0;
8892
if (csvSettings.isContainsHeader()) {
8993
if (!row.get(0).startsWith(csvSettings.getHeaderPrefix())) {
9094
throw new CSVException("invalid header prefix in first line");
@@ -93,23 +97,29 @@ public void initHeader() {
9397
name = csvSettings.getHeaderPrefix() == null ? name : name.substring(csvSettings.getHeaderPrefix().length());
9498
if (includeColumn(name)) {
9599
header.add(name);
100+
skipIndices[skipIndex++] = false;
96101
}
97102
else{
98103
header.incrementEmptyColumnIndex();
104+
skipIndices[skipIndex++] = true;
99105
}
100106
for (int i = 1; i < row.size(); i++) {
101107
name = row.get(i);
102108
if (!includeColumn(name)) {
109+
skipIndices[skipIndex++] = true;
103110
continue;
104111
}
112+
skipIndices[skipIndex++] = false;
105113
header.add(name);
106114
}
107115
} else {
108116
for (int i = 0; i < row.size(); i++) {
109117
if (!includeColumn(header.getNextEmptyColumnName())) {
110118
header.incrementEmptyColumnIndex();
119+
skipIndices[skipIndex++] = true;
111120
continue;
112121
}
122+
skipIndices[skipIndex++] = false;
113123
header.add();
114124
}
115125
bufferedRow = row;
@@ -128,6 +138,7 @@ public void initHeader() {
128138
} catch (Exception e) {
129139
throw new CSVRuntimeException("error creating csv header", e);
130140
}
141+
loadNext();
131142
}
132143

133144
private boolean includeColumn(String col) {
@@ -170,13 +181,24 @@ protected CSVRow getNext() {
170181
}
171182
}
172183
String[] values = StringUtil.splitQuoted(line, csvSettings.getSeparator());
184+
173185
if (cols == -1) {
174186
cols = values.length;
175187
} else {
176188
if (values.length != cols) {
177189
throw new CSVException(String.format("unequal number of column %d != %d in line %d", values.length, cols, lineNumber));
178190
}
179191
}
192+
if(skipIndices != null && header.size() != values.length){
193+
String[] filteredValues = new String[header.size()];
194+
int j = 0;
195+
for(int i = 0; i < values.length; i++){
196+
if(!skipIndices[i]){
197+
filteredValues[j++] = values[i];
198+
}
199+
}
200+
values = filteredValues;
201+
}
180202
return new CSVRow(header, values, lineNumber);
181203

182204
} catch (Exception e) {

src/test/java/de/unknownreality/dataframe/csv/CSVReaderTest.java

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222

2323
package de.unknownreality.dataframe.csv;
2424

25+
import de.unknownreality.dataframe.DataFrame;
26+
import de.unknownreality.dataframe.DataRow;
2527
import org.junit.Assert;
2628
import org.junit.Rule;
2729
import org.junit.Test;
@@ -58,5 +60,109 @@ public void testReader() throws IOException {
5860
Assert.assertEquals("X", row.get("B"));
5961
Assert.assertEquals("3", row.get("C"));
6062
}
63+
DataFrame dataFrame = DataFrame.load(testCSV,reader);
64+
for (DataRow row : dataFrame) {
65+
Assert.assertEquals((Integer)1, row.getInteger(0));
66+
Assert.assertEquals("X", row.get(1));
67+
Assert.assertEquals((Integer)3, row.getInteger(2));
68+
Assert.assertEquals((Integer)1, row.getInteger("A"));
69+
Assert.assertEquals("X", row.get("B"));
70+
Assert.assertEquals((Integer)3, row.getInteger("C"));
71+
}
72+
}
73+
74+
@Test
75+
public void testSelect() throws IOException {
76+
String testCSV = "#A\tB\tC\n1\tX\t3\n1\tX\t3\n";
77+
CSVReader reader = CSVFormat.createReader()
78+
.withHeaderPrefix("#")
79+
.withHeader(true)
80+
.withSeparator('\t')
81+
.selectColumns("A","B")
82+
.build();
83+
84+
CSVIterator csvRows = reader.load(testCSV);
85+
Assert.assertEquals(2,csvRows.getColumnsInformation().size());
86+
Assert.assertEquals("A", csvRows.getColumnsInformation().get(0).getName());
87+
Assert.assertEquals("B", csvRows.getColumnsInformation().get(1).getName());
88+
89+
for (CSVRow row : csvRows) {
90+
Assert.assertEquals("1", row.get(0));
91+
Assert.assertEquals("X", row.get(1));
92+
93+
Assert.assertEquals("1", row.get("A"));
94+
Assert.assertEquals("X", row.get("B"));
95+
}
96+
DataFrame dataFrame = DataFrame.load(testCSV,reader);
97+
for (DataRow row : dataFrame) {
98+
Assert.assertEquals((Integer)1, row.getInteger(0));
99+
Assert.assertEquals("X", row.get(1));
100+
Assert.assertEquals((Integer)1, row.getInteger("A"));
101+
Assert.assertEquals("X", row.get("B"));
102+
}
103+
}
104+
105+
@Test
106+
public void testSkipFirst() throws IOException {
107+
String testCSV = "#A\tB\tC\n1\tX\t3\n1\tX\t3\n";
108+
CSVReader reader = CSVFormat.createReader()
109+
.withHeaderPrefix("#")
110+
.withHeader(true)
111+
.withSeparator('\t')
112+
.selectColumns("B","C")
113+
.build();
114+
115+
CSVIterator csvRows = reader.load(testCSV);
116+
Assert.assertEquals(2,csvRows.getColumnsInformation().size());
117+
Assert.assertEquals("B", csvRows.getColumnsInformation().get(0).getName());
118+
Assert.assertEquals("C", csvRows.getColumnsInformation().get(1).getName());
119+
120+
for (CSVRow row : csvRows) {
121+
Assert.assertEquals("X", row.get(0));
122+
Assert.assertEquals("3", row.get(1));
123+
Assert.assertEquals("X", row.get("B"));
124+
Assert.assertEquals("3", row.get("C"));
125+
126+
}
127+
128+
DataFrame dataFrame = DataFrame.load(testCSV,reader);
129+
for (DataRow row : dataFrame) {
130+
Assert.assertEquals("X", row.get(0));
131+
Assert.assertEquals((Integer)3, row.getInteger(1));
132+
Assert.assertEquals("X", row.get("B"));
133+
Assert.assertEquals((Integer)3, row.getInteger("C"));
134+
}
61135
}
136+
@Test
137+
public void testSkipMid() throws IOException {
138+
String testCSV = "#A\tB\tC\n1\tX\t3\n1\tX\t3\n";
139+
CSVReader reader = CSVFormat.createReader()
140+
.withHeaderPrefix("#")
141+
.withHeader(true)
142+
.withSeparator('\t')
143+
.selectColumns("A","C")
144+
.build();
145+
146+
CSVIterator csvRows = reader.load(testCSV);
147+
Assert.assertEquals(2,csvRows.getColumnsInformation().size());
148+
Assert.assertEquals("A", csvRows.getColumnsInformation().get(0).getName());
149+
Assert.assertEquals("C", csvRows.getColumnsInformation().get(1).getName());
150+
151+
for (CSVRow row : csvRows) {
152+
Assert.assertEquals("1", row.get(0));
153+
Assert.assertEquals("3", row.get(1));
154+
Assert.assertEquals("1", row.get("A"));
155+
Assert.assertEquals("3", row.get("C"));
156+
}
157+
158+
DataFrame dataFrame = DataFrame.load(testCSV,reader);
159+
for (DataRow row : dataFrame) {
160+
Assert.assertEquals((Integer)1, row.getInteger(0));
161+
Assert.assertEquals((Integer)3, row.getInteger(1));
162+
Assert.assertEquals((Integer)1, row.getInteger("A"));
163+
Assert.assertEquals((Integer)3, row.getInteger("C"));
164+
}
165+
}
166+
167+
62168
}

0 commit comments

Comments
 (0)