Skip to content

Commit ac6902a

Browse files
committed
ARROW-264: File format
1 parent 807db51 commit ac6902a

26 files changed

+1578
-26
lines changed

format/File.fbs

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
include "Message.fbs";
2+
3+
namespace org.apache.arrow.flatbuf;
4+
5+
/// ----------------------------------------------------------------------
6+
/// Arrow File metadata
7+
///
8+
9+
/// Root table of this schema file (declared via `root_type Footer` below).
/// It carries one Schema plus two vectors of Block structs.
table Footer {
10+
11+
/// Schema table as defined by the included Message.fbs.
schema: org.apache.arrow.flatbuf.Schema;
12+
13+
/// One Block entry per dictionary (presumably locating it in the file; confirm against the writer).
dictionaries: [ Block ];
14+
15+
/// One Block entry per record batch (presumably locating it in the file; confirm against the writer).
recordBatches: [ Block ];
16+
}
17+
18+
/// Fixed-size struct of three scalars, used as the element type of the
/// `dictionaries` and `recordBatches` vectors in Footer.
struct Block {
19+
20+
/// NOTE(review): presumably the starting position of the block within the file; confirm.
offset: long;
21+
22+
/// NOTE(review): presumably the size in bytes of the block's metadata; confirm.
metaDataLength: int;
23+
24+
/// NOTE(review): presumably the size in bytes of the block's body; confirm.
bodyLength: long;
25+
26+
}
27+
28+
root_type Footer;

java/format/pom.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@
106106
<argument>-o</argument>
107107
<argument>target/generated-sources/</argument>
108108
<argument>../../format/Message.fbs</argument>
109+
<argument>../../format/File.fbs</argument>
109110
</arguments>
110111
</configuration>
111112
</execution>

java/vector/src/main/codegen/data/ArrowTypes.tdd

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
},
3131
{
3232
name: "Union",
33-
fields: []
33+
fields: [{name: "mode", type: int}]
3434
},
3535
{
3636
name: "Int",

java/vector/src/main/codegen/templates/ArrowType.java

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,8 @@
2424

2525
<@pp.dropOutputFile />
2626
<@pp.changeOutputFile name="/org/apache/arrow/vector/types/pojo/ArrowType.java" />
27-
28-
2927
<#include "/@includes/license.ftl" />
28+
3029
package org.apache.arrow.vector.types.pojo;
3130

3231
import com.google.flatbuffers.FlatBufferBuilder;
@@ -38,7 +37,13 @@ public abstract class ArrowType {
3837

3938
public abstract byte getTypeType();
4039
public abstract int getType(FlatBufferBuilder builder);
40+
public abstract <T> T accept(ArrowTypeVisitor<T> visitor);
4141

42+
public static interface ArrowTypeVisitor<T> {
43+
<#list arrowTypes.types as type>
44+
T visit(${type.name} type);
45+
</#list>
46+
}
4247

4348
<#list arrowTypes.types as type>
4449
<#assign name = type.name>
@@ -102,6 +107,11 @@ public boolean equals(Object obj) {
102107
</#list>
103108
</#if>
104109
}
110+
111+
@Override
112+
public <T> T accept(ArrowTypeVisitor<T> visitor) {
113+
return visitor.visit(this);
114+
}
105115
}
106116
</#list>
107117

java/vector/src/main/codegen/templates/UnionVector.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@
4343
import org.apache.arrow.vector.complex.impl.ComplexCopier;
4444
import org.apache.arrow.vector.util.CallBack;
4545

46+
import static org.apache.arrow.flatbuf.UnionMode.Sparse;
47+
4648
/*
4749
* This class is generated using freemarker and the ${.template_name} template.
4850
*/
@@ -203,7 +205,7 @@ public Field getField() {
203205
for (ValueVector v : internalMap.getChildren()) {
204206
childFields.add(v.getField());
205207
}
206-
return new Field(name, true, new ArrowType.Union(), childFields);
208+
return new Field(name, true, new ArrowType.Union(Sparse), childFields);
207209
}
208210
209211
@Override
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
/**
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
package org.apache.arrow.file;
19+
20+
import org.apache.arrow.flatbuf.Block;
21+
import org.apache.arrow.schema.FBSerializable;
22+
23+
import com.google.flatbuffers.FlatBufferBuilder;
24+
25+
public class ArrowBlock implements FBSerializable {
26+
27+
private final long offset;
28+
private final int metadataLength;
29+
private final long bodyLength;
30+
31+
public ArrowBlock(long offset, int metadataLength, long bodyLength) {
32+
super();
33+
this.offset = offset;
34+
this.metadataLength = metadataLength;
35+
this.bodyLength = bodyLength;
36+
}
37+
38+
public long getOffset() {
39+
return offset;
40+
}
41+
42+
public int getMetadataLength() {
43+
return metadataLength;
44+
}
45+
46+
public long getBodyLength() {
47+
return bodyLength;
48+
}
49+
50+
@Override
51+
public int writeTo(FlatBufferBuilder builder) {
52+
return Block.createBlock(builder, offset, metadataLength, bodyLength);
53+
}
54+
55+
@Override
56+
public int hashCode() {
57+
final int prime = 31;
58+
int result = 1;
59+
result = prime * result + (int) (bodyLength ^ (bodyLength >>> 32));
60+
result = prime * result + metadataLength;
61+
result = prime * result + (int) (offset ^ (offset >>> 32));
62+
return result;
63+
}
64+
65+
@Override
66+
public boolean equals(Object obj) {
67+
if (this == obj)
68+
return true;
69+
if (obj == null)
70+
return false;
71+
if (getClass() != obj.getClass())
72+
return false;
73+
ArrowBlock other = (ArrowBlock) obj;
74+
if (bodyLength != other.bodyLength)
75+
return false;
76+
if (metadataLength != other.metadataLength)
77+
return false;
78+
if (offset != other.offset)
79+
return false;
80+
return true;
81+
}
82+
}
Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
/**
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
package org.apache.arrow.file;
19+
20+
import java.util.ArrayList;
21+
import java.util.List;
22+
23+
import org.apache.arrow.flatbuf.Block;
24+
import org.apache.arrow.flatbuf.Footer;
25+
import org.apache.arrow.schema.FBSerializable;
26+
import org.apache.arrow.vector.types.pojo.Schema;
27+
28+
import com.google.flatbuffers.FlatBufferBuilder;
29+
30+
public class ArrowFooter implements FBSerializable {
31+
32+
private final Schema schema;
33+
34+
private final List<ArrowBlock> dictionaries;
35+
36+
private final List<ArrowBlock> recordBatches;
37+
38+
public ArrowFooter(Schema schema, List<ArrowBlock> dictionaries, List<ArrowBlock> recordBatches) {
39+
super();
40+
this.schema = schema;
41+
this.dictionaries = dictionaries;
42+
this.recordBatches = recordBatches;
43+
}
44+
45+
public ArrowFooter(Footer footer) {
46+
this(
47+
Schema.convertSchema(footer.schema()),
48+
dictionaries(footer),
49+
recordBatches(footer)
50+
);
51+
}
52+
53+
private static List<ArrowBlock> recordBatches(Footer footer) {
54+
List<ArrowBlock> recordBatches = new ArrayList<>();
55+
Block tempBLock = new Block();
56+
int recordBatchesLength = footer.recordBatchesLength();
57+
for (int i = 0; i < recordBatchesLength; i++) {
58+
Block block = footer.recordBatches(tempBLock, i);
59+
recordBatches.add(new ArrowBlock(block.offset(), block.metaDataLength(), block.bodyLength()));
60+
}
61+
return recordBatches;
62+
}
63+
64+
private static List<ArrowBlock> dictionaries(Footer footer) {
65+
List<ArrowBlock> dictionaries = new ArrayList<>();
66+
Block tempBLock = new Block();
67+
int dictionariesLength = footer.dictionariesLength();
68+
for (int i = 0; i < dictionariesLength; i++) {
69+
Block block = footer.dictionaries(tempBLock, i);
70+
dictionaries.add(new ArrowBlock(block.offset(), block.metaDataLength(), block.bodyLength()));
71+
}
72+
return dictionaries;
73+
}
74+
75+
public Schema getSchema() {
76+
return schema;
77+
}
78+
79+
public List<ArrowBlock> getDictionaries() {
80+
return dictionaries;
81+
}
82+
83+
public List<ArrowBlock> getRecordBatches() {
84+
return recordBatches;
85+
}
86+
87+
@Override
88+
public int writeTo(FlatBufferBuilder builder) {
89+
int schemaIndex = schema.getSchema(builder);
90+
Footer.startDictionariesVector(builder, dictionaries.size());
91+
int dicsOffset = endVector(builder, dictionaries);
92+
Footer.startRecordBatchesVector(builder, recordBatches.size());
93+
int rbsOffset = endVector(builder, recordBatches);
94+
Footer.startFooter(builder);
95+
Footer.addSchema(builder, schemaIndex);
96+
Footer.addDictionaries(builder, dicsOffset);
97+
Footer.addRecordBatches(builder, rbsOffset);
98+
return Footer.endFooter(builder);
99+
}
100+
101+
private int endVector(FlatBufferBuilder builder, List<ArrowBlock> blocks) {
102+
for (ArrowBlock block : blocks) {
103+
block.writeTo(builder);
104+
}
105+
return builder.endVector();
106+
}
107+
108+
@Override
109+
public int hashCode() {
110+
final int prime = 31;
111+
int result = 1;
112+
result = prime * result + ((dictionaries == null) ? 0 : dictionaries.hashCode());
113+
result = prime * result + ((recordBatches == null) ? 0 : recordBatches.hashCode());
114+
result = prime * result + ((schema == null) ? 0 : schema.hashCode());
115+
return result;
116+
}
117+
118+
@Override
119+
public boolean equals(Object obj) {
120+
if (this == obj)
121+
return true;
122+
if (obj == null)
123+
return false;
124+
if (getClass() != obj.getClass())
125+
return false;
126+
ArrowFooter other = (ArrowFooter) obj;
127+
if (dictionaries == null) {
128+
if (other.dictionaries != null)
129+
return false;
130+
} else if (!dictionaries.equals(other.dictionaries))
131+
return false;
132+
if (recordBatches == null) {
133+
if (other.recordBatches != null)
134+
return false;
135+
} else if (!recordBatches.equals(other.recordBatches))
136+
return false;
137+
if (schema == null) {
138+
if (other.schema != null)
139+
return false;
140+
} else if (!schema.equals(other.schema))
141+
return false;
142+
return true;
143+
}
144+
}

0 commit comments

Comments
 (0)