Skip to content

Commit e43f26b

Browse files
committed
add layout spec
1 parent 0cc9718 commit e43f26b

File tree

5 files changed

+283
-0
lines changed

5 files changed

+283
-0
lines changed

java/vector/pom.xml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,11 @@
3232
<artifactId>arrow-memory</artifactId>
3333
<version>${project.version}</version>
3434
</dependency>
35+
<dependency>
36+
<groupId>org.apache.arrow</groupId>
37+
<artifactId>arrow-format</artifactId>
38+
<version>${project.version}</version>
39+
</dependency>
3540
<dependency>
3641
<groupId>joda-time</groupId>
3742
<artifactId>joda-time</artifactId>
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
package org.apache.arrow.vector.layout;
2+
3+
public class ByteAlignedVectorLayout extends VectorLayout {
4+
5+
private final int typeByteWidth;
6+
7+
public ByteAlignedVectorLayout(int typeByteWidth) {
8+
super(typeByteWidth * 8);
9+
this.typeByteWidth = typeByteWidth;
10+
}
11+
12+
public int getTypeByteWidth() {
13+
return typeByteWidth;
14+
}
15+
16+
}
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
package org.apache.arrow.vector.layout;
2+
3+
import static java.util.Arrays.asList;
4+
5+
import java.util.Collections;
6+
import java.util.List;
7+
8+
import org.apache.arrow.vector.types.pojo.ArrowType;
9+
10+
11+
public class PrimitiveTypeLayout extends TypeLayout {
12+
13+
public PrimitiveTypeLayout(ArrowType type, List<VectorLayout> vectors) {
14+
super(type, vectors, Collections.<TypeLayout>emptyList());
15+
}
16+
17+
public PrimitiveTypeLayout(ArrowType type, VectorLayout... vectors) {
18+
this(type, asList(vectors));
19+
}
20+
21+
}
Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
package org.apache.arrow.vector.layout;
2+
3+
import static java.util.Arrays.asList;
4+
import static org.apache.arrow.flatbuf.Precision.DOUBLE;
5+
import static org.apache.arrow.flatbuf.Precision.SINGLE;
6+
import static org.apache.arrow.vector.layout.VectorLayout.newBooleanVectorLayout;
7+
import static org.apache.arrow.vector.layout.VectorLayout.newByteVectorLayout;
8+
import static org.apache.arrow.vector.layout.VectorLayout.newIntVectorLayout;
9+
import static org.apache.arrow.vector.layout.VectorLayout.newOffsetVectorLayout;
10+
import static org.apache.arrow.vector.layout.VectorLayout.newValidityVectorLayout;
11+
12+
import java.util.ArrayList;
13+
import java.util.Collections;
14+
import java.util.List;
15+
16+
import org.apache.arrow.flatbuf.UnionMode;
17+
import org.apache.arrow.vector.types.pojo.ArrowType;
18+
import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeVisitor;
19+
import org.apache.arrow.vector.types.pojo.ArrowType.Binary;
20+
import org.apache.arrow.vector.types.pojo.ArrowType.Bool;
21+
import org.apache.arrow.vector.types.pojo.ArrowType.Date;
22+
import org.apache.arrow.vector.types.pojo.ArrowType.Decimal;
23+
import org.apache.arrow.vector.types.pojo.ArrowType.FloatingPoint;
24+
import org.apache.arrow.vector.types.pojo.ArrowType.Int;
25+
import org.apache.arrow.vector.types.pojo.ArrowType.IntervalDay;
26+
import org.apache.arrow.vector.types.pojo.ArrowType.IntervalYear;
27+
import org.apache.arrow.vector.types.pojo.ArrowType.Null;
28+
import org.apache.arrow.vector.types.pojo.ArrowType.Time;
29+
import org.apache.arrow.vector.types.pojo.ArrowType.Timestamp;
30+
import org.apache.arrow.vector.types.pojo.ArrowType.Tuple;
31+
import org.apache.arrow.vector.types.pojo.ArrowType.Union;
32+
import org.apache.arrow.vector.types.pojo.ArrowType.Utf8;
33+
import org.apache.arrow.vector.types.pojo.Field;
34+
35+
/**
36+
* The layout of vectors for a given type
37+
* It defines its own vectors followed by the vectors for the children
38+
* if it is a nested type (Tuple, List, Union)
39+
*/
40+
public class TypeLayout {
41+
42+
public static TypeLayout newTypeLayout(Field field) {
43+
final org.apache.arrow.vector.types.pojo.ArrowType arrowType = field.getType();
44+
final List<Field> children = field.getChildren();
45+
TypeLayout layout = arrowType.accept(new ArrowTypeVisitor<TypeLayout>() {
46+
47+
@Override public TypeLayout visit(Int type) {
48+
return new FixedWidthTypeLayout(
49+
arrowType,
50+
newIntVectorLayout(type.getBitWidth()));
51+
}
52+
53+
@Override public TypeLayout visit(Union type) {
54+
List<TypeLayout> childLayouts = childrenLayout(children);
55+
List<VectorLayout> vectors;
56+
switch (type.getMode()) {
57+
case UnionMode.Dense:
58+
vectors = asList(
59+
// TODO: validate this
60+
newValidityVectorLayout(),
61+
newIntVectorLayout(8), // type vector
62+
newOffsetVectorLayout() // offset to find the vector
63+
);
64+
break;
65+
case UnionMode.Sparse:
66+
vectors = asList(
67+
newValidityVectorLayout(),
68+
newIntVectorLayout(8) // type vector
69+
);
70+
break;
71+
default:
72+
throw new UnsupportedOperationException("Unsupported Union Mode: " + type.getMode());
73+
}
74+
return new TypeLayout(arrowType, vectors, childLayouts);
75+
}
76+
77+
@Override public TypeLayout visit(Tuple type) {
78+
List<TypeLayout> childLayouts = childrenLayout(children);
79+
List<VectorLayout> vectors = asList(
80+
newValidityVectorLayout()
81+
);
82+
return new TypeLayout(arrowType, vectors, childLayouts);
83+
}
84+
85+
@Override public TypeLayout visit(Timestamp type) {
86+
throw new UnsupportedOperationException("NYI");
87+
}
88+
89+
@Override public TypeLayout visit(org.apache.arrow.vector.types.pojo.ArrowType.List type) {
90+
if (children.size() != 1) {
91+
throw new IllegalArgumentException("Lists should have exactly one child. Found " + children);
92+
}
93+
List<TypeLayout> childLayouts = childrenLayout(children);
94+
List<VectorLayout> vectors = asList(
95+
newValidityVectorLayout()
96+
);
97+
return new TypeLayout(arrowType, vectors, childLayouts);
98+
}
99+
100+
@Override public TypeLayout visit(FloatingPoint type) {
101+
int bitWidth;
102+
switch (type.getPrecision()) {
103+
case SINGLE:
104+
bitWidth = 32;
105+
break;
106+
case DOUBLE:
107+
bitWidth = 64;
108+
break;
109+
default:
110+
throw new UnsupportedOperationException("Unsupported Precision: " + type.getPrecision());
111+
}
112+
return new FixedWidthTypeLayout(
113+
arrowType,
114+
newIntVectorLayout(bitWidth));
115+
}
116+
117+
@Override public TypeLayout visit(Decimal type) {
118+
throw new UnsupportedOperationException("NYI");
119+
}
120+
121+
@Override public TypeLayout visit(Bool type) {
122+
return new FixedWidthTypeLayout(
123+
arrowType,
124+
newBooleanVectorLayout());
125+
}
126+
127+
@Override public TypeLayout visit(Binary type) {
128+
return new VariableWidthTypeLayout(
129+
arrowType,
130+
newByteVectorLayout());
131+
}
132+
133+
@Override public TypeLayout visit(Utf8 type) {
134+
return new VariableWidthTypeLayout(
135+
arrowType,
136+
newByteVectorLayout());
137+
}
138+
139+
@Override
140+
public TypeLayout visit(Null type) {
141+
return new TypeLayout(type, Collections.<VectorLayout>emptyList(), Collections.<TypeLayout>emptyList());
142+
}
143+
144+
@Override
145+
public TypeLayout visit(Date type) {
146+
throw new UnsupportedOperationException("NYI");
147+
}
148+
149+
@Override
150+
public TypeLayout visit(Time type) {
151+
throw new UnsupportedOperationException("NYI");
152+
}
153+
154+
@Override
155+
public TypeLayout visit(IntervalDay type) {
156+
throw new UnsupportedOperationException("NYI");
157+
}
158+
159+
@Override
160+
public TypeLayout visit(IntervalYear type) {
161+
throw new UnsupportedOperationException("NYI");
162+
}
163+
164+
private List<TypeLayout> childrenLayout(final List<Field> children) {
165+
List<TypeLayout> childLayouts = new ArrayList<TypeLayout>();
166+
for (Field child : children) {
167+
childLayouts.add(newTypeLayout(child));
168+
}
169+
return childLayouts;
170+
}
171+
});
172+
return layout;
173+
}
174+
175+
private final ArrowType type;
176+
private final List<VectorLayout> vectors;
177+
private final List<TypeLayout> children;
178+
179+
public TypeLayout(ArrowType type, List<VectorLayout> vectors, List<TypeLayout> children) {
180+
super();
181+
this.type = type;
182+
this.vectors = vectors;
183+
this.children = children;
184+
}
185+
186+
public ArrowType getType() {
187+
return type;
188+
}
189+
190+
public List<VectorLayout> getVectors() {
191+
return vectors;
192+
}
193+
194+
public List<TypeLayout> getChildren() {
195+
return children;
196+
}
197+
}
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
package org.apache.arrow.vector.layout;
2+
3+
public class VectorLayout {
4+
5+
public static ByteAlignedVectorLayout newOffsetVectorLayout() {
6+
return newIntVectorLayout(32);
7+
}
8+
9+
public static ByteAlignedVectorLayout newIntVectorLayout(int typeBitWidth) {
10+
switch (typeBitWidth) {
11+
case 8:
12+
case 16:
13+
case 32:
14+
case 64:
15+
return new ByteAlignedVectorLayout(typeBitWidth / 8);
16+
default:
17+
throw new IllegalArgumentException("only 8, 16, 32, or 64 bits supported");
18+
}
19+
}
20+
21+
public static VectorLayout newBooleanVectorLayout() {
22+
return new VectorLayout(1);
23+
}
24+
25+
public static VectorLayout newValidityVectorLayout() {
26+
return newBooleanVectorLayout();
27+
}
28+
29+
public static ByteAlignedVectorLayout newByteVectorLayout() {
30+
return newIntVectorLayout(8);
31+
}
32+
33+
private final int typeBitWidth;
34+
35+
public VectorLayout(int typeBitWidth) {
36+
super();
37+
this.typeBitWidth = typeBitWidth;
38+
}
39+
40+
public int getTypeBitWidth() {
41+
return typeBitWidth;
42+
}
43+
44+
}

0 commit comments

Comments
 (0)