Skip to content

Commit f2f0596

Browse files
committed
Update FieldNode structure to be more explicit and reflect schema
1 parent ec51d56 commit f2f0596

File tree

1 file changed

+32
-14
lines changed

1 file changed

+32
-14
lines changed

format/Message.fbs

Lines changed: 32 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ table Tuple {
1717
table List {
1818
}
1919

20-
enum UnionMode:int { Sparse, Dense }
20+
enum UnionMode:short { Sparse, Dense }
2121

2222
table Union {
2323
mode: UnionMode;
@@ -28,7 +28,7 @@ table Int {
2828
is_signed: bool;
2929
}
3030

31-
enum Precision:int {SINGLE, DOUBLE}
31+
enum Precision:short {SINGLE, DOUBLE}
3232

3333
table FloatingPoint {
3434
precision: Precision;
@@ -114,7 +114,7 @@ table Field {
114114
/// ----------------------------------------------------------------------
115115
/// Endianness of the platform that produces the RecordBatch
116116

117-
enum Endianness:int { Little, Big }
117+
enum Endianness:short { Little, Big }
118118

119119
/// ----------------------------------------------------------------------
120120
/// A Schema describes the columns in a row batch
@@ -133,8 +133,19 @@ table Schema {
133133
/// Data structures for describing a table row batch (a collection of
134134
/// equal-length Arrow arrays)
135135

136+
enum VectorType: short {
137+
/// used in List type, Dense Union, and variable-length primitive types (String, Binary)
138+
OFFSET,
139+
/// fixed length primitive values
140+
VALUES,
141+
/// Bit vector indicating whether each value is null
142+
VALIDITY,
143+
/// Type vector used in Union type
144+
TYPE
145+
}
146+
136147
/// A Buffer represents a single contiguous memory segment
137-
struct Buffer {
148+
table Buffer {
138149
/// The shared memory page id where this buffer is located. Currently this is
139150
/// not used
140151
page: int;
@@ -146,6 +157,9 @@ struct Buffer {
146157
/// The absolute length (in bytes) of the memory buffer. The memory is found
147158
/// from offset (inclusive) to offset + length (non-inclusive).
148159
length: long;
160+
161+
/// The type of vector stored in this buffer, recorded explicitly
162+
type: VectorType;
149163
}
150164

151165
/// Metadata about a field at some level of a nested type tree (but not
@@ -154,7 +168,7 @@ struct Buffer {
154168
/// For example, a List<Int16> with values [[1, 2, 3], null, [4], [5, 6], null]
155169
/// would have {length: 5, null_count: 2} for its List node, and {length: 6,
156170
/// null_count: 0} for its Int16 node, as separate FieldNode structs
157-
struct FieldNode {
171+
table FieldNode {
158172
/// The number of value slots in the Arrow array at this level of a nested
159173
/// tree
160174
length: int;
@@ -163,26 +177,30 @@ struct FieldNode {
163177
/// to write their physical validity bitmap out as a materialized buffer,
164178
/// instead setting the length of the bitmap buffer to 0.
165179
null_count: int;
180+
181+
/// Child nodes of this field, as defined by the schema
182+
children: [FieldNode];
183+
184+
/// Buffers correspond to the pre-ordered flattened buffer tree
185+
///
186+
/// The number of buffers appended to this list depends on the field type and length.
187+
/// For example, most primitive arrays will have 2 buffers, 1 for the validity
188+
/// bitmap and 1 for the values. For struct arrays, there will only be a
189+
/// single buffer for the validity (nulls) bitmap
190+
vectors: [Buffer];
166191
}
167192

168193
/// A data header describing the shared memory layout of a "record" or "row"
169194
/// batch. Some systems call this a "row batch" internally and others a "record
170195
/// batch".
171196
table RecordBatch {
172-
/// number of records / rows. The arrays in the batch should all have this
197+
/// number of records / rows. The root arrays in the batch should all have this
173198
/// length
174199
length: int;
175200

176-
/// Nodes correspond to the pre-ordered flattened logical schema
201+
/// Nodes correspond to the first level of the logical schema
177202
nodes: [FieldNode];
178203

179-
/// Buffers correspond to the pre-ordered flattened buffer tree
180-
///
181-
/// The number of buffers appended to this list depends on the schema. For
182-
/// example, most primitive arrays will have 2 buffers, 1 for the validity
183-
/// bitmap and 1 for the values. For struct arrays, there will only be a
184-
/// single buffer for the validity (nulls) bitmap
185-
buffers: [Buffer];
186204
}
187205

188206
/// ----------------------------------------------------------------------

0 commit comments

Comments
 (0)