Skip to content

Commit 1b7a5e0

Browse files
authored
Implement histogram iterators for batch (#5944)
* Implement histogram iterators for batch Signed-off-by: Ben Ye <[email protected]> * lint Signed-off-by: Ben Ye <[email protected]> * update Signed-off-by: Ben Ye <[email protected]> * lint Signed-off-by: Ben Ye <[email protected]> * revert to not use unsafe Signed-off-by: Ben Ye <[email protected]> --------- Signed-off-by: Ben Ye <[email protected]>
1 parent 23b4148 commit 1b7a5e0

File tree

14 files changed

+510
-201
lines changed

14 files changed

+510
-201
lines changed

pkg/chunk/chunk.go

Lines changed: 30 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package chunk
33
import (
44
"github.com/pkg/errors"
55
"github.com/prometheus/common/model"
6+
"github.com/prometheus/prometheus/model/histogram"
67
"github.com/prometheus/prometheus/model/labels"
78
"github.com/prometheus/prometheus/tsdb/chunkenc"
89
)
@@ -45,31 +46,41 @@ type prometheusChunkIterator struct {
4546
it chunkenc.Iterator
4647
}
4748

48-
func (p *prometheusChunkIterator) Scan() bool {
49-
return p.it.Next() != chunkenc.ValNone
49+
func (p *prometheusChunkIterator) Scan() chunkenc.ValueType {
50+
return p.it.Next()
5051
}
5152

52-
func (p *prometheusChunkIterator) FindAtOrAfter(time model.Time) bool {
53+
func (p *prometheusChunkIterator) FindAtOrAfter(time model.Time) chunkenc.ValueType {
5354
// FindAtOrAfter must return OLDEST value at given time. That means we need to start with a fresh iterator,
5455
// otherwise we cannot guarantee OLDEST.
5556
p.it = p.c.Iterator(p.it)
56-
return p.it.Seek(int64(time)) != chunkenc.ValNone
57+
return p.it.Seek(int64(time))
5758
}
5859

59-
func (p *prometheusChunkIterator) Batch(size int) Batch {
60+
func (p *prometheusChunkIterator) Batch(size int, valType chunkenc.ValueType) Batch {
6061
var batch Batch
6162
j := 0
6263
for j < size {
63-
t, v := p.it.At()
64-
batch.Timestamps[j] = t
65-
batch.Values[j] = v
64+
switch valType {
65+
case chunkenc.ValNone:
66+
break
67+
case chunkenc.ValFloat:
68+
t, v := p.it.At()
69+
batch.Timestamps[j] = t
70+
batch.Values[j] = v
71+
case chunkenc.ValHistogram:
72+
batch.Timestamps[j], batch.Histograms[j] = p.it.AtHistogram(nil)
73+
case chunkenc.ValFloatHistogram:
74+
batch.Timestamps[j], batch.FloatHistograms[j] = p.it.AtFloatHistogram(nil)
75+
}
6676
j++
6777
if j < size && p.it.Next() == chunkenc.ValNone {
6878
break
6979
}
7080
}
7181
batch.Index = 0
7282
batch.Length = j
83+
batch.ValType = valType
7384
return batch
7485
}
7586

@@ -79,7 +90,14 @@ func (p *prometheusChunkIterator) Err() error {
7990

8091
type errorIterator string
8192

82-
func (e errorIterator) Scan() bool { return false }
83-
func (e errorIterator) FindAtOrAfter(time model.Time) bool { return false }
84-
func (e errorIterator) Batch(size int) Batch { panic("no values") }
85-
func (e errorIterator) Err() error { return errors.New(string(e)) }
93+
func (e errorIterator) Scan() chunkenc.ValueType { return chunkenc.ValNone }
94+
func (e errorIterator) FindAtOrAfter(time model.Time) chunkenc.ValueType { return chunkenc.ValNone }
95+
func (e errorIterator) Value() model.SamplePair { panic("no values") }
96+
func (e errorIterator) AtHistogram(_ *histogram.Histogram) (int64, *histogram.Histogram) {
97+
panic("no values")
98+
}
99+
func (e errorIterator) AtFloatHistogram(_ *histogram.FloatHistogram) (int64, *histogram.FloatHistogram) {
100+
panic("no values")
101+
}
102+
func (e errorIterator) Batch(size int, valType chunkenc.ValueType) Batch { panic("no values") }
103+
func (e errorIterator) Err() error { return errors.New(string(e)) }

pkg/chunk/encoding/encoding.go

Lines changed: 34 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ import (
88
"github.com/prometheus/prometheus/tsdb/chunkenc"
99
)
1010

11-
// Encoding defines which encoding we are using, delta, doubledelta, or varbit
11+
// Encoding defines which encoding we are using.
1212
type Encoding byte
1313

1414
// String implements flag.Value.
@@ -26,28 +26,57 @@ func (e Encoding) PromChunkEncoding() chunkenc.Encoding {
2626
return chunkenc.EncNone
2727
}
2828

29+
func (e Encoding) ChunkValueType() chunkenc.ValueType {
30+
if known, found := encodings[e]; found {
31+
return known.ValueType
32+
}
33+
return chunkenc.ValNone
34+
}
35+
2936
const (
3037
// PrometheusXorChunk is a wrapper around Prometheus XOR-encoded chunk.
3138
// 4 is the magic value for backwards-compatibility with previous iota-based constants.
3239
PrometheusXorChunk Encoding = 4
40+
// PrometheusHistogramChunk is a wrapper around Prometheus histogram chunk.
41+
// 5 is the magic value for backwards-compatibility with previous iota-based constants.
42+
PrometheusHistogramChunk Encoding = 5
43+
// PrometheusFloatHistogramChunk is a wrapper around Prometheus float histogram chunk.
44+
// 6 is the magic value for backwards-compatibility with previous iota-based constants.
45+
PrometheusFloatHistogramChunk Encoding = 6
3346
)
3447

3548
type encoding struct {
36-
Name string
37-
Encoding chunkenc.Encoding
49+
Name string
50+
Encoding chunkenc.Encoding
51+
ValueType chunkenc.ValueType
3852
}
3953

4054
var encodings = map[Encoding]encoding{
4155
PrometheusXorChunk: {
42-
Name: "PrometheusXorChunk",
43-
Encoding: chunkenc.EncXOR,
56+
Name: "PrometheusXorChunk",
57+
Encoding: chunkenc.EncXOR,
58+
ValueType: chunkenc.ValFloat,
59+
},
60+
PrometheusHistogramChunk: {
61+
Name: "PrometheusHistogramChunk",
62+
Encoding: chunkenc.EncHistogram,
63+
ValueType: chunkenc.ValHistogram,
64+
},
65+
PrometheusFloatHistogramChunk: {
66+
Name: "PrometheusFloatHistogramChunk",
67+
Encoding: chunkenc.EncFloatHistogram,
68+
ValueType: chunkenc.ValFloatHistogram,
4469
},
4570
}
4671

4772
func FromPromChunkEncoding(enc chunkenc.Encoding) (Encoding, error) {
4873
switch enc {
4974
case chunkenc.EncXOR:
5075
return PrometheusXorChunk, nil
76+
case chunkenc.EncHistogram:
77+
return PrometheusHistogramChunk, nil
78+
case chunkenc.EncFloatHistogram:
79+
return PrometheusFloatHistogramChunk, nil
5180
}
5281
return Encoding(0), errors.Errorf("unknown Prometheus chunk encoding: %v", enc)
5382
}

pkg/chunk/iterator.go

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ package chunk
22

33
import (
44
"github.com/prometheus/common/model"
5+
"github.com/prometheus/prometheus/model/histogram"
6+
"github.com/prometheus/prometheus/tsdb/chunkenc"
57
)
68

79
// Iterator enables efficient access to the content of a chunk. It is
@@ -11,16 +13,16 @@ type Iterator interface {
1113
// Scans the next value in the chunk. Directly after the iterator has
1214
// been created, the next value is the first value in the
1315
// chunk. Otherwise, it is the value following the last value scanned or
14-
// found (by one of the Find... methods). Returns false if either the
15-
// end of the chunk is reached or an error has occurred.
16-
Scan() bool
17-
// Finds the oldest value at or after the provided time. Returns false
18-
// if either the chunk contains no value at or after the provided time,
19-
// or an error has occurred.
20-
FindAtOrAfter(model.Time) bool
16+
// found (by one of the Find... methods). Returns chunkenc.ValNone if either
17+
// the end of the chunk is reached or an error has occurred.
18+
Scan() chunkenc.ValueType
19+
// Finds the oldest value at or after the provided time and returns the value type.
20+
// Returns chunkenc.ValNone if either the chunk contains no value at or after
21+
// the provided time, or an error has occurred.
22+
FindAtOrAfter(model.Time) chunkenc.ValueType
2123
// Returns a batch of the provided size; NB not idempotent! Should only be called
2224
// once per Scan.
23-
Batch(size int) Batch
25+
Batch(size int, valType chunkenc.ValueType) Batch
2426
// Returns the last error encountered. In general, an error signals data
2527
// corruption in the chunk and requires quarantining.
2628
Err() error
@@ -30,11 +32,14 @@ type Iterator interface {
3032
// 1 to 128.
3133
const BatchSize = 12
3234

33-
// Batch is a sorted set of (timestamp, value) pairs. They are intended to be
34-
// small, and passed by value.
35+
// Batch is a sorted set of (timestamp, value) pairs. They are intended to be small,
36+
// and passed by value. Value can vary depending on the chunk value type.
3537
type Batch struct {
36-
Timestamps [BatchSize]int64
37-
Values [BatchSize]float64
38-
Index int
39-
Length int
38+
Timestamps [BatchSize]int64
39+
Values [BatchSize]float64
40+
Histograms [BatchSize]*histogram.Histogram
41+
FloatHistograms [BatchSize]*histogram.FloatHistogram
42+
Index int
43+
Length int
44+
ValType chunkenc.ValueType
4045
}

pkg/querier/batch/batch.go

Lines changed: 43 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@ package batch
22

33
import (
44
"github.com/prometheus/common/model"
5+
"github.com/prometheus/prometheus/model/histogram"
56
"github.com/prometheus/prometheus/tsdb/chunkenc"
67

78
"github.com/cortexproject/cortex/pkg/chunk"
8-
"github.com/cortexproject/cortex/pkg/querier/iterators"
99
)
1010

1111
// GenericChunk is a generic chunk used by the batch iterator, in order to make the batch
@@ -31,11 +31,11 @@ func (c GenericChunk) Iterator(reuse chunk.Iterator) chunk.Iterator {
3131

3232
// iterator iterates over batches.
3333
type iterator interface {
34-
// Seek to the batch at (or after) time t.
35-
Seek(t int64, size int) bool
34+
// Seek moves to the batch at (or after) time t and returns the chunk value type.
35+
Seek(t int64, size int) chunkenc.ValueType
3636

37-
// Next moves to the next batch.
38-
Next(size int) bool
37+
// Next moves to the next batch and returns chunk value type.
38+
Next(size int) chunkenc.ValueType
3939

4040
// AtTime returns the start time of the next batch. Must only be called after
4141
// Seek or Next have returned a value type other than chunkenc.ValNone.
@@ -44,7 +44,7 @@ type iterator interface {
4444
// MaxCurrentChunkTime returns the max time on the current chunk.
4545
MaxCurrentChunkTime() int64
4646

47-
// Batch returns the current batch. Must only be called after Seek or Next
47+
// Batch returns the current batch. Must only be called after Seek or Next
4848
// have returned a value type other than chunkenc.ValNone.
4949
Batch() chunk.Batch
5050

@@ -78,62 +78,71 @@ type iteratorAdapter struct {
7878
}
7979

8080
func newIteratorAdapter(underlying iterator) chunkenc.Iterator {
81-
return iterators.NewCompatibleChunksIterator(&iteratorAdapter{
81+
return &iteratorAdapter{
8282
batchSize: 1,
8383
underlying: underlying,
84-
})
84+
}
8585
}
8686

8787
// Seek implements chunkenc.Iterator.
88-
func (a *iteratorAdapter) Seek(t int64) bool {
88+
func (a *iteratorAdapter) Seek(t int64) chunkenc.ValueType {
8989

9090
// Optimisation: fulfill the seek using current batch if possible.
9191
if a.curr.Length > 0 && a.curr.Index < a.curr.Length {
9292
if t <= a.curr.Timestamps[a.curr.Index] {
9393
//In this case, the interface's requirement is met, so state of this
9494
//iterator does not need any change.
95-
return true
95+
return a.curr.ValType
9696
} else if t <= a.curr.Timestamps[a.curr.Length-1] {
9797
//In this case, some timestamp between current sample and end of batch can fulfill
9898
//the seek. Let's find it.
9999
for a.curr.Index < a.curr.Length && t > a.curr.Timestamps[a.curr.Index] {
100100
a.curr.Index++
101101
}
102-
return true
102+
return a.curr.ValType
103103
} else if t <= a.underlying.MaxCurrentChunkTime() {
104104
// In this case, some timestamp inside the current underlying chunk can fulfill the seek.
105105
// In this case we will call next until we find the sample as it will be faster than calling
106106
// `a.underlying.Seek` directly as this would cause the iterator to start from the beginning of the chunk.
107107
// See: https://github.com/cortexproject/cortex/blob/f69452975877c67ac307709e5f60b8d20477764c/pkg/querier/batch/chunk.go#L26-L45
108108
// https://github.com/cortexproject/cortex/blob/f69452975877c67ac307709e5f60b8d20477764c/pkg/chunk/encoding/prometheus_chunk.go#L90-L95
109-
for a.Next() {
109+
for {
110+
valType := a.Next()
111+
if valType == chunkenc.ValNone {
112+
break
113+
}
110114
if t <= a.curr.Timestamps[a.curr.Index] {
111-
return true
115+
return valType
112116
}
113117
}
114118
}
115119
}
116120

117121
a.curr.Length = -1
118122
a.batchSize = 1
119-
if a.underlying.Seek(t, a.batchSize) {
123+
if valType := a.underlying.Seek(t, a.batchSize); valType != chunkenc.ValNone {
120124
a.curr = a.underlying.Batch()
121-
return a.curr.Index < a.curr.Length
125+
if a.curr.Index < a.curr.Length {
126+
return a.curr.ValType
127+
}
122128
}
123-
return false
129+
return chunkenc.ValNone
124130
}
125131

126132
// Next implements chunkenc.Iterator.
127-
func (a *iteratorAdapter) Next() bool {
133+
func (a *iteratorAdapter) Next() chunkenc.ValueType {
128134
a.curr.Index++
129-
for a.curr.Index >= a.curr.Length && a.underlying.Next(a.batchSize) {
135+
for a.curr.Index >= a.curr.Length && a.underlying.Next(a.batchSize) != chunkenc.ValNone {
130136
a.curr = a.underlying.Batch()
131137
a.batchSize = a.batchSize * 2
132138
if a.batchSize > chunk.BatchSize {
133139
a.batchSize = chunk.BatchSize
134140
}
135141
}
136-
return a.curr.Index < a.curr.Length
142+
if a.curr.Index < a.curr.Length {
143+
return a.curr.ValType
144+
}
145+
return chunkenc.ValNone
137146
}
138147

139148
// At implements chunkenc.Iterator.
@@ -145,3 +154,18 @@ func (a *iteratorAdapter) At() (int64, float64) {
145154
func (a *iteratorAdapter) Err() error {
146155
return nil
147156
}
157+
158+
// AtHistogram implements chunkenc.Iterator.
159+
func (a *iteratorAdapter) AtHistogram(h *histogram.Histogram) (int64, *histogram.Histogram) {
160+
return a.curr.Timestamps[a.curr.Index], a.curr.Histograms[a.curr.Index]
161+
}
162+
163+
// AtFloatHistogram implements chunkenc.Iterator.
164+
func (a *iteratorAdapter) AtFloatHistogram(h *histogram.FloatHistogram) (int64, *histogram.FloatHistogram) {
165+
return a.curr.Timestamps[a.curr.Index], a.curr.FloatHistograms[a.curr.Index]
166+
}
167+
168+
// AtT implements chunkenc.Iterator.
169+
func (a *iteratorAdapter) AtT() int64 {
170+
return a.curr.Timestamps[a.curr.Index]
171+
}

0 commit comments

Comments
 (0)