Skip to content

Commit f370211

Browse files
committed
Add LineReaderAt interface and implementation
This is a line-oriented parallel to io.ReaderAt, meant for text applies. While the mapping isn't quite as clean as in the binary case, a text apply still reads a fixed chunk of lines starting at a specific line number and modifies them. This also allows a consistent interface for strict and fuzzy applies. The implementation wraps an io.ReaderAt and reads data in chunks, indexing line boundaries as it goes. This is probably not the most efficient way to implement this interface, but it works and allows file application to take a consistent interface.
1 parent 131a046 commit f370211

File tree

1 file changed

+132
-0
lines changed

1 file changed

+132
-0
lines changed

gitdiff/io.go

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package gitdiff
22

33
import (
44
"bufio"
5+
"errors"
56
"fmt"
67
"io"
78
)
@@ -77,3 +78,134 @@ func unwrapLineReader(lr LineReader) io.Reader {
7778
panic(fmt.Sprintf("%T does not implement io.Reader and is not a gitdiff wrapper", lr))
7879
}
7980
}
81+
82+
// LineReaderAt is the interface that wraps the ReadLinesAt method.
//
// ReadLinesAt reads up to len(lines) lines into lines, starting at line
// offset in the input source. It returns the number of full lines read
// (0 <= n <= len(lines)) and any error encountered. Line numbers are
// zero-indexed.
//
// If n < len(lines), ReadLinesAt returns a non-nil error explaining why more
// lines were not returned.
//
// Each full line includes the line ending character(s). If the last line of
// the input does not have a line ending character, ReadLinesAt returns the
// content of the line and io.EOF.
//
// If the content of the input source changes after the first call to
// ReadLinesAt, the behavior of future calls is undefined.
type LineReaderAt interface {
	ReadLinesAt(lines [][]byte, offset int64) (n int, err error)
}
100+
101+
// NewLineReaderAt creates a LineReaderAt from an io.ReaderAt.
102+
func NewLineReaderAt(r io.ReaderAt) LineReaderAt {
103+
return &lineReaderAt{r: r}
104+
}
105+
106+
// lineReaderAt implements LineReaderAt on top of an io.ReaderAt by building
// an index of line boundaries as data is read.
type lineReaderAt struct {
	// r is the underlying byte source.
	r io.ReaderAt
	// index holds one entry per line discovered so far: index[i] is the byte
	// offset of the first byte after line i.
	index []int64
	// eof is set once indexing has seen io.EOF from r, meaning index covers
	// the whole input.
	eof bool
}
111+
112+
func (r *lineReaderAt) ReadLinesAt(lines [][]byte, offset int64) (n int, err error) {
113+
// TODO(bkeyes): revisit variable names
114+
// - it's generally not clear when something is bytes vs lines
115+
// - offset is a good example of this
116+
117+
if len(lines) == 0 {
118+
return 0, nil
119+
}
120+
121+
endLine := offset + int64(len(lines))
122+
if endLine > int64(len(r.index)) && !r.eof {
123+
if err := r.indexTo(endLine); err != nil {
124+
return 0, err
125+
}
126+
}
127+
if offset > int64(len(r.index)) {
128+
return 0, io.EOF
129+
}
130+
131+
// TODO(bkeyes): check usage of int / int64
132+
// - interface uses int64 for arbitrarily large files
133+
// - implementation is limited to int lines by index array
134+
135+
// offset <= len(r.index) means that it must fit in int without loss
136+
size, readOffset := lookupLines(r.index, int(offset), len(lines))
137+
138+
b := make([]byte, size)
139+
if _, err := r.r.ReadAt(b, readOffset); err != nil {
140+
if err == io.EOF {
141+
err = errors.New("ReadLinesAt: corrupt line index or changed source data")
142+
}
143+
return 0, err
144+
}
145+
146+
for n = 0; n < len(lines) && offset+int64(n) < int64(len(r.index)); n++ {
147+
i := int(offset) + n
148+
start, end := readOffset, r.index[i]
149+
if i > 0 {
150+
start = r.index[i-1]
151+
}
152+
lines[n] = b[start-readOffset : end-readOffset]
153+
}
154+
155+
if n < len(lines) || b[size-1] != '\n' {
156+
return n, io.EOF
157+
}
158+
return n, nil
159+
}
160+
161+
// indexTo reads data and computes the line index until there is information
162+
// for line or a read returns io.EOF. It returns an error if and only if there
163+
// is an error reading data.
164+
func (r *lineReaderAt) indexTo(line int64) error {
165+
var buf [1024]byte
166+
167+
var offset int64
168+
if len(r.index) > 0 {
169+
offset = r.index[len(r.index)-1]
170+
}
171+
172+
for int64(len(r.index)) < line {
173+
n, err := r.r.ReadAt(buf[:], offset)
174+
if err != nil && err != io.EOF {
175+
return err
176+
}
177+
for _, b := range buf[:n] {
178+
offset++
179+
if b == '\n' {
180+
r.index = append(r.index, offset)
181+
}
182+
}
183+
if err == io.EOF {
184+
if n > 0 && buf[n-1] != '\n' {
185+
r.index = append(r.index, offset)
186+
}
187+
r.eof = true
188+
break
189+
}
190+
}
191+
return nil
192+
}
193+
194+
// lookupLines gets the byte offset and size of a range of lines from an index
// where the value at n is the offset of the first byte after line number n.
//
// The requested range [start, start+n) is clamped to the lines present in the
// index, so the result never extends past the indexed data:
//   - an empty index yields size 0 at offset 0;
//   - a start at or past the end yields size 0 at the end of the indexed data;
//   - lines before line 0 (negative start) are dropped from the range;
//   - n <= 0 yields size 0 at the offset of start.
func lookupLines(index []int64, start, n int) (size int64, offset int64) {
	if len(index) == 0 {
		return 0, 0
	}

	if start < 0 {
		// Shrink the count by the number of nonexistent leading lines. The
		// operands have opposite signs, so the sum cannot overflow.
		if n > 0 {
			n += start
		}
		start = 0
	}
	if start > len(index) {
		start = len(index)
	}
	if start > 0 {
		offset = index[start-1]
	}
	if n <= 0 {
		return 0, offset
	}

	// Compare n against the remaining line count instead of computing
	// start+n first, which could overflow for very large n.
	end := len(index)
	if n < end-start {
		end = start + n
	}
	return index[end-1] - offset, offset
}

0 commit comments

Comments
 (0)