@@ -2,6 +2,7 @@ package gitdiff
22
33import (
44 "bufio"
5+ "errors"
56 "fmt"
67 "io"
78)
@@ -77,3 +78,134 @@ func unwrapLineReader(lr LineReader) io.Reader {
7778 panic (fmt .Sprintf ("%T does not implement io.Reader and is not a gitdiff wrapper" , lr ))
7879 }
7980}
81+
// LineReaderAt is the interface that wraps the ReadLinesAt method.
//
// ReadLinesAt reads up to len(lines) lines into lines, starting at the
// zero-indexed line number offset in the input source. It returns the number
// of full lines read (0 <= n <= len(lines)) and any error encountered.
//
// If n < len(lines), ReadLinesAt returns a non-nil error explaining why more
// lines were not returned.
//
// Each full line includes its line ending character(s). If the last line of
// the input does not end with a line ending character, ReadLinesAt returns
// the content of that line together with io.EOF.
//
// If the content of the input source changes after the first call to
// ReadLinesAt, the behavior of future calls is undefined.
type LineReaderAt interface {
	ReadLinesAt(lines [][]byte, offset int64) (n int, err error)
}
100+
101+ // NewLineReaderAt creates a LineReaderAt from an io.ReaderAt.
102+ func NewLineReaderAt (r io.ReaderAt ) LineReaderAt {
103+ return & lineReaderAt {r : r }
104+ }
105+
// lineReaderAt implements ReadLinesAt on top of an io.ReaderAt by keeping an
// incrementally built index of line boundaries.
type lineReaderAt struct {
	// r is the underlying byte source.
	r io.ReaderAt

	// index holds, for each line indexed so far, the byte offset of the
	// first byte after that line.
	index []int64

	// eof records that indexing has observed io.EOF from r, so index
	// describes every line of the input.
	eof bool
}
111+
112+ func (r * lineReaderAt ) ReadLinesAt (lines [][]byte , offset int64 ) (n int , err error ) {
113+ // TODO(bkeyes): revisit variable names
114+ // - it's generally not clear when something is bytes vs lines
115+ // - offset is a good example of this
116+
117+ if len (lines ) == 0 {
118+ return 0 , nil
119+ }
120+
121+ endLine := offset + int64 (len (lines ))
122+ if endLine > int64 (len (r .index )) && ! r .eof {
123+ if err := r .indexTo (endLine ); err != nil {
124+ return 0 , err
125+ }
126+ }
127+ if offset > int64 (len (r .index )) {
128+ return 0 , io .EOF
129+ }
130+
131+ // TODO(bkeyes): check usage of int / int64
132+ // - interface uses int64 for arbitrarily large files
133+ // - implementation is limited to int lines by index array
134+
135+ // offset <= len(r.index) means that it must fit in int without loss
136+ size , readOffset := lookupLines (r .index , int (offset ), len (lines ))
137+
138+ b := make ([]byte , size )
139+ if _ , err := r .r .ReadAt (b , readOffset ); err != nil {
140+ if err == io .EOF {
141+ err = errors .New ("ReadLinesAt: corrupt line index or changed source data" )
142+ }
143+ return 0 , err
144+ }
145+
146+ for n = 0 ; n < len (lines ) && offset + int64 (n ) < int64 (len (r .index )); n ++ {
147+ i := int (offset ) + n
148+ start , end := readOffset , r .index [i ]
149+ if i > 0 {
150+ start = r .index [i - 1 ]
151+ }
152+ lines [n ] = b [start - readOffset : end - readOffset ]
153+ }
154+
155+ if n < len (lines ) || b [size - 1 ] != '\n' {
156+ return n , io .EOF
157+ }
158+ return n , nil
159+ }
160+
161+ // indexTo reads data and computes the line index until there is information
162+ // for line or a read returns io.EOF. It returns an error if and only if there
163+ // is an error reading data.
164+ func (r * lineReaderAt ) indexTo (line int64 ) error {
165+ var buf [1024 ]byte
166+
167+ var offset int64
168+ if len (r .index ) > 0 {
169+ offset = r .index [len (r .index )- 1 ]
170+ }
171+
172+ for int64 (len (r .index )) < line {
173+ n , err := r .r .ReadAt (buf [:], offset )
174+ if err != nil && err != io .EOF {
175+ return err
176+ }
177+ for _ , b := range buf [:n ] {
178+ offset ++
179+ if b == '\n' {
180+ r .index = append (r .index , offset )
181+ }
182+ }
183+ if err == io .EOF {
184+ if n > 0 && buf [n - 1 ] != '\n' {
185+ r .index = append (r .index , offset )
186+ }
187+ r .eof = true
188+ break
189+ }
190+ }
191+ return nil
192+ }
193+
// lookupLines gets the byte offset and size of a range of lines from an index
// where the value at n is the offset of the first byte after line number n.
//
// The requested range [start, start+n) is clipped to the lines present in
// index. offset is the byte position where the first line of the clipped
// range begins (the end of the input when start is past the last line), and
// size is the number of bytes spanned by the clipped range. An empty index,
// a non-positive n, or a range that does not intersect the index yields a
// size of 0.
func lookupLines(index []int64, start, n int) (size int64, offset int64) {
	total := len(index)
	if total == 0 {
		return 0, 0
	}

	// Clip the first line to [0, total].
	first := start
	switch {
	case first < 0:
		first = 0
	case first > total:
		first = total
	}

	// Compute the exclusive end line without overflowing start+n.
	stop := first
	switch {
	case n <= 0:
		// Empty range.
	case start < 0:
		// A negative start plus a positive n cannot overflow.
		stop = start + n
	case n > total-start:
		stop = total
	default:
		stop = start + n
	}
	if stop < first {
		stop = first
	}
	if stop > total {
		stop = total
	}

	if first > 0 {
		offset = index[first-1]
	}
	if stop > first {
		size = index[stop-1] - offset
	}
	return size, offset
}
0 commit comments