Skip to content

Commit e6e045f

Browse files
stefanbellergitster
authored andcommitted
diff.c: buffer all output if asked to
Introduce a new option 'emitted_symbols' in the struct diff_options which controls whether all output is buffered up until all output is available. It is set internally in diff.c when necessary. We'll have a new struct 'emitted_diff_symbol' in diff.c which will be used to buffer each line. The emitted_diff_symbol will duplicate the memory of the line to buffer as that is easiest to reason about for now. In a future patch we may want to decrease the memory usage by not duplicating all output for buffering but rather we may want to store offsets into the file or in case of hunk descriptions such as the similarity score, we could just store the relevant number and reproduce the text later on. This approach was chosen as a first step because it is quite simple compared to the alternative with a smaller memory footprint. emit_diff_symbol factors out the emission part and depending on the diff_options->emitted_symbols the emission will be performed directly when calling emit_diff_symbol or after the whole process is done, i.e. by buffering we add the possibility for a second pass over the whole output before doing the actual output. In 6440d34 (2012-03-14, diff: tweak a _copy_ of diff_options with word-diff) we introduced a duplicate diff options struct for word emissions as we may have different regex settings in there. When buffering the output, we need to operate on just one buffer, so we have to copy back the emissions of the word buffer into the main buffer. Unconditionally enable output via buffer in this patch as it yields a great opportunity for testing, i.e. all the diff tests from the test suite pass without having reordering issues (i.e. only parts of the output got buffered, and we forgot to buffer other parts). The test suite passes, which gives confidence that we converted all functions to use emit_diff_symbol for output. Signed-off-by: Stefan Beller <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent 146fdb0 commit e6e045f

File tree

2 files changed

+109
-2
lines changed

2 files changed

+109
-2
lines changed

diff.c

Lines changed: 107 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -605,6 +605,47 @@ enum diff_symbol {
605605
#define DIFF_SYMBOL_CONTENT_BLANK_LINE_EOF (1<<16)
606606
#define DIFF_SYMBOL_CONTENT_WS_MASK (WSEH_NEW | WSEH_OLD | WSEH_CONTEXT | WS_RULE_MASK)
607607

608+
/*
609+
* This struct is used when we need to buffer the output of the diff output.
610+
*
611+
* NEEDSWORK: Instead of storing a copy of the line, add an offset pointer
612+
* into the pre/post image file. This pointer could be a union with the
613+
* line pointer. By storing an offset into the file instead of the literal line,
614+
* we can decrease the memory footprint for the buffered output. At first we
615+
* may want to only have indirection for the content lines, but we could also
616+
* enhance the state for emitting prefabricated lines, e.g. the similarity
617+
* score line or hunk/file headers would only need to store a number or path
618+
* and then the output can be constructed later on depending on state.
619+
*/
620+
struct emitted_diff_symbol {
621+
const char *line;
622+
int len;
623+
int flags;
624+
enum diff_symbol s;
625+
};
626+
#define EMITTED_DIFF_SYMBOL_INIT {NULL}
627+
628+
/*
 * Growable array of buffered diff symbols. 'buf', 'nr' and 'alloc' are
 * the triple that ALLOC_GROW() operates on in append_emitted_diff_symbol().
 */
struct emitted_diff_symbols {
	struct emitted_diff_symbol *buf;	/* the entries themselves */
	int nr;					/* entries currently in use */
	int alloc;				/* entries allocated in 'buf' */
};
#define EMITTED_DIFF_SYMBOLS_INIT { NULL, 0, 0 }
633+
634+
static void append_emitted_diff_symbol(struct diff_options *o,
635+
struct emitted_diff_symbol *e)
636+
{
637+
struct emitted_diff_symbol *f;
638+
639+
ALLOC_GROW(o->emitted_symbols->buf,
640+
o->emitted_symbols->nr + 1,
641+
o->emitted_symbols->alloc);
642+
f = &o->emitted_symbols->buf[o->emitted_symbols->nr++];
643+
644+
memcpy(f, e, sizeof(struct emitted_diff_symbol));
645+
f->line = e->line ? xmemdupz(e->line, e->len) : NULL;
646+
}
647+
648+
608649
static void emit_line_ws_markup(struct diff_options *o,
609650
const char *set, const char *reset,
610651
const char *line, int len, char sign,
@@ -631,12 +672,18 @@ static void emit_line_ws_markup(struct diff_options *o,
631672
}
632673
}
633674

634-
static void emit_diff_symbol(struct diff_options *o, enum diff_symbol s,
635-
const char *line, int len, unsigned flags)
675+
static void emit_diff_symbol_from_struct(struct diff_options *o,
676+
struct emitted_diff_symbol *eds)
636677
{
637678
static const char *nneof = " No newline at end of file\n";
638679
const char *context, *reset, *set, *meta, *fraginfo;
639680
struct strbuf sb = STRBUF_INIT;
681+
682+
enum diff_symbol s = eds->s;
683+
const char *line = eds->line;
684+
int len = eds->len;
685+
unsigned flags = eds->flags;
686+
640687
switch (s) {
641688
case DIFF_SYMBOL_NO_LF_EOF:
642689
context = diff_get_color_opt(o, DIFF_CONTEXT);
@@ -778,6 +825,17 @@ static void emit_diff_symbol(struct diff_options *o, enum diff_symbol s,
778825
strbuf_release(&sb);
779826
}
780827

828+
static void emit_diff_symbol(struct diff_options *o, enum diff_symbol s,
829+
const char *line, int len, unsigned flags)
830+
{
831+
struct emitted_diff_symbol e = {line, len, flags, s};
832+
833+
if (o->emitted_symbols)
834+
append_emitted_diff_symbol(o, &e);
835+
else
836+
emit_diff_symbol_from_struct(o, &e);
837+
}
838+
781839
void diff_emit_submodule_del(struct diff_options *o, const char *line)
782840
{
783841
emit_diff_symbol(o, DIFF_SYMBOL_SUBMODULE_DEL, line, strlen(line), 0);
@@ -1374,9 +1432,29 @@ static void diff_words_show(struct diff_words_data *diff_words)
13741432
/* In "color-words" mode, show word-diff of words accumulated in the buffer */
13751433
static void diff_words_flush(struct emit_callback *ecbdata)
13761434
{
1435+
struct diff_options *wo = ecbdata->diff_words->opt;
1436+
13771437
if (ecbdata->diff_words->minus.text.size ||
13781438
ecbdata->diff_words->plus.text.size)
13791439
diff_words_show(ecbdata->diff_words);
1440+
1441+
if (wo->emitted_symbols) {
1442+
struct diff_options *o = ecbdata->opt;
1443+
struct emitted_diff_symbols *wol = wo->emitted_symbols;
1444+
int i;
1445+
1446+
/*
1447+
* NEEDSWORK:
1448+
* Instead of appending each, concat all words to a line?
1449+
*/
1450+
for (i = 0; i < wol->nr; i++)
1451+
append_emitted_diff_symbol(o, &wol->buf[i]);
1452+
1453+
for (i = 0; i < wol->nr; i++)
1454+
free((void *)wol->buf[i].line);
1455+
1456+
wol->nr = 0;
1457+
}
13801458
}
13811459

13821460
static void diff_filespec_load_driver(struct diff_filespec *one)
@@ -1412,6 +1490,11 @@ static void init_diff_words_data(struct emit_callback *ecbdata,
14121490
xcalloc(1, sizeof(struct diff_words_data));
14131491
ecbdata->diff_words->type = o->word_diff;
14141492
ecbdata->diff_words->opt = o;
1493+
1494+
if (orig_opts->emitted_symbols)
1495+
o->emitted_symbols =
1496+
xcalloc(1, sizeof(struct emitted_diff_symbols));
1497+
14151498
if (!o->word_regex)
14161499
o->word_regex = userdiff_word_regex(one);
14171500
if (!o->word_regex)
@@ -1446,6 +1529,7 @@ static void free_diff_words_data(struct emit_callback *ecbdata)
14461529
{
14471530
if (ecbdata->diff_words) {
14481531
diff_words_flush(ecbdata);
1532+
free (ecbdata->diff_words->opt->emitted_symbols);
14491533
free (ecbdata->diff_words->opt);
14501534
free (ecbdata->diff_words->minus.text.ptr);
14511535
free (ecbdata->diff_words->minus.orig);
@@ -4996,16 +5080,37 @@ void diff_warn_rename_limit(const char *varname, int needed, int degraded_cc)
49965080
static void diff_flush_patch_all_file_pairs(struct diff_options *o)
49975081
{
49985082
int i;
5083+
static struct emitted_diff_symbols esm = EMITTED_DIFF_SYMBOLS_INIT;
49995084
struct diff_queue_struct *q = &diff_queued_diff;
50005085

50015086
if (WSEH_NEW & WS_RULE_MASK)
50025087
die("BUG: WS rules bit mask overlaps with diff symbol flags");
50035088

5089+
/*
5090+
* For testing purposes we want to make sure the diff machinery
5091+
* works completely with the buffer. If there is anything emitted
5092+
* outside the emit_string, then the order is screwed
5093+
* up and the tests will fail.
5094+
*
5095+
* TODO (later in this series):
5096+
* We'll unset this pointer in a later patch.
5097+
*/
5098+
o->emitted_symbols = &esm;
5099+
50045100
for (i = 0; i < q->nr; i++) {
50055101
struct diff_filepair *p = q->queue[i];
50065102
if (check_pair_status(p))
50075103
diff_flush_patch(p, o);
50085104
}
5105+
5106+
if (o->emitted_symbols) {
5107+
for (i = 0; i < esm.nr; i++)
5108+
emit_diff_symbol_from_struct(o, &esm.buf[i]);
5109+
5110+
for (i = 0; i < esm.nr; i++)
5111+
free((void *)esm.buf[i].line);
5112+
}
5113+
esm.nr = 0;
50095114
}
50105115

50115116
void diff_flush(struct diff_options *options)

diff.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,8 @@ struct diff_options {
186186
void *output_prefix_data;
187187

188188
int diff_path_counter;
189+
190+
struct emitted_diff_symbols *emitted_symbols;
189191
};
190192

191193
void diff_emit_submodule_del(struct diff_options *o, const char *line);

0 commit comments

Comments
 (0)