Skip to content

Commit e435bc6

Browse files
committed
Makefile: Make the source archive build reproducible
The easy part was to figure out the latest Git commit date and use it as the last modification date for all files in the archive. We also pass a few flags to tar(1), in particular to set the owner and group to 0. The hard part was again Elixir/Mix. The Mix Hex component requires a cache file in order to build things offline (no idea why). This cache file is an ETS table. There are two issues with this table: * It contains a timestamp of the last time the cache was updated. * `ets:tab2file()` produces a different file each time it's called, even though the entries are the same. So in order to be able to reproduce this cache file, the content of the ETS table is dumped as an Erlang term to a text file. Then, when RabbitMQ is compiled, the first step is to recreate the ETS table from that Erlang term.
1 parent 40f3e9f commit e435bc6

File tree

1 file changed

+115
-8
lines changed

1 file changed

+115
-8
lines changed

Makefile

Lines changed: 115 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,59 @@ DEP_PLUGINS = rabbit_common/mk/rabbitmq-dist.mk \
2525
ERLANG_MK_REPO = https://github.com/rabbitmq/erlang.mk.git
2626
ERLANG_MK_COMMIT = rabbitmq-tmp
2727

28+
# Hook the Hex cache restoration into `deps`: this double-colon rule only adds
# `restore-hex-cache-ets-file` as a prerequisite and carries no recipe itself.
deps:: restore-hex-cache-ets-file
29+
2830
include rabbitmq-components.mk
2931
include erlang.mk
3032

33+
# --------------------------------------------------------------------
34+
# Mix Hex cache management.
35+
# --------------------------------------------------------------------
36+
37+
# We restore the initial Hex cache.ets file from an Erlang term created
38+
# at the time the source archive was prepared.
39+
#
40+
# See the `$(SOURCE_DIST)` recipe for the reason behind this step.
41+
42+
# `restore-hex-cache-ets-file` is only an alias for the cache file below. It is
# declared phony so that a stray file of the same name can never mask it.
.PHONY: restore-hex-cache-ets-file
restore-hex-cache-ets-file: deps/.hex/cache.ets

# Recreate the binary ETS cache from its Erlang-term dump whenever the dump is
# newer than the cache (or the cache is missing).
deps/.hex/cache.ets: deps/.hex/cache.erl
	$(gen_verbose) $(call erlang,$(call restore_hex_cache_from_erl_term,$<,$@))
46+
47+
# Erlang snippet, meant to be run through `$(call erlang,...)`, which rebuilds
# the Hex ETS cache file from its Erlang-term dump.
#
#   $(1)  Path of the term file to read. It must contain exactly two terms:
#         the table properties (a proplist) and the list of table entries.
#   $(2)  Path of the ETS file to write with `ets:tab2file/2`.
#
# The table is recreated with the recorded name, type, protection, keypos,
# heir and read/write concurrency settings; `named_table` and `compressed`
# are added when they were set on the dumped table. The options are
# accumulated (Options0 -> Options1 -> Options2) so that `named_table` is not
# lost when the `compressed` flag is processed. Every step is a pattern
# match, so a malformed dump or an I/O error makes the snippet fail.
#
# The snippet deliberately contains no Erlang comments.
define restore_hex_cache_from_erl_term
	In = "$(1)",
	Out = "$(2)",
	{ok, [Props, Entries]} = file:consult(In),
	Name = proplists:get_value(name, Props),
	Type = proplists:get_value(type, Props),
	Access = proplists:get_value(protection, Props),
	NamedTable = proplists:get_bool(named_table, Props),
	Keypos = proplists:get_value(keypos, Props),
	Heir = proplists:get_value(heir, Props),
	ReadConc = proplists:get_bool(read_concurrency, Props),
	WriteConc = proplists:get_bool(write_concurrency, Props),
	Compressed = proplists:get_bool(compressed, Props),
	Options0 = [
		Type,
		Access,
		{keypos, Keypos},
		{heir, Heir},
		{read_concurrency, ReadConc},
		{write_concurrency, WriteConc}],
	Options1 = case NamedTable of
		true -> [named_table | Options0];
		false -> Options0
	end,
	Options2 = case Compressed of
		true -> [compressed | Options1];
		false -> Options1
	end,
	Tab = ets:new(Name, Options2),
	[true = ets:insert(Tab, Entry) || Entry <- Entries],
	ok = ets:tab2file(Tab, Out),
	init:stop().
endef
80+
3181
# --------------------------------------------------------------------
3282
# Distribution.
3383
# --------------------------------------------------------------------
@@ -65,6 +115,7 @@ RSYNC_FLAGS += -a $(RSYNC_V) \
65115
--exclude '_build/' \
66116
--exclude 'cover/' \
67117
--exclude 'deps/' \
118+
--exclude 'doc/' \
68119
--exclude 'ebin/' \
69120
--exclude 'erl_crash.dump' \
70121
--exclude 'escript/' \
@@ -118,8 +169,9 @@ ZIP_V = $(ZIP_V_$(V))
118169
$(SOURCE_DIST): $(ERLANG_MK_RECURSIVE_DEPS_LIST)
119170
$(verbose) mkdir -p $(dir $@)
120171
$(gen_verbose) $(RSYNC) $(RSYNC_FLAGS) ./ $@/
121-
$(verbose) echo "$(PROJECT_DESCRIPTION) $(PROJECT_VERSION)" > $@/git-revisions.txt
122-
$(verbose) echo "$(PROJECT) $$(git rev-parse HEAD) $$(git describe --tags --exact-match 2>/dev/null || git symbolic-ref -q --short HEAD)" >> $@/git-revisions.txt
172+
# Start `git-revisions.txt` with the project description/version and the
# top-level Git revision, then start `$@.git-times.txt` with the top-level
# commit date. The date uses the `YYYYmmddHHMM.SS` layout (with the dot), the
# same layout used for the dependencies' dates appended to this file and the
# one `touch -t` accepts.
	$(verbose) echo "$(PROJECT_DESCRIPTION) $(PROJECT_VERSION)" > "$@/git-revisions.txt"
	$(verbose) echo "$(PROJECT) $$(git rev-parse HEAD) $$(git describe --tags --exact-match 2>/dev/null || git symbolic-ref -q --short HEAD)" >> "$@/git-revisions.txt"
	$(verbose) echo "$$(TZ= git --no-pager log -n 1 --format='%cd' --date='format-local:%Y%m%d%H%M.%S')" > "$@.git-times.txt"
123175
$(verbose) cat packaging/common/LICENSE.head > $@/LICENSE
124176
$(verbose) mkdir -p $@/deps/licensing
125177
$(verbose) set -e; for dep in $$(cat $(ERLANG_MK_RECURSIVE_DEPS_LIST) | LC_COLLATE=C sort); do \
@@ -152,7 +204,10 @@ $(SOURCE_DIST): $(ERLANG_MK_RECURSIVE_DEPS_LIST)
152204
find "$$dep" -maxdepth 1 -name 'LICENSE-*' -exec cp '{}' $@/deps/licensing \; ; \
153205
(cd $$dep; \
154206
echo "$$(basename "$$dep") $$(git rev-parse HEAD) $$(git describe --tags --exact-match 2>/dev/null || git symbolic-ref -q --short HEAD)") \
155-
>> $@/git-revisions.txt; \
207+
>> "$@/git-revisions.txt"; \
208+
(cd $$dep; \
209+
echo "$$(env TZ= git --no-pager log -n 1 --format='%cd' --date='format-local:%Y%m%d%H%M.%S')") \
210+
>> "$@.git-times.txt"; \
156211
done
157212
$(verbose) cat packaging/common/LICENSE.tail >> $@/LICENSE
158213
$(verbose) find $@/deps/licensing -name 'LICENSE-*' -exec cp '{}' $@ \;
@@ -164,27 +219,79 @@ $(SOURCE_DIST): $(ERLANG_MK_RECURSIVE_DEPS_LIST)
164219
rm $$file.bak; \
165220
done
166221
$(verbose) echo "PLUGINS := $(PLUGINS)" > $@/plugins.mk
222+
# Remember the latest Git timestamp: keep the greatest line of
# `$@.git-times.txt` in `$@.git-time.txt` (a scratch file created and removed
# within this recipe), then drop the full list.
	$(verbose) sort -r < "$@.git-times.txt" | head -n 1 > "$@.git-time.txt"
	$(verbose) rm "$@.git-times.txt"
# The Mix Hex component requires a cache file, otherwise it refuses to build
# offline... That cache is an ETS table with all the applications we
# depend on, plus some versioning information and checksums. There
# are two problems with that: the table contains a date (`last_update`
# field) and `ets:tab2file()` produces a different file each time it's
# called.
#
# To make our source archive reproducible, we fix the time of the
# `last_update` field to the last Git commit and dump the content of the
# table as an Erlang term to a text file.
#
# The ETS file must be recreated before compiling RabbitMQ. See the
# `restore-hex-cache-ets-file` Make target.
	$(verbose) $(call erlang,$(call dump_hex_cache_to_erl_term,$@,$@.git-time.txt))
# Fix file timestamps to have reproducible source archives.
	$(verbose) find $@ -print0 | xargs -0 touch -t "$$(cat "$@.git-time.txt")"
	$(verbose) rm "$@.git-time.txt"
242+
243+
# Erlang snippet, meant to be run through `$(call erlang,...)`, which turns
# the Hex ETS cache file into a reproducible Erlang-term text file.
#
#   $(1)  Root of the source tree being prepared; the snippet reads
#         `$(1)/deps/.hex/cache.ets` and writes `$(1)/deps/.hex/cache.erl`.
#   $(2)  Path of a file whose content starts with a `YYYYmmddHHMM.SS` date.
#
# Steps:
#   1. Parse the date from $(2). The dot is matched with `[.]` so the pattern
#      does not rely on a backslash escape surviving the shell and Erlang
#      string quoting.
#   2. Load the table and overwrite its `last_update` entry with that date.
#   3. Keep only the table properties needed to recreate the table.
#   4. Write the properties and the *sorted* entries as two Erlang terms.
#      Sorting matters because `ets:tab2list/1` gives no ordering guarantee
#      for hashed tables, and the dump is meant to be byte-for-byte stable.
#   5. Delete the original ETS file.
#
# Every step is a pattern match, so any failure aborts the snippet. The
# snippet deliberately contains no Erlang comments.
define dump_hex_cache_to_erl_term
	In = "$(1)/deps/.hex/cache.ets",
	Out = "$(1)/deps/.hex/cache.erl",
	{ok, DateStr} = file:read_file("$(2)"),
	{match, Date} = re:run(DateStr,
		"^([0-9]{4})([0-9]{2})([0-9]{2})([0-9]{2})([0-9]{2})[.]([0-9]{2})",
		[{capture, all_but_first, list}]),
	[Year, Month, Day, Hour, Min, Sec] = [erlang:list_to_integer(V) || V <- Date],
	{ok, Tab} = ets:file2tab(In),
	true = ets:insert(Tab, {last_update, {{Year, Month, Day}, {Hour, Min, Sec}}}),
	KeptKeys = [
		name,
		type,
		protection,
		named_table,
		keypos,
		heir,
		read_concurrency,
		write_concurrency,
		compressed],
	Props = [
		Prop
		|| {Key, _} = Prop <- ets:info(Tab),
		lists:member(Key, KeptKeys)],
	Entries = lists:sort(ets:tab2list(Tab)),
	ok = file:write_file(Out, io_lib:format("~p.~n~p.~n", [Props, Entries])),
	ok = file:delete(In),
	init:stop().
endef
167270

168271
# List every path under the prepared source tree, relative to the tree's
# parent directory and sorted in the C collation order, one path per line.
$(SOURCE_DIST).manifest: $(SOURCE_DIST)
	$(gen_verbose) cd $(<D) && \
		find $(<F) | LC_COLLATE=C sort > $@
171274

172-
# TODO: Fix file timestamps to have reproducible source archives.
173-
# $(verbose) find $@ -not -name 'git-revisions.txt' -print0 | xargs -0 touch -r $@/git-revisions.txt
275+
# tar(1) flags shared by all tarball rules so the archive does not record
# host-specific metadata: owner and group are forced to 0, and ACLs, file
# flags and extended attributes are left out.
# NOTE(review): these option spellings look like bsdtar's -- confirm $(TAR).
TAR_FLAGS_FOR_REPRODUCIBLE_BUILDS = --uid 0 --gid 0 --numeric-owner
TAR_FLAGS_FOR_REPRODUCIBLE_BUILDS += --no-acls --no-fflags --no-xattrs
174281

175282
# Gzip-compressed source archive. Files are taken from the sorted manifest
# (hence `--no-recursion`) and streamed through $(GZIP).
# `-n` keeps gzip from storing a name/timestamp in its header, which would
# otherwise make two runs on identical input produce different archives.
$(SOURCE_DIST).tar.gz: $(SOURCE_DIST).manifest
	$(gen_verbose) cd $(dir $(SOURCE_DIST)) && \
		$(TAR) $(TAR_V) $(TAR_FLAGS_FOR_REPRODUCIBLE_BUILDS) --no-recursion -T $(SOURCE_DIST).manifest -cf - | \
		$(GZIP) -n --best > $@
179286

180287
# Bzip2-compressed source archive: tar the paths listed in the manifest (the
# sole prerequisite, `$<`) from the tree's parent directory and pipe the
# stream through $(BZIP2).
$(SOURCE_DIST).tar.bz2: $(SOURCE_DIST).manifest
	$(gen_verbose) cd $(@D) && \
		$(TAR) $(TAR_V) $(TAR_FLAGS_FOR_REPRODUCIBLE_BUILDS) --no-recursion -T $< -cf - | \
		$(BZIP2) > $@
184291

185292
# XZ-compressed source archive: tar the paths listed in the manifest (the
# sole prerequisite, `$<`) from the tree's parent directory and pipe the
# stream through $(XZ).
$(SOURCE_DIST).tar.xz: $(SOURCE_DIST).manifest
	$(gen_verbose) cd $(@D) && \
		$(TAR) $(TAR_V) $(TAR_FLAGS_FOR_REPRODUCIBLE_BUILDS) --no-recursion -T $< -cf - | \
		$(XZ) > $@
189296

190297
$(SOURCE_DIST).zip: $(SOURCE_DIST).manifest

0 commit comments

Comments
 (0)