diff --git a/Documentation/config/core.txt b/Documentation/config/core.txt index 2db16334252271..ab8b485b0d2464 100644 --- a/Documentation/config/core.txt +++ b/Documentation/config/core.txt @@ -576,12 +576,29 @@ core.whitespace:: errors. The default tab width is 8. Allowed values are 1 to 63. core.fsyncObjectFiles:: - This boolean will enable 'fsync()' when writing object files. -+ -This is a total waste of time and effort on a filesystem that orders -data writes properly, but can be useful for filesystems that do not use -journalling (traditional UNIX filesystems) or that only journal metadata -and not file contents (OS X's HFS+, or Linux ext3 with "data=writeback"). + A value indicating the level of effort Git will expend in + trying to make objects added to the repo durable in the event + of an unclean system shutdown. This setting currently only + controls loose objects in the object store, so updates to any + refs or the index may not be equally durable. ++ +* `false` allows data to remain in file system caches according to + operating system policy, whence it may be lost if the system loses power + or crashes. +* `true` triggers a data integrity flush for each loose object added to the + object store. This is the safest setting that is likely to ensure durability + across all operating systems and file systems that honor the 'fsync' system + call. However, this setting comes with a significant performance cost on + common hardware. Git does not currently fsync parent directories for + newly-added files, so some filesystems may still allow data to be lost on + system crash. +* `batch` enables an experimental mode that uses interfaces available in some + operating systems to write loose object data with a minimal set of FLUSH + CACHE (or equivalent) commands sent to the storage controller. If the + operating system interfaces are not available, this mode behaves the same as + `true`. This mode is expected to be as safe as `true` on macOS for repos + stored on HFS+ or APFS filesystems and on Windows for repos stored on NTFS or + ReFS. core.preloadIndex:: Enable parallel index preload for operations like 'git diff' diff --git a/Makefile b/Makefile index f54d62d5562d91..2d9c4f0dbf36ad 100644 --- a/Makefile +++ b/Makefile @@ -406,6 +406,8 @@ all:: # # Define HAVE_CLOCK_MONOTONIC if your platform has CLOCK_MONOTONIC. # +# Define HAVE_SYNC_FILE_RANGE if your platform has sync_file_range. +# # Define NEEDS_LIBRT if your platform requires linking with librt (glibc version # before 2.17) for clock_gettime and CLOCK_MONOTONIC. # @@ -1913,6 +1915,10 @@ ifdef HAVE_CLOCK_MONOTONIC BASIC_CFLAGS += -DHAVE_CLOCK_MONOTONIC endif +ifdef HAVE_SYNC_FILE_RANGE + BASIC_CFLAGS += -DHAVE_SYNC_FILE_RANGE +endif + ifdef NEEDS_LIBRT EXTLIBS += -lrt endif diff --git a/builtin/prune.c b/builtin/prune.c index 847864dbe50bfb..169d47784a22cc 100644 --- a/builtin/prune.c +++ b/builtin/prune.c @@ -19,6 +19,7 @@ static int show_only; static int verbose; static timestamp_t expire; static int show_progress = -1; +static struct strbuf remove_dir_buf = STRBUF_INIT; static int prune_tmp_file(const char *fullpath) { @@ -27,10 +28,20 @@ static int prune_tmp_file(const char *fullpath) return error("Could not stat '%s'", fullpath); if (st.st_mtime > expire) return 0; - if (show_only || verbose) - printf("Removing stale temporary file %s\n", fullpath); - if (!show_only) - unlink_or_warn(fullpath); + if (S_ISDIR(st.st_mode)) { + if (show_only || verbose) + printf("Removing stale temporary directory %s\n", fullpath); + if (!show_only) { + strbuf_reset(&remove_dir_buf); + strbuf_addstr(&remove_dir_buf, fullpath); + remove_dir_recursively(&remove_dir_buf, 0); + } + } else { + if (show_only || verbose) + printf("Removing stale temporary file %s\n", fullpath); + if (!show_only) + unlink_or_warn(fullpath); + } return 0; } @@ -98,6 +109,9 @@ static int prune_cruft(const char *basename, const char *path, void *data) static int prune_subdir(unsigned int nr, const char *path, void *data) { + if (verbose) + printf("Removing directory %s\n", path); + if (!show_only) rmdir(path); return 0; @@ -188,5 +202,6 @@ int cmd_prune(int argc, const char **argv, const char *prefix) prune_shallow(show_only ? PRUNE_SHOW_ONLY : 0); } + strbuf_release(&remove_dir_buf); return 0; } diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index 2d1f97e1ca7b53..34626b978e5952 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -2211,7 +2211,7 @@ static const char *unpack(int err_fd, struct shallow_info *si) strvec_push(&child.args, alt_shallow_file); } - tmp_objdir = tmp_objdir_create(); + tmp_objdir = tmp_objdir_create("incoming"); if (!tmp_objdir) { if (err_fd > 0) close(err_fd); diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c index 4a9466295ba10d..51eb4f7b531aab 100644 --- a/builtin/unpack-objects.c +++ b/builtin/unpack-objects.c @@ -1,5 +1,6 @@ #include "builtin.h" #include "cache.h" +#include "bulk-checkin.h" #include "config.h" #include "object-store.h" #include "object.h" @@ -503,10 +504,12 @@ static void unpack_all(void) if (!quiet) progress = start_progress(_("Unpacking objects"), nr_objects); CALLOC_ARRAY(obj_list, nr_objects); + plug_bulk_checkin(); for (i = 0; i < nr_objects; i++) { unpack_one(i); display_progress(progress, i + 1); } + unplug_bulk_checkin(); stop_progress(&progress); if (delta_list) diff --git a/builtin/update-index.c b/builtin/update-index.c index 3f1ed955498f33..62f35cd6e51320 100644 --- a/builtin/update-index.c +++ b/builtin/update-index.c @@ -5,6 +5,7 @@ */ #define USE_THE_INDEX_COMPATIBILITY_MACROS #include "cache.h" +#include "bulk-checkin.h" #include "config.h" #include "lockfile.h" #include "quote.h" @@ -1088,6 +1089,9 @@ int cmd_update_index(int argc, const char **argv, const char *prefix) the_index.updated_skipworktree = 1; + /* we might be adding many objects to the object database */ + plug_bulk_checkin(); + /* * Custom copy of parse_options() because we want to handle * filename arguments as they come. @@ -1168,6 +1172,8 @@ int cmd_update_index(int argc, const char **argv, const char *prefix) strbuf_release(&buf); } + /* by now we must have added all of the new objects */ + unplug_bulk_checkin(); if (split_index > 0) { if (git_config_get_split_index() == 0) warning(_("core.splitIndex is set to false; " diff --git a/bulk-checkin.c b/bulk-checkin.c index 8785b2ac806992..4deee1af46e946 100644 --- a/bulk-checkin.c +++ b/bulk-checkin.c @@ -3,16 +3,22 @@ */ #include "cache.h" #include "bulk-checkin.h" +#include "lockfile.h" #include "repository.h" #include "csum-file.h" #include "pack.h" #include "strbuf.h" +#include "string-list.h" +#include "tmp-objdir.h" #include "packfile.h" #include "object-store.h" -static struct bulk_checkin_state { - unsigned plugged:1; +static int bulk_checkin_plugged; +static int needs_batch_fsync; + +static struct tmp_objdir *bulk_fsync_objdir; +static struct bulk_checkin_state { char *pack_tmp_name; struct hashfile *f; off_t offset; @@ -21,7 +27,7 @@ static struct bulk_checkin_state { struct pack_idx_entry **written; uint32_t alloc_written; uint32_t nr_written; -} state; +} bulk_checkin_state; static void finish_tmp_packfile(struct strbuf *basename, const char *pack_tmp_name, @@ -79,6 +85,34 @@ static void finish_bulk_checkin(struct bulk_checkin_state *state) reprepare_packed_git(the_repository); } +/* + * Cleanup after batch-mode fsync_object_files. + */ +static void do_batch_fsync(void) +{ + /* + * Issue a full hardware flush against a temporary file to ensure + * that all objects are durable before any renames occur. The code in + * fsync_loose_object_bulk_checkin has already issued a writeout + * request, but it has not flushed any writeback cache in the storage + * hardware. + */ + + if (needs_batch_fsync) { + struct strbuf temp_path = STRBUF_INIT; + struct tempfile *temp; + + strbuf_addf(&temp_path, "%s/bulk_fsync_XXXXXX", get_object_directory()); + temp = xmks_tempfile(temp_path.buf); + fsync_or_die(get_tempfile_fd(temp), get_tempfile_path(temp)); + delete_tempfile(&temp); + strbuf_release(&temp_path); + } + + if (bulk_fsync_objdir) + tmp_objdir_migrate(bulk_fsync_objdir); +} + static int already_written(struct bulk_checkin_state *state, struct object_id *oid) { int i; @@ -273,25 +307,61 @@ static int deflate_to_pack(struct bulk_checkin_state *state, return 0; } +void fsync_loose_object_bulk_checkin(int fd) +{ + assert(fsync_object_files == FSYNC_OBJECT_FILES_BATCH); + + /* + * If we have a plugged bulk checkin, we issue a call that + * cleans the filesystem page cache but avoids a hardware flush + * command. Later on we will issue a single hardware flush + * before as part of do_batch_fsync. + */ + if (bulk_checkin_plugged && + git_fsync(fd, FSYNC_WRITEOUT_ONLY) >= 0) { + if (!needs_batch_fsync) + needs_batch_fsync = 1; + } else { + fsync_or_die(fd, "loose object file"); + } +} + int index_bulk_checkin(struct object_id *oid, int fd, size_t size, enum object_type type, const char *path, unsigned flags) { - int status = deflate_to_pack(&state, oid, fd, size, type, + int status = deflate_to_pack(&bulk_checkin_state, oid, fd, size, type, path, flags); - if (!state.plugged) - finish_bulk_checkin(&state); + if (!bulk_checkin_plugged) + finish_bulk_checkin(&bulk_checkin_state); return status; } void plug_bulk_checkin(void) { - state.plugged = 1; + assert(!bulk_checkin_plugged); + + /* + * A temporary object directory is used to hold the files + * while they are not fsynced. + */ + if (fsync_object_files == FSYNC_OBJECT_FILES_BATCH) { + bulk_fsync_objdir = tmp_objdir_create("bulk-fsync"); + if (!bulk_fsync_objdir) + die(_("Could not create temporary object directory for core.fsyncobjectfiles=batch")); + + tmp_objdir_replace_primary_odb(bulk_fsync_objdir, 0); + } + + bulk_checkin_plugged = 1; } void unplug_bulk_checkin(void) { - state.plugged = 0; - if (state.f) - finish_bulk_checkin(&state); + assert(bulk_checkin_plugged); + bulk_checkin_plugged = 0; + if (bulk_checkin_state.f) + finish_bulk_checkin(&bulk_checkin_state); + + do_batch_fsync(); } diff --git a/bulk-checkin.h b/bulk-checkin.h index b26f3dc3b74cb9..08f292379b6cf9 100644 --- a/bulk-checkin.h +++ b/bulk-checkin.h @@ -6,6 +6,8 @@ #include "cache.h" +void fsync_loose_object_bulk_checkin(int fd); + int index_bulk_checkin(struct object_id *oid, int fd, size_t size, enum object_type type, const char *path, unsigned flags); diff --git a/cache.h b/cache.h index ffe45a7b403dba..bd20bfbf054c42 100644 --- a/cache.h +++ b/cache.h @@ -984,7 +984,13 @@ void reset_shared_repository(void); extern int read_replace_refs; extern char *git_replace_ref_base; -extern int fsync_object_files; +enum fsync_object_files_mode { + FSYNC_OBJECT_FILES_OFF, + FSYNC_OBJECT_FILES_ON, + FSYNC_OBJECT_FILES_BATCH +}; + +extern enum fsync_object_files_mode fsync_object_files; extern int core_preload_index; extern int precomposed_unicode; extern int protect_hfs; diff --git a/compat/mingw.c b/compat/mingw.c index a56fb6cb35aa8f..5185289ff5a5d3 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -1239,8 +1239,11 @@ char *mingw_mktemp(char *template) int offset = 0; /* we need to return the path, thus no long paths here! */ - if (xutftowcs_path(wtemplate, template) < 0) + if (xutftowcsn(wtemplate, template, MAX_PATH, -1) < 0) { + if (errno == ERANGE) + errno = ENAMETOOLONG; return NULL; + } if (is_dir_sep(template[0]) && !is_dir_sep(template[1]) && iswalpha(wtemplate[0]) && wtemplate[1] == L':') { @@ -3754,7 +3757,7 @@ int wmain(int argc, const wchar_t **wargv) maybe_redirect_std_handles(); adjust_symlink_flags(); - fsync_object_files = 1; + fsync_object_files = FSYNC_OBJECT_FILES_ON; /* determine size of argv and environ conversion buffer */ maxlen = wcslen(wargv[0]); diff --git a/compat/mingw.h b/compat/mingw.h index 25bdada0d57b8a..31d35be2d90d9c 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -332,6 +332,9 @@ int mingw_getpagesize(void); #define getpagesize mingw_getpagesize #endif +int win32_fsync_no_flush(int fd); +#define fsync_no_flush win32_fsync_no_flush + struct rlimit { unsigned int rlim_cur; }; diff --git a/compat/win32/flush.c b/compat/win32/flush.c new file mode 100644 index 00000000000000..75324c24ee7cef --- /dev/null +++ b/compat/win32/flush.c @@ -0,0 +1,28 @@ +#include "../../git-compat-util.h" +#include +#include "lazyload.h" + +int win32_fsync_no_flush(int fd) +{ + IO_STATUS_BLOCK io_status; + +#define FLUSH_FLAGS_FILE_DATA_ONLY 1 + + DECLARE_PROC_ADDR(ntdll.dll, NTSTATUS, NtFlushBuffersFileEx, + HANDLE FileHandle, ULONG Flags, PVOID Parameters, ULONG ParameterSize, + PIO_STATUS_BLOCK IoStatusBlock); + + if (!INIT_PROC_ADDR(NtFlushBuffersFileEx)) { + errno = ENOSYS; + return -1; + } + + memset(&io_status, 0, sizeof(io_status)); + if (NtFlushBuffersFileEx((HANDLE)_get_osfhandle(fd), FLUSH_FLAGS_FILE_DATA_ONLY, + NULL, 0, &io_status)) { + errno = EINVAL; + return -1; + } + + return 0; +} diff --git a/config.c b/config.c index a065b2f8a6d0a5..0e24476e5e97bc 100644 --- a/config.c +++ b/config.c @@ -1506,7 +1506,12 @@ int git_default_core_config(const char *var, const char *value, void *cb) } if (!strcmp(var, "core.fsyncobjectfiles")) { - fsync_object_files = git_config_bool(var, value); + if (value && !strcmp(value, "batch")) + fsync_object_files = FSYNC_OBJECT_FILES_BATCH; + else if (git_config_bool(var, value)) + fsync_object_files = FSYNC_OBJECT_FILES_ON; + else + fsync_object_files = FSYNC_OBJECT_FILES_OFF; return 0; } diff --git a/config.mak.uname b/config.mak.uname index 5deac1b450c2a4..faa18081d85cf9 100644 --- a/config.mak.uname +++ b/config.mak.uname @@ -53,6 +53,7 @@ ifeq ($(uname_S),Linux) HAVE_CLOCK_MONOTONIC = YesPlease # -lrt is needed for clock_gettime on glibc <= 2.16 NEEDS_LIBRT = YesPlease + HAVE_SYNC_FILE_RANGE = YesPlease HAVE_GETDELIM = YesPlease SANE_TEXT_GREP=-a FREAD_READS_DIRECTORIES = UnfortunatelyYes @@ -466,6 +467,7 @@ endif CFLAGS = BASIC_CFLAGS = -nologo -I. -Icompat/vcbuild/include -DWIN32 -D_CONSOLE -DHAVE_STRING_H -D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_DEPRECATE COMPAT_OBJS = compat/msvc.o compat/winansi.o \ + compat/win32/flush.o \ compat/win32/path-utils.o \ compat/win32/pthread.o compat/win32/syslog.o \ compat/win32/trace2_win32_process_info.o \ @@ -651,6 +653,7 @@ ifneq (,$(findstring MINGW,$(uname_S))) COMPAT_CFLAGS += -DSTRIP_EXTENSION=\".exe\" COMPAT_OBJS += compat/mingw.o compat/winansi.o \ compat/win32/trace2_win32_process_info.o \ + compat/win32/flush.o \ compat/win32/path-utils.o \ compat/win32/pthread.o compat/win32/syslog.o \ compat/win32/dirent.o compat/win32/fscache.o diff --git a/configure.ac b/configure.ac index 031e8d3fee8f22..c711037d625da7 100644 --- a/configure.ac +++ b/configure.ac @@ -1090,6 +1090,14 @@ AC_COMPILE_IFELSE([CLOCK_MONOTONIC_SRC], [AC_MSG_RESULT([no]) HAVE_CLOCK_MONOTONIC=]) GIT_CONF_SUBST([HAVE_CLOCK_MONOTONIC]) + +# +# Define HAVE_SYNC_FILE_RANGE=YesPlease if sync_file_range is available. +GIT_CHECK_FUNC(sync_file_range, + [HAVE_SYNC_FILE_RANGE=YesPlease], + [HAVE_SYNC_FILE_RANGE]) +GIT_CONF_SUBST([HAVE_SYNC_FILE_RANGE]) + # # Define NO_SETITIMER if you don't have setitimer. GIT_CHECK_FUNC(setitimer, diff --git a/contrib/buildsystems/CMakeLists.txt b/contrib/buildsystems/CMakeLists.txt index cc68687c908e74..9663e3dcf30444 100644 --- a/contrib/buildsystems/CMakeLists.txt +++ b/contrib/buildsystems/CMakeLists.txt @@ -286,7 +286,8 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Windows") NOGDI OBJECT_CREATION_MODE=1 __USE_MINGW_ANSI_STDIO=0 USE_NED_ALLOCATOR OVERRIDE_STRDUP MMAP_PREVENTS_DELETE USE_WIN32_MMAP UNICODE _UNICODE HAVE_WPGMPTR ENSURE_MSYSTEM_IS_SET) - list(APPEND compat_SOURCES compat/mingw.c compat/winansi.c compat/win32/path-utils.c + list(APPEND compat_SOURCES compat/mingw.c compat/winansi.c + compat/win32/flush.c compat/win32/path-utils.c compat/win32/pthread.c compat/win32mmap.c compat/win32/syslog.c compat/win32/trace2_win32_process_info.c compat/win32/dirent.c compat/nedmalloc/nedmalloc.c compat/strdup.c compat/win32/fscache.c) diff --git a/environment.c b/environment.c index 70214cf1b5d774..32c7b689184279 100644 --- a/environment.c +++ b/environment.c @@ -17,6 +17,7 @@ #include "commit.h" #include "strvec.h" #include "object-store.h" +#include "tmp-objdir.h" #include "chdir-notify.h" #include "shallow.h" @@ -42,7 +43,7 @@ const char *git_attributes_file; const char *git_hooks_path; int zlib_compression_level = Z_BEST_SPEED; int pack_compression_level = Z_DEFAULT_COMPRESSION; -int fsync_object_files; +enum fsync_object_files_mode fsync_object_files; size_t packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE; size_t packed_git_limit = DEFAULT_PACKED_GIT_LIMIT; size_t delta_base_cache_limit = 96 * 1024 * 1024; @@ -175,6 +176,10 @@ void setup_git_env(const char *git_dir) args.graft_file = getenv_safe(&to_free, GRAFT_ENVIRONMENT); args.index_file = getenv_safe(&to_free, INDEX_ENVIRONMENT); args.alternate_db = getenv_safe(&to_free, ALTERNATE_DB_ENVIRONMENT); + if (getenv(GIT_QUARANTINE_ENVIRONMENT)) { + args.disable_ref_updates = 1; + } + repo_set_gitdir(the_repository, git_dir, &args); strvec_clear(&to_free); @@ -339,10 +344,14 @@ static void update_relative_gitdir(const char *name, void *data) { char *path = reparent_relative_path(old_cwd, new_cwd, get_git_dir()); + struct tmp_objdir *tmp_objdir = tmp_objdir_unapply_primary_odb(); trace_printf_key(&trace_setup_key, "setup: move $GIT_DIR to '%s'", path); + set_git_dir_1(path); + if (tmp_objdir) + tmp_objdir_reapply_primary_odb(tmp_objdir, old_cwd, new_cwd); free(path); } diff --git a/git-compat-util.h b/git-compat-util.h index f03ba605aef5cf..29563afcdb34d9 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -1231,6 +1231,13 @@ __attribute__((format (printf, 1, 2))) NORETURN void BUG(const char *fmt, ...); #endif +enum fsync_action { + FSYNC_WRITEOUT_ONLY, + FSYNC_HARDWARE_FLUSH +}; + +int git_fsync(int fd, enum fsync_action action); + /* * Preserves errno, prints a message, but gives no warning for ENOENT. * Returns 0 on success, which includes trying to unlink an object that does diff --git a/object-file.c b/object-file.c index a8be8994814933..9aec225c68cf88 100644 --- a/object-file.c +++ b/object-file.c @@ -750,6 +750,49 @@ void add_to_alternates_memory(const char *reference) '\n', NULL, 0); } +struct object_directory *set_temporary_primary_odb(const char *dir, int will_destroy) +{ + struct object_directory *new_odb; + + /* + * Make sure alternates are initialized, or else our entry may be + * overwritten when they are. + */ + prepare_alt_odb(the_repository); + + /* + * Make a new primary odb and link the old primary ODB in as an + * alternate + */ + new_odb = xcalloc(1, sizeof(*new_odb)); + new_odb->path = xstrdup(dir); + + /* + * Disable ref updates while a temporary odb is active, since + * the objects in the database may roll back. + */ + new_odb->disable_ref_updates = 1; + new_odb->will_destroy = will_destroy; + new_odb->next = the_repository->objects->odb; + the_repository->objects->odb = new_odb; + return new_odb->next; +} + +void restore_primary_odb(struct object_directory *restore_odb, const char *old_path) +{ + struct object_directory *cur_odb = the_repository->objects->odb; + + if (strcmp(old_path, cur_odb->path)) + BUG("expected %s as primary object store; found %s", + old_path, cur_odb->path); + + if (cur_odb->next != restore_odb) + BUG("we expect the old primary object store to be the first alternate"); + + the_repository->objects->odb = restore_odb; + free_object_directory(cur_odb); +} + /* * Compute the exact path an alternate is at and returns it. In case of * error NULL is returned and the human readable error is added to `err` @@ -1862,8 +1905,21 @@ int hash_object_file(const struct git_hash_algo *algo, const void *buf, /* Finalize a file on disk, and close it. */ static void close_loose_object(int fd) { - if (fsync_object_files) - fsync_or_die(fd, "loose object file"); + if (!the_repository->objects->odb->will_destroy) { + switch (fsync_object_files) { + case FSYNC_OBJECT_FILES_OFF: + break; + case FSYNC_OBJECT_FILES_ON: + fsync_or_die(fd, "loose object file"); + break; + case FSYNC_OBJECT_FILES_BATCH: + fsync_loose_object_bulk_checkin(fd); + break; + default: + BUG("Invalid fsync_object_files mode."); + } + } + if (close(fd) != 0) die_errno(_("error when closing loose object file")); } diff --git a/object-store.h b/object-store.h index d24915ced1b2dd..b205aad8438983 100644 --- a/object-store.h +++ b/object-store.h @@ -26,6 +26,18 @@ struct object_directory { uint32_t loose_objects_subdir_seen[8]; /* 256 bits */ struct oidtree *loose_objects_cache; + /* + * This is a temporary object store created by the tmp_objdir + * facility. Disable ref updates since the objects in the store + * might be discarded on rollback. + */ + unsigned int disable_ref_updates : 1; + + /* + * This object store is ephemeral, so there is no need to fsync. + */ + unsigned int will_destroy : 1; + /* * Path to the alternative object store. If this is a relative path, * it is relative to the current working directory. @@ -56,6 +68,17 @@ void add_to_alternates_file(const char *dir); */ void add_to_alternates_memory(const char *dir); +/* + * Replace the current writable object directory with the specified temporary + * object directory; returns the former primary object directory. + */ +struct object_directory *set_temporary_primary_odb(const char *dir, int will_destroy); + +/* + * Restore a previous ODB replaced by set_temporary_main_odb. + */ +void restore_primary_odb(struct object_directory *restore_odb, const char *old_path); + /* * Populate and return the loose object cache array corresponding to the * given object ID. @@ -66,6 +89,9 @@ struct oidtree *odb_loose_cache(struct object_directory *odb, /* Empty the loose object cache for the specified object directory. */ void odb_clear_loose_cache(struct object_directory *odb); +/* Clear and free the specified object directory */ +void free_object_directory(struct object_directory *odb); + struct packed_git { struct hashmap_entry packmap_ent; struct packed_git *next; diff --git a/object.c b/object.c index 4e85955a941168..98635bc4043625 100644 --- a/object.c +++ b/object.c @@ -513,7 +513,7 @@ struct raw_object_store *raw_object_store_new(void) return o; } -static void free_object_directory(struct object_directory *odb) +void free_object_directory(struct object_directory *odb) { free(odb->path); odb_clear_loose_cache(odb); diff --git a/refs.c b/refs.c index db8ae87ca79ab4..12fff45e4d7693 100644 --- a/refs.c +++ b/refs.c @@ -2126,7 +2126,7 @@ int ref_transaction_prepare(struct ref_transaction *transaction, break; } - if (getenv(GIT_QUARANTINE_ENVIRONMENT)) { + if (the_repository->objects->odb->disable_ref_updates) { strbuf_addstr(err, _("ref updates forbidden inside quarantine environment")); return -1; diff --git a/repository.c b/repository.c index b2bf44c6fafec8..0a7b3c30c6cad9 100644 --- a/repository.c +++ b/repository.c @@ -80,6 +80,8 @@ void repo_set_gitdir(struct repository *repo, expand_base_dir(&repo->objects->odb->path, o->object_dir, repo->commondir, "objects"); + repo->objects->odb->disable_ref_updates = o->disable_ref_updates; + free(repo->objects->alternate_db); repo->objects->alternate_db = xstrdup_or_null(o->alternate_db); expand_base_dir(&repo->graft_file, o->graft_file, diff --git a/repository.h b/repository.h index fdc0a818b83c08..2e10594f895474 100644 --- a/repository.h +++ b/repository.h @@ -165,6 +165,7 @@ struct set_gitdir_args { const char *graft_file; const char *index_file; const char *alternate_db; + int disable_ref_updates; }; void repo_set_gitdir(struct repository *repo, const char *root, diff --git a/t/lib-unique-files.sh b/t/lib-unique-files.sh new file mode 100644 index 00000000000000..a7de4ca8512ac5 --- /dev/null +++ b/t/lib-unique-files.sh @@ -0,0 +1,36 @@ +# Helper to create files with unique contents + + +# Create multiple files with unique contents. Takes the number of +# directories, the number of files in each directory, and the base +# directory. +# +# test_create_unique_files 2 3 my_dir -- Creates 2 directories with 3 files +# each in my_dir, all with unique +# contents. + +test_create_unique_files() { + test "$#" -ne 3 && BUG "3 param" + + local dirs=$1 + local files=$2 + local basedir=$3 + local counter=0 + test_tick + local basedata=$test_tick + + + rm -rf $basedir + + for i in $(test_seq $dirs) + do + local dir=$basedir/dir$i + + mkdir -p "$dir" + for j in $(test_seq $files) + do + counter=$((counter + 1)) + echo "$basedata.$counter" >"$dir/file$j.txt" + done + done +} diff --git a/t/perf/p3700-add.sh b/t/perf/p3700-add.sh new file mode 100755 index 00000000000000..e93c08a2e70a8f --- /dev/null +++ b/t/perf/p3700-add.sh @@ -0,0 +1,43 @@ +#!/bin/sh +# +# This test measures the performance of adding new files to the object database +# and index. The test was originally added to measure the effect of the +# core.fsyncObjectFiles=batch mode, which is why we are testing different values +# of that setting explicitly and creating a lot of unique objects. + +test_description="Tests performance of add" + +. ./perf-lib.sh + +. $TEST_DIRECTORY/lib-unique-files.sh + +test_perf_default_repo +test_checkout_worktree + +dir_count=10 +files_per_dir=50 +total_files=$((dir_count * files_per_dir)) + +# We need to create the files each time we run the perf test, but +# we do not want to measure the cost of creating the files, so run +# the tet once. +if test "${GIT_PERF_REPEAT_COUNT-1}" -ne 1 +then + echo "warning: Setting GIT_PERF_REPEAT_COUNT=1" >&2 + GIT_PERF_REPEAT_COUNT=1 +fi + +for m in false true batch +do + test_expect_success "create the files for core.fsyncObjectFiles=$m" ' + git reset --hard && + # create files across directories + test_create_unique_files $dir_count $files_per_dir files + ' + + test_perf "add $total_files files (core.fsyncObjectFiles=$m)" " + git -c core.fsyncobjectfiles=$m add files + " +done + +test_done diff --git a/t/perf/p3900-stash.sh b/t/perf/p3900-stash.sh new file mode 100755 index 00000000000000..c9fcd0c03eb9b7 --- /dev/null +++ b/t/perf/p3900-stash.sh @@ -0,0 +1,46 @@ +#!/bin/sh +# +# This test measures the performance of adding new files to the object database +# and index. The test was originally added to measure the effect of the +# core.fsyncObjectFiles=batch mode, which is why we are testing different values +# of that setting explicitly and creating a lot of unique objects. + +test_description="Tests performance of stash" + +. ./perf-lib.sh + +. $TEST_DIRECTORY/lib-unique-files.sh + +test_perf_default_repo +test_checkout_worktree + +dir_count=10 +files_per_dir=50 +total_files=$((dir_count * files_per_dir)) + +# We need to create the files each time we run the perf test, but +# we do not want to measure the cost of creating the files, so run +# the tet once. +if test "${GIT_PERF_REPEAT_COUNT-1}" -ne 1 +then + echo "warning: Setting GIT_PERF_REPEAT_COUNT=1" >&2 + GIT_PERF_REPEAT_COUNT=1 +fi + +for m in false true batch +do + test_expect_success "create the files for core.fsyncObjectFiles=$m" ' + git reset --hard && + # create files across directories + test_create_unique_files $dir_count $files_per_dir files + ' + + # We only stash files in the 'files' subdirectory since + # the perf test infrastructure creates files in the + # current working directory that need to be preserved + test_perf "stash 500 files (core.fsyncObjectFiles=$m)" " + git -c core.fsyncobjectfiles=$m stash push -u -- files + " +done + +test_done diff --git a/t/t3700-add.sh b/t/t3700-add.sh index 717653f9d8e699..be88f3783292aa 100755 --- a/t/t3700-add.sh +++ b/t/t3700-add.sh @@ -7,6 +7,8 @@ test_description='Test of git add, including the -- option.' . ./test-lib.sh +. $TEST_DIRECTORY/lib-unique-files.sh + # Test the file mode "$1" of the file "$2" in the index. test_mode_in_index () { case "$(git ls-files -s "$2")" in @@ -33,6 +35,24 @@ test_expect_success \ 'Test that "git add -- -q" works' \ 'touch -- -q && git add -- -q' +test_expect_success 'git add: core.fsyncobjectfiles=batch' " + test_create_unique_files 2 4 fsync-files && + git -c core.fsyncobjectfiles=batch add -- ./fsync-files/ && + rm -f fsynced_files && + git ls-files --stage fsync-files/ > fsynced_files && + test_line_count = 8 fsynced_files && + awk -- '{print \$2}' fsynced_files | xargs -n1 git cat-file -e +" + +test_expect_success 'git update-index: core.fsyncobjectfiles=batch' " + test_create_unique_files 2 4 fsync-files2 && + find fsync-files2 ! -type d -print | xargs git -c core.fsyncobjectfiles=batch update-index --add -- && + rm -f fsynced_files2 && + git ls-files --stage fsync-files2/ > fsynced_files2 && + test_line_count = 8 fsynced_files2 && + awk -- '{print \$2}' fsynced_files2 | xargs -n1 git cat-file -e +" + test_expect_success \ 'git add: Test that executable bit is not used if core.filemode=0' \ 'git config core.filemode 0 && diff --git a/t/t3903-stash.sh b/t/t3903-stash.sh index 18eebf230cd128..c204cda2a0e2d9 100755 --- a/t/t3903-stash.sh +++ b/t/t3903-stash.sh @@ -9,6 +9,7 @@ GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME . ./test-lib.sh +. $TEST_DIRECTORY/lib-unique-files.sh diff_cmp () { for i in "$1" "$2" @@ -1293,6 +1294,19 @@ test_expect_success 'stash handles skip-worktree entries nicely' ' git rev-parse --verify refs/stash:A.t ' +test_expect_success 'stash with core.fsyncobjectfiles=batch' " + test_create_unique_files 2 4 fsync-files && + git -c core.fsyncobjectfiles=batch stash push -u -- ./fsync-files/ && + rm -f fsynced_files && + + # The files were untracked, so use the third parent, + # which contains the untracked files + git ls-tree -r stash^3 -- ./fsync-files/ > fsynced_files && + test_line_count = 8 fsynced_files && + awk -- '{print \$3}' fsynced_files | xargs -n1 git cat-file -e +" + + test_expect_success 'stash -c stash.useBuiltin=false warning ' ' expected="stash.useBuiltin support has been removed" && diff --git a/t/t5300-pack-object.sh b/t/t5300-pack-object.sh index e13a884207589d..38663dc1393654 100755 --- a/t/t5300-pack-object.sh +++ b/t/t5300-pack-object.sh @@ -162,23 +162,23 @@ test_expect_success 'pack-objects with bogus arguments' ' check_unpack () { test_when_finished "rm -rf git2" && - git init --bare git2 && - git -C git2 unpack-objects -n <"$1".pack && - git -C git2 unpack-objects <"$1".pack && - (cd .git && find objects -type f -print) | - while read path - do - cmp git2/$path .git/$path || { - echo $path differs. - return 1 - } - done + git $2 init --bare git2 && + ( + git $2 -C git2 unpack-objects -n <"$1".pack && + git $2 -C git2 unpack-objects <"$1".pack && + git $2 -C git2 cat-file --batch-check="%(objectname)" + ) current && + cmp obj-list current } test_expect_success 'unpack without delta' ' check_unpack test-1-${packname_1} ' +test_expect_success 'unpack without delta (core.fsyncobjectfiles=batch)' ' + check_unpack test-1-${packname_1} "-c core.fsyncobjectfiles=batch" +' + test_expect_success 'pack with REF_DELTA' ' packname_2=$(git pack-objects --progress test-2 stderr) && check_deltas stderr -gt 0 @@ -188,6 +188,10 @@ test_expect_success 'unpack with REF_DELTA' ' check_unpack test-2-${packname_2} ' +test_expect_success 'unpack with REF_DELTA (core.fsyncobjectfiles=batch)' ' + check_unpack test-2-${packname_2} "-c core.fsyncobjectfiles=batch" +' + test_expect_success 'pack with OFS_DELTA' ' packname_3=$(git pack-objects --progress --delta-base-offset test-3 \ stderr) && @@ -198,6 +202,10 @@ test_expect_success 'unpack with OFS_DELTA' ' check_unpack test-3-${packname_3} ' +test_expect_success 'unpack with OFS_DELTA (core.fsyncobjectfiles=batch)' ' + check_unpack test-3-${packname_3} "-c core.fsyncobjectfiles=batch" +' + test_expect_success 'compare delta flavors' ' perl -e '\'' defined($_ = -s $_) or die for @ARGV; diff --git a/tmp-objdir.c b/tmp-objdir.c index b8d880e3626a9c..3d38eeab66bfb0 100644 --- a/tmp-objdir.c +++ b/tmp-objdir.c @@ -1,5 +1,6 @@ #include "cache.h" #include "tmp-objdir.h" +#include "chdir-notify.h" #include "dir.h" #include "sigchain.h" #include "string-list.h" @@ -11,6 +12,8 @@ struct tmp_objdir { struct strbuf path; struct strvec env; + struct object_directory *prev_odb; + int will_destroy; }; /* @@ -38,6 +41,9 @@ static int tmp_objdir_destroy_1(struct tmp_objdir *t, int on_signal) if (t == the_tmp_objdir) the_tmp_objdir = NULL; + if (!on_signal && t->prev_odb) + restore_primary_odb(t->prev_odb, t->path.buf); + /* * This may use malloc via strbuf_grow(), but we should * have pre-grown t->path sufficiently so that this @@ -52,6 +58,7 @@ static int tmp_objdir_destroy_1(struct tmp_objdir *t, int on_signal) */ if (!on_signal) tmp_objdir_free(t); + return err; } @@ -121,7 +128,7 @@ static int setup_tmp_objdir(const char *root) return ret; } -struct tmp_objdir *tmp_objdir_create(void) +struct tmp_objdir *tmp_objdir_create(const char *prefix) { static int installed_handlers; struct tmp_objdir *t; @@ -129,11 +136,16 @@ struct tmp_objdir *tmp_objdir_create(void) if (the_tmp_objdir) BUG("only one tmp_objdir can be used at a time"); - t = xmalloc(sizeof(*t)); + t = xcalloc(1, sizeof(*t)); strbuf_init(&t->path, 0); strvec_init(&t->env); - strbuf_addf(&t->path, "%s/incoming-XXXXXX", get_object_directory()); + /* + * Use a string starting with tmp_ so that the builtin/prune.c code + * can recognize any stale objdirs left behind by a crash and delete + * them. + */ + strbuf_addf(&t->path, "%s/tmp_objdir-%s-XXXXXX", get_object_directory(), prefix); /* * Grow the strbuf beyond any filename we expect to be placed in it. @@ -269,6 +281,13 @@ int tmp_objdir_migrate(struct tmp_objdir *t) if (!t) return 0; + if (t->prev_odb) { + if (the_repository->objects->odb->will_destroy) + BUG("migrating an ODB that was marked for destruction"); + restore_primary_odb(t->prev_odb, t->path.buf); + t->prev_odb = NULL; + } + strbuf_addbuf(&src, &t->path); strbuf_addstr(&dst, get_object_directory()); @@ -292,3 +311,33 @@ void tmp_objdir_add_as_alternate(const struct tmp_objdir *t) { add_to_alternates_memory(t->path.buf); } + +void tmp_objdir_replace_primary_odb(struct tmp_objdir *t, int will_destroy) +{ + if (t->prev_odb) + BUG("the primary object database is already replaced"); + t->prev_odb = set_temporary_primary_odb(t->path.buf, will_destroy); + t->will_destroy = will_destroy; +} + +struct tmp_objdir *tmp_objdir_unapply_primary_odb(void) +{ + if (!the_tmp_objdir || !the_tmp_objdir->prev_odb) + return NULL; + + restore_primary_odb(the_tmp_objdir->prev_odb, the_tmp_objdir->path.buf); + the_tmp_objdir->prev_odb = NULL; + return the_tmp_objdir; +} + +void tmp_objdir_reapply_primary_odb(struct tmp_objdir *t, const char *old_cwd, + const char *new_cwd) +{ + char *path; + + path = reparent_relative_path(old_cwd, new_cwd, t->path.buf); + strbuf_reset(&t->path); + strbuf_addstr(&t->path, path); + free(path); + tmp_objdir_replace_primary_odb(t, t->will_destroy); +} diff --git a/tmp-objdir.h b/tmp-objdir.h index b1e45b4c75d2d8..a3145051f25bf6 100644 --- a/tmp-objdir.h +++ b/tmp-objdir.h @@ -10,7 +10,7 @@ * * Example: * - * struct tmp_objdir *t = tmp_objdir_create(); + * struct tmp_objdir *t = tmp_objdir_create("incoming"); * if (!run_command_v_opt_cd_env(cmd, 0, NULL, tmp_objdir_env(t)) && * !tmp_objdir_migrate(t)) * printf("success!\n"); @@ -22,9 +22,10 @@ struct tmp_objdir; /* - * Create a new temporary object directory; returns NULL on failure. + * Create a new temporary object directory with the specified prefix; + * returns NULL on failure. */ -struct tmp_objdir *tmp_objdir_create(void); +struct tmp_objdir *tmp_objdir_create(const char *prefix); /* * Return a list of environment strings, suitable for use with @@ -51,4 +52,26 @@ int tmp_objdir_destroy(struct tmp_objdir *); */ void tmp_objdir_add_as_alternate(const struct tmp_objdir *); +/* + * Replaces the main object store in the current process with the temporary + * object directory and makes the former main object store an alternate. + * If will_destroy is nonzero, the object directory may not be migrated. + */ +void tmp_objdir_replace_primary_odb(struct tmp_objdir *, int will_destroy); + +/* + * If the primary object database was replaced by a temporary object directory, + * restore it to its original value while keeping the directory contents around. + * Returns NULL if the primary object database was not replaced. + */ +struct tmp_objdir *tmp_objdir_unapply_primary_odb(void); + +/* + * Reapplies the former primary temporary object database, after protentially + * changing its relative path. + */ +void tmp_objdir_reapply_primary_odb(struct tmp_objdir *, const char *old_cwd, + const char *new_cwd); + + #endif /* TMP_OBJDIR_H */ diff --git a/wrapper.c b/wrapper.c index 7c6586af321000..1a1e2fba9c942d 100644 --- a/wrapper.c +++ b/wrapper.c @@ -540,6 +540,54 @@ int xmkstemp_mode(char *filename_template, int mode) return fd; } +int git_fsync(int fd, enum fsync_action action) +{ + switch (action) { + case FSYNC_WRITEOUT_ONLY: + +#ifdef __APPLE__ + /* + * on macOS, fsync just causes filesystem cache writeback but does not + * flush hardware caches. + */ + return fsync(fd); +#endif + +#ifdef HAVE_SYNC_FILE_RANGE + /* + * On linux 2.6.17 and above, sync_file_range is the way to issue + * a writeback without a hardware flush. An offset of 0 and size of 0 + * indicates writeout of the entire file and the wait flags ensure that all + * dirty data is written to the disk (potentially in a disk-side cache) + * before we continue. + */ + + return sync_file_range(fd, 0, 0, SYNC_FILE_RANGE_WAIT_BEFORE | + SYNC_FILE_RANGE_WRITE | + SYNC_FILE_RANGE_WAIT_AFTER); +#endif + +#ifdef fsync_no_flush + return fsync_no_flush(fd); +#endif + + errno = ENOSYS; + return -1; + + case FSYNC_HARDWARE_FLUSH: + +#ifdef __APPLE__ + return fcntl(fd, F_FULLFSYNC); +#else + return fsync(fd); +#endif + + default: + BUG("unexpected git_fsync(%d) call", action); + } + +} + static int warn_if_unremovable(const char *op, const char *file, int rc) { int err; diff --git a/write-or-die.c b/write-or-die.c index d33e68f6abb30a..8f53953d4ababb 100644 --- a/write-or-die.c +++ b/write-or-die.c @@ -57,7 +57,7 @@ void fprintf_or_die(FILE *f, const char *fmt, ...) void fsync_or_die(int fd, const char *msg) { - while (fsync(fd) < 0) { + while (git_fsync(fd, FSYNC_HARDWARE_FLUSH) < 0) { if (errno != EINTR) die_errno("fsync error on '%s'", msg); }