Skip to content

Commit 232ea05

Browse files
author
Darrick J. Wong
committed
xfs: enable sorting of xfile-backed arrays
The btree bulk loading code requires that records be provided in the correct record sort order for the given btree type. In general, repair code cannot be required to collect records in order, and it is not feasible to insert new records in the middle of an array to maintain sort order. Implement a sorting algorithm so that we can sort the records just prior to bulk loading. In principle, an xfarray could consume many gigabytes of memory and its backing pages can be sent out to disk at any time. This means that we cannot map the entire array into memory at once, so we must find a way to divide the work into smaller portions (e.g. a page) that /can/ be mapped into memory. Quicksort seems like a reasonable fit for this purpose, since it uses a divide and conquer strategy to keep its average runtime at O(n log n). The solution presented here is a port of the glibc implementation, which itself is derived from the median-of-three and tail call recursion strategies outlined by Sedgewick. Subsequent patches will optimize the implementation further by utilizing the kernel's heapsort on directly-mapped memory whenever possible, and improving the quicksort pivot selection algorithm to try to avoid O(n^2) collapses. Note: The sorting functionality gets its own patch because the basic big array mechanisms were plenty for a single code patch. Signed-off-by: Darrick J. Wong <[email protected]> Reviewed-by: Kent Overstreet <[email protected]> Reviewed-by: Dave Chinner <[email protected]>
1 parent 3934e8e commit 232ea05

File tree

3 files changed

+750
-0
lines changed

3 files changed

+750
-0
lines changed

fs/xfs/scrub/trace.h

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
struct xfile;
2020
struct xfarray;
21+
struct xfarray_sortinfo;
2122

2223
/*
2324
* ftrace's __print_symbolic requires that all enum values be wrapped in the
@@ -846,6 +847,119 @@ TRACE_EVENT(xfarray_create,
846847
__entry->obj_size_log)
847848
);
848849

850+
/*
 * xfarray_isort tracepoint.
 *
 * Takes the sort context @si and the two array indices @lo and @hi.
 * Records the inode number of the file backing si->array->xfile together
 * with both indices, and prints them along with "elts", which is computed
 * at print time as hi - lo.
 *
 * NOTE(review): the name suggests this fires for an insertion-sort pass --
 * confirm against the caller.  If @hi is an inclusive bound, "elts"
 * undercounts the number of elements by one -- verify.
 */
TRACE_EVENT(xfarray_isort,
851+
TP_PROTO(struct xfarray_sortinfo *si, uint64_t lo, uint64_t hi),
852+
TP_ARGS(si, lo, hi),
853+
TP_STRUCT__entry(
854+
__field(unsigned long, ino)
855+
__field(unsigned long long, lo)
856+
__field(unsigned long long, hi)
857+
),
858+
TP_fast_assign(
859+
__entry->ino = file_inode(si->array->xfile->file)->i_ino;
860+
__entry->lo = lo;
861+
__entry->hi = hi;
862+
),
863+
TP_printk("xfino 0x%lx lo %llu hi %llu elts %llu",
864+
__entry->ino,
865+
__entry->lo,
866+
__entry->hi,
867+
__entry->hi - __entry->lo)
868+
);
869+
870+
/*
 * xfarray_qsort tracepoint.
 *
 * Takes the sort context @si and the two array indices @lo and @hi.
 * Records the inode number of the file backing si->array->xfile, both
 * indices, and the si->stack_depth and si->max_stack_depth values at the
 * time of the call.  The output prints "elts" as hi - lo and the stack
 * state as "stack <stack_depth>/<max_stack_depth>".
 *
 * NOTE(review): the stack depth fields are stored as int and printed with
 * %d here, whereas xfarray_sort and xfarray_sort_stats store
 * max_stack_depth as unsigned int and print it with %u -- confirm which
 * signedness matches struct xfarray_sortinfo.  As with xfarray_isort,
 * "elts" undercounts by one if @hi is inclusive -- verify.
 */
TRACE_EVENT(xfarray_qsort,
871+
TP_PROTO(struct xfarray_sortinfo *si, uint64_t lo, uint64_t hi),
872+
TP_ARGS(si, lo, hi),
873+
TP_STRUCT__entry(
874+
__field(unsigned long, ino)
875+
__field(unsigned long long, lo)
876+
__field(unsigned long long, hi)
877+
__field(int, stack_depth)
878+
__field(int, max_stack_depth)
879+
),
880+
TP_fast_assign(
881+
__entry->ino = file_inode(si->array->xfile->file)->i_ino;
882+
__entry->lo = lo;
883+
__entry->hi = hi;
884+
__entry->stack_depth = si->stack_depth;
885+
__entry->max_stack_depth = si->max_stack_depth;
886+
),
887+
TP_printk("xfino 0x%lx lo %llu hi %llu elts %llu stack %d/%d",
888+
__entry->ino,
889+
__entry->lo,
890+
__entry->hi,
891+
__entry->hi - __entry->lo,
892+
__entry->stack_depth,
893+
__entry->max_stack_depth)
894+
);
895+
896+
/*
 * xfarray_sort tracepoint.
 *
 * Takes the sort context @si and a byte count @bytes.  Records the record
 * count (si->array->nr), the per-record size (si->array->obj_size), the
 * inode number of the file backing si->array->xfile, @bytes as passed in,
 * and si->max_stack_depth.
 *
 * Note that the print order differs from the field declaration order:
 * max_stack_depth ("stack") is printed before "bytes".
 *
 * NOTE(review): @bytes is presumably the size of the sort context
 * allocation -- confirm against the caller.
 */
TRACE_EVENT(xfarray_sort,
897+
TP_PROTO(struct xfarray_sortinfo *si, size_t bytes),
898+
TP_ARGS(si, bytes),
899+
TP_STRUCT__entry(
900+
__field(unsigned long, ino)
901+
__field(unsigned long long, nr)
902+
__field(size_t, obj_size)
903+
__field(size_t, bytes)
904+
__field(unsigned int, max_stack_depth)
905+
),
906+
TP_fast_assign(
907+
__entry->nr = si->array->nr;
908+
__entry->obj_size = si->array->obj_size;
909+
__entry->ino = file_inode(si->array->xfile->file)->i_ino;
910+
__entry->bytes = bytes;
911+
__entry->max_stack_depth = si->max_stack_depth;
912+
),
913+
TP_printk("xfino 0x%lx nr %llu objsz %zu stack %u bytes %zu",
914+
__entry->ino,
915+
__entry->nr,
916+
__entry->obj_size,
917+
__entry->max_stack_depth,
918+
__entry->bytes)
919+
);
920+
921+
/*
 * xfarray_sort_stats tracepoint.
 *
 * Takes the sort context @si and an integer @error.  Always records the
 * inode number of the file backing si->array->xfile, si->max_stack_depth,
 * si->max_stack_used and @error.  When DEBUG is defined it additionally
 * records the si->loads, si->stores and si->compares counters, and the
 * format string gains the matching "loads/stores/compares" fields.
 *
 * The "stack_depth %u/%u" pair is printed as max_stack_used followed by
 * max_stack_depth, i.e. the reverse of the order in which the two fields
 * are declared.
 *
 * NOTE(review): the #ifdef DEBUG blocks sit inside the TRACE_EVENT()
 * macro argument list.  ISO C leaves preprocessing directives inside
 * macro arguments undefined; this relies on the compiler accepting them --
 * confirm this is acceptable for all supported toolchains.
 */
TRACE_EVENT(xfarray_sort_stats,
922+
TP_PROTO(struct xfarray_sortinfo *si, int error),
923+
TP_ARGS(si, error),
924+
TP_STRUCT__entry(
925+
__field(unsigned long, ino)
926+
#ifdef DEBUG
927+
__field(unsigned long long, loads)
928+
__field(unsigned long long, stores)
929+
__field(unsigned long long, compares)
930+
#endif
931+
__field(unsigned int, max_stack_depth)
932+
__field(unsigned int, max_stack_used)
933+
__field(int, error)
934+
),
935+
TP_fast_assign(
936+
__entry->ino = file_inode(si->array->xfile->file)->i_ino;
937+
#ifdef DEBUG
938+
__entry->loads = si->loads;
939+
__entry->stores = si->stores;
940+
__entry->compares = si->compares;
941+
#endif
942+
__entry->max_stack_depth = si->max_stack_depth;
943+
__entry->max_stack_used = si->max_stack_used;
944+
__entry->error = error;
945+
),
946+
TP_printk(
947+
#ifdef DEBUG
948+
"xfino 0x%lx loads %llu stores %llu compares %llu stack_depth %u/%u error %d",
949+
#else
950+
"xfino 0x%lx stack_depth %u/%u error %d",
951+
#endif
952+
__entry->ino,
953+
#ifdef DEBUG
954+
__entry->loads,
955+
__entry->stores,
956+
__entry->compares,
957+
#endif
958+
__entry->max_stack_used,
959+
__entry->max_stack_depth,
960+
__entry->error)
961+
);
962+
849963
/* repair tracepoints */
850964
#if IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR)
851965

0 commit comments

Comments
 (0)