
Commit 58fe9cf

Merge branch 'master' into xsn/clean_up_server
2 parents 906afe7 + 6f9939d commit 58fe9cf

34 files changed: +2275 / -705 lines changed

.devops/nix/nixpkgs-instances.nix

Lines changed: 12 additions & 0 deletions
@@ -7,6 +7,18 @@
     { system, ... }:
     {
       _module.args = {
+        # Note: bringing up https://zimbatm.com/notes/1000-instances-of-nixpkgs
+        # again, the below creates several nixpkgs instances which the
+        # flake-centric CLI will be forced to evaluate e.g. on `nix flake show`.
+        #
+        # This is currently "slow" and "expensive", on a certain scale.
+        # This also isn't "right" in that this hinders dependency injection at
+        # the level of flake inputs. This might get removed in the foreseeable
+        # future.
+        #
+        # Note that you can use these expressions without Nix
+        # (`pkgs.callPackage ./devops/nix/scope.nix { }` is the entry point).
+
         pkgsCuda = import inputs.nixpkgs {
           inherit system;
           # Ensure dependencies use CUDA consistently (e.g. that openmpi, ucc,

.devops/nix/package.nix

Lines changed: 14 additions & 5 deletions
@@ -73,6 +73,7 @@ let
       ps: [
         ps.numpy
         ps.sentencepiece
+        ps.tiktoken
         ps.torchWithoutCuda
         ps.transformers
       ]
@@ -114,14 +115,22 @@ effectiveStdenv.mkDerivation (
     pname = "llama-cpp${pnameSuffix}";
     version = llamaVersion;

+    # Note: none of the files discarded here are visible in the sandbox or
+    # affect the output hash. This also means they can be modified without
+    # triggering a rebuild.
     src = lib.cleanSourceWith {
       filter =
         name: type:
-        !(builtins.any (_: _) [
+        let
+          noneOf = builtins.all (x: !x);
+          baseName = baseNameOf name;
+        in
+        noneOf [
           (lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths
-          (name == "README.md") # Ignore *.md changes whe computing outPaths
-          (lib.hasPrefix "." name) # Skip hidden files and directories
-        ]);
+          (lib.hasSuffix ".md" name) # Ignore *.md changes whe computing outPaths
+          (lib.hasPrefix "." baseName) # Skip hidden files and directories
+          (baseName == "flake.lock")
+        ];
       src = lib.cleanSource ../../.;
     };

@@ -159,7 +168,7 @@ effectiveStdenv.mkDerivation (

     cmakeFlags =
       [
-        (cmakeBool "LLAMA_NATIVE" true)
+        (cmakeBool "LLAMA_NATIVE" false)
         (cmakeBool "LLAMA_BUILD_SERVER" true)
         (cmakeBool "BUILD_SHARED_LIBS" true)
         (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)

.devops/nix/scope.nix

Lines changed: 4 additions & 0 deletions
@@ -4,6 +4,10 @@
   llamaVersion ? "0.0.0",
 }:

+# We're using `makeScope` instead of just writing out an attrset
+# because it allows users to apply overlays later using `overrideScope'`.
+# Cf. https://noogle.dev/f/lib/makeScope
+
 lib.makeScope newScope (
   self: {
     inherit llamaVersion;

.github/workflows/build.yml

Lines changed: 2 additions & 2 deletions
@@ -295,7 +295,7 @@ jobs:
       OPENBLAS_VERSION: 0.3.23
       OPENCL_VERSION: 2023.04.17
       CLBLAST_VERSION: 1.6.0
-      SDE_VERSION: 9.21.1-2023-04-24
+      SDE_VERSION: 9.33.0-2024-01-07

     strategy:
       matrix:
@@ -400,7 +400,7 @@ jobs:
         id: cmake_test_sde
         if: ${{ matrix.build == 'avx512' && env.HAS_AVX512F == '0' }} # use Intel SDE for AVX-512 emulation
         run: |
-          curl.exe -o $env:RUNNER_TEMP/sde.tar.xz -L "https://downloadmirror.intel.com/777395/sde-external-${env:SDE_VERSION}-win.tar.xz"
+          curl.exe -o $env:RUNNER_TEMP/sde.tar.xz -L "https://downloadmirror.intel.com/813591/sde-external-${env:SDE_VERSION}-win.tar.xz"
          # for some weird reason windows tar doesn't like sde tar.xz
          7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar.xz
          7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar

.github/workflows/nix-ci-aarch64.yml

Lines changed: 9 additions & 2 deletions
@@ -2,13 +2,20 @@ name: Nix aarch64 builds

 on:
   workflow_dispatch: # allows manual triggering
+  schedule:
+    # Rebuild daily rather than on every push because QEMU is expensive (e.g.
+    # 1.5h instead of minutes with the cold cache).
+    #
+    # randint(0, 59), randint(0, 23)
+    - cron: '26 12 * * *'
+  # But also rebuild if we touched any of the Nix expressions:
   push:
     branches:
       - master
-    paths: ['.github/workflows/**', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', '**/*.sh', '**/*.py', '**/*.nix']
+    paths: ['**/*.nix', 'flake.lock']
   pull_request:
     types: [opened, synchronize, reopened]
-    paths: ['**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', '**/*.sh', '**/*.py', '**/*.nix']
+    paths: ['**/*.nix', 'flake.lock']

 jobs:
   nix-build-aarch64:

.github/workflows/nix-ci.yml

Lines changed: 0 additions & 2 deletions
@@ -5,10 +5,8 @@ on:
   push:
     branches:
       - master
-    paths: ['.github/workflows/**', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', '**/*.sh', '**/*.py', '**/*.nix']
   pull_request:
     types: [opened, synchronize, reopened]
-    paths: ['**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', '**/*.sh', '**/*.py', '**/*.nix']

 jobs:
   nix-eval:

CMakeLists.txt

Lines changed: 19 additions & 0 deletions
@@ -47,6 +47,7 @@ option(BUILD_SHARED_LIBS "build shared libraries"
 option(LLAMA_STATIC "llama: static link libraries" OFF)
 option(LLAMA_NATIVE "llama: enable -march=native flag" ON)
 option(LLAMA_LTO "llama: enable link time optimization" OFF)
+option(LLAMA_CCACHE "llama: use ccache if available" ON)

 # debug
 option(LLAMA_ALL_WARNINGS "llama: enable all compiler warnings" ON)
@@ -107,6 +108,13 @@ option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STA
 option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE})
 option(LLAMA_BUILD_SERVER "llama: build server example" ON)

+
+# add perf arguments
+option(LLAMA_PERF "llama: enable perf" OFF)
+if (LLAMA_PERF)
+    add_definitions(-DGGML_PERF)
+endif()
+
 # Required for relocatable CMake package
 include(${CMAKE_CURRENT_SOURCE_DIR}/scripts/build-info.cmake)

@@ -561,6 +569,17 @@ if (LLAMA_LTO)
     endif()
 endif()

+if (LLAMA_CCACHE)
+    find_program(LLAMA_CCACHE_FOUND ccache)
+    if (LLAMA_CCACHE_FOUND)
+        set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache)
+        set(ENV{CCACHE_SLOPPINESS} time_macros)
+        message(STATUS "Using ccache")
+    else()
+        message(STATUS "Warning: ccache not found - consider installing it or use LLAMA_CCACHE=OFF")
+    endif ()
+endif()
+
 # this version of Apple ld64 is buggy
 execute_process(
     COMMAND ${CMAKE_C_COMPILER} ${CMAKE_EXE_LINKER_FLAGS} -Wl,-v
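
The new `LLAMA_PERF` option simply injects a `-DGGML_PERF` compile definition. As a minimal sketch of how such a definition is typically consumed behind preprocessor guards (the `do_work` function and the printed label below are placeholders for illustration, not ggml's actual instrumentation):

```cpp
// Hypothetical sketch: timing code compiled in only when -DGGML_PERF is set.
#include <chrono>
#include <cstdio>

static void do_work() {
    volatile double acc = 0.0;
    for (int i = 0; i < 1000000; ++i) acc += i * 0.5; // stand-in workload
}

int main() {
#ifdef GGML_PERF
    const auto t0 = std::chrono::high_resolution_clock::now();
#endif
    do_work();
#ifdef GGML_PERF
    const auto t1 = std::chrono::high_resolution_clock::now();
    fprintf(stderr, "perf: do_work took %.3f ms\n",
            std::chrono::duration<double, std::milli>(t1 - t0).count());
#endif
    return 0;
}
```

From the command line these options would be toggled the usual CMake way, e.g. `cmake -DLLAMA_PERF=ON` or `-DLLAMA_CCACHE=OFF` to skip the new ccache hook.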

README.md

Lines changed: 1 addition & 0 deletions
@@ -128,6 +128,7 @@ as the main playground for developing new features for the [ggml](https://github
 - React Native: [mybigday/llama.rn](https://github.com/mybigday/llama.rn)
 - Java: [kherud/java-llama.cpp](https://github.com/kherud/java-llama.cpp)
 - Zig: [deins/llama.cpp.zig](https://github.com/Deins/llama.cpp.zig)
+- Flutter/Dart: [netdur/llama_cpp_dart](https://github.com/netdur/llama_cpp_dart)

 **UI:**

common/common.cpp

Lines changed: 40 additions & 0 deletions
@@ -203,6 +203,25 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
             params.prompt_cache_all = true;
         } else if (arg == "--prompt-cache-ro") {
             params.prompt_cache_ro = true;
+        } else if (arg == "-bf" || arg == "--binary-file") {
+            if (++i >= argc) {
+                invalid_param = true;
+                break;
+            }
+            std::ifstream file(argv[i], std::ios::binary);
+            if (!file) {
+                fprintf(stderr, "error: failed to open file '%s'\n", argv[i]);
+                invalid_param = true;
+                break;
+            }
+            // store the external file name in params
+            params.prompt_file = argv[i];
+            file.seekg(0, std::ios::end);
+            size_t size = file.tellg();
+            file.seekg(0, std::ios::beg);
+            params.prompt.resize(size);
+            file.read((char *)params.prompt.data(), size);
+            fprintf(stderr, "Read %zu bytes from binary file %s\n", size, argv[i]);
         } else if (arg == "-f" || arg == "--file") {
             if (++i >= argc) {
                 invalid_param = true;
@@ -653,6 +672,12 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
             if (params.logdir.back() != DIRECTORY_SEPARATOR) {
                 params.logdir += DIRECTORY_SEPARATOR;
             }
+        } else if (arg == "--save-all-logits" || arg == "--kl-divergence-base") {
+            if (++i >= argc) {
+                invalid_param = true;
+                break;
+            }
+            params.logits_file = argv[i];
         } else if (arg == "--perplexity" || arg == "--all-logits") {
             params.logits_all = true;
         } else if (arg == "--ppl-stride") {
@@ -689,6 +714,16 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
                 break;
             }
             params.winogrande_tasks = std::stoi(argv[i]);
+        } else if (arg == "--multiple-choice") {
+            params.multiple_choice = true;
+        } else if (arg == "--multiple-choice-tasks") {
+            if (++i >= argc) {
+                invalid_param = true;
+                break;
+            }
+            params.multiple_choice_tasks = std::stoi(argv[i]);
+        } else if (arg == "--kl-divergence") {
+            params.kl_divergence = true;
         } else if (arg == "--ignore-eos") {
             params.ignore_eos = true;
         } else if (arg == "--no-penalize-nl") {
@@ -888,6 +923,8 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
     printf(" --in-suffix STRING string to suffix after user inputs with (default: empty)\n");
     printf(" -f FNAME, --file FNAME\n");
     printf(" prompt file to start generation.\n");
+    printf(" -bf FNAME, --binary-file FNAME\n");
+    printf(" binary file containing multiple choice tasks.\n");
     printf(" -n N, --n-predict N number of tokens to predict (default: %d, -1 = infinity, -2 = until context filled)\n", params.n_predict);
     printf(" -c N, --ctx-size N size of the prompt context (default: %d, 0 = loaded from model)\n", params.n_ctx);
     printf(" -b N, --batch-size N batch size for prompt processing (default: %d)\n", params.n_batch);
@@ -936,6 +973,9 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
     printf(" --hellaswag-tasks N number of tasks to use when computing the HellaSwag score (default: %zu)\n", params.hellaswag_tasks);
     printf(" --winogrande compute Winogrande score over random tasks from datafile supplied with -f\n");
     printf(" --winogrande-tasks N number of tasks to use when computing the Winogrande score (default: %zu)\n", params.winogrande_tasks);
+    printf(" --multiple-choice compute multiple choice score over random tasks from datafile supplied with -f\n");
+    printf(" --multiple-choice-tasks N number of tasks to use when computing the multiple choice score (default: %zu)\n", params.winogrande_tasks);
+    printf(" --kl-divergence computes KL-divergence to logits provided via --kl-divergence-base");
     printf(" --keep N number of tokens to keep from the initial prompt (default: %d, -1 = all)\n", params.n_keep);
     printf(" --draft N number of tokens to draft for speculative decoding (default: %d)\n", params.n_draft);
     printf(" --chunks N max number of chunks to process (default: %d, -1 = all)\n", params.n_chunks);

common/common.h

Lines changed: 6 additions & 0 deletions
@@ -91,6 +91,7 @@ struct gpt_params {
     std::string input_suffix = ""; // string to suffix user inputs with
     std::vector<std::string> antiprompt; // string upon seeing which more user input is prompted
     std::string logdir = ""; // directory in which to save YAML log files
+    std::string logits_file = ""; // file for saving *all* logits

     std::vector<llama_model_kv_override> kv_overrides;

@@ -108,6 +109,11 @@ struct gpt_params {
     bool winogrande = false; // compute Winogrande score over random tasks from datafile supplied in prompt
     size_t winogrande_tasks = 0; // number of tasks to use when computing the Winogrande score. If 0, all tasks will be computed

+    bool multiple_choice = false; // compute TruthfulQA score over random tasks from datafile supplied in prompt
+    size_t multiple_choice_tasks = 0; // number of tasks to use when computing the TruthfulQA score. If 0, all tasks will be computed
+
+    bool kl_divergence = false; // compute KL-divergence
+
     bool mul_mat_q = true; // if true, use mul_mat_q kernels instead of cuBLAS
     bool random_prompt = false; // do not randomize prompt if none provided
     bool use_color = false; // use color to distinguish generations and inputs
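
A hypothetical sketch of how downstream evaluation code might branch on the new `gpt_params` fields added here; the trimmed stand-in struct and `run_eval` function below are assumptions for illustration and are not code from this commit:

```cpp
// Illustrative dispatch on the new evaluation-related fields.
#include <cstddef>
#include <cstdio>
#include <string>

struct gpt_params_subset {                     // trimmed stand-in, not the real gpt_params
    bool        multiple_choice       = false;
    size_t      multiple_choice_tasks = 0;     // 0 = all tasks
    bool        kl_divergence         = false;
    std::string logits_file;                   // base logits, set via --kl-divergence-base
};

static void run_eval(const gpt_params_subset & params) {
    if (params.multiple_choice) {
        fprintf(stderr, "multiple choice eval, tasks = %zu (0 = all)\n",
                params.multiple_choice_tasks);
    } else if (params.kl_divergence) {
        fprintf(stderr, "KL-divergence against logits in '%s'\n",
                params.logits_file.c_str());
    } else {
        fprintf(stderr, "no extra evaluation requested\n");
    }
}

int main() {
    gpt_params_subset params;
    params.multiple_choice = true;
    run_eval(params);
    return 0;
}
```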
