Skip to content

Commit ddb13ed

Browse files
committed
llama: fix llama_split_prefix so the strncpy result includes the string terminator
common: llama_load_model_from_url: - fix case-sensitive header name matching - support downloading additional splits in parallel - hide password in URL
1 parent 50ccaf5 commit ddb13ed

File tree

4 files changed

+159
-48
lines changed

4 files changed

+159
-48
lines changed

common/common.cpp

Lines changed: 150 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@
3939
#endif
4040
#if defined(LLAMA_USE_CURL)
4141
#include <curl/curl.h>
42+
#include <curl/easy.h>
43+
#include <thread>
44+
#include <future>
4245
#endif
4346

4447
#if defined(_MSC_VER)
@@ -1702,27 +1705,27 @@ void llama_batch_add(
17021705

17031706
#ifdef LLAMA_USE_CURL
17041707

1705-
struct llama_model * llama_load_model_from_url(
1706-
const char * model_url,
1707-
const char * path_model,
1708-
const struct llama_model_params & params) {
1709-
// Basic validation of the model_url
1710-
if (!model_url || strlen(model_url) == 0) {
1711-
fprintf(stderr, "%s: invalid model_url\n", __func__);
1712-
return NULL;
1708+
static std::string llama_download_hide_password_in_url(const std::string & url) {
1709+
std::size_t protocol_pos = url.find("://");
1710+
if (protocol_pos == std::string::npos) {
1711+
return url; // Malformed URL
17131712
}
17141713

1715-
// Initialize libcurl globally
1716-
auto curl = curl_easy_init();
1717-
1718-
if (!curl) {
1719-
fprintf(stderr, "%s: error initializing libcurl\n", __func__);
1720-
return NULL;
1714+
std::size_t at_pos = url.find('@', protocol_pos + 3);
1715+
if (at_pos == std::string::npos) {
1716+
return url; // No password in URL
17211717
}
17221718

1719+
return url.substr(0, protocol_pos + 3) + "********" + url.substr(at_pos);
1720+
}
1721+
1722+
static bool llama_download_file(CURL * curl, const char * url, const char * path) {
1723+
bool force_download = false;
1724+
17231725
// Set the URL, allow to follow http redirection
1724-
curl_easy_setopt(curl, CURLOPT_URL, model_url);
1726+
curl_easy_setopt(curl, CURLOPT_URL, url);
17251727
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
1728+
17261729
#if defined(_WIN32)
17271730
// CURLSSLOPT_NATIVE_CA tells libcurl to use standard certificate store of
17281731
// operating system. Currently implemented under MS-Windows.
@@ -1731,24 +1734,24 @@ struct llama_model * llama_load_model_from_url(
17311734

17321735
// Check if the file already exists locally
17331736
struct stat model_file_info;
1734-
auto file_exists = (stat(path_model, &model_file_info) == 0);
1737+
auto file_exists = (stat(path, &model_file_info) == 0);
17351738

17361739
// If the file exists, check for ${path_model}.etag or ${path_model}.lastModified files
17371740
char etag[LLAMA_CURL_MAX_HEADER_LENGTH] = {0};
1738-
char etag_path[LLAMA_CURL_MAX_PATH_LENGTH] = {0};
1739-
snprintf(etag_path, sizeof(etag_path), "%s.etag", path_model);
1741+
char etag_path[PATH_MAX] = {0};
1742+
snprintf(etag_path, sizeof(etag_path), "%s.etag", path);
17401743

17411744
char last_modified[LLAMA_CURL_MAX_HEADER_LENGTH] = {0};
1742-
char last_modified_path[LLAMA_CURL_MAX_PATH_LENGTH] = {0};
1743-
snprintf(last_modified_path, sizeof(last_modified_path), "%s.lastModified", path_model);
1745+
char last_modified_path[PATH_MAX] = {0};
1746+
snprintf(last_modified_path, sizeof(last_modified_path), "%s.lastModified", path);
17441747

17451748
if (file_exists) {
17461749
auto * f_etag = fopen(etag_path, "r");
17471750
if (f_etag) {
17481751
if (!fgets(etag, sizeof(etag), f_etag)) {
17491752
fprintf(stderr, "%s: unable to read file %s\n", __func__, etag_path);
17501753
} else {
1751-
fprintf(stderr, "%s: previous model file found %s: %s\n", __func__, etag_path, etag);
1754+
fprintf(stderr, "%s: previous file found %s: %s\n", __func__, etag_path, etag);
17521755
}
17531756
fclose(f_etag);
17541757
}
@@ -1758,7 +1761,7 @@ struct llama_model * llama_load_model_from_url(
17581761
if (!fgets(last_modified, sizeof(last_modified), f_last_modified)) {
17591762
fprintf(stderr, "%s: unable to read file %s\n", __func__, last_modified_path);
17601763
} else {
1761-
fprintf(stderr, "%s: previous model file found %s: %s\n", __func__, last_modified_path,
1764+
fprintf(stderr, "%s: previous file found %s: %s\n", __func__, last_modified_path,
17621765
last_modified);
17631766
}
17641767
fclose(f_last_modified);
@@ -1776,6 +1779,11 @@ struct llama_model * llama_load_model_from_url(
17761779
auto header_callback = [](char * buffer, size_t /*size*/, size_t n_items, void * userdata) -> size_t {
17771780
llama_load_model_from_url_headers *headers = (llama_load_model_from_url_headers *) userdata;
17781781

1782+
// Convert header field name to lowercase
1783+
for(size_t i = 0; i < n_items && buffer[i] != ':'; ++i) {
1784+
buffer[i] = tolower(buffer[i]);
1785+
}
1786+
17791787
const char * etag_prefix = "etag: ";
17801788
if (strncmp(buffer, etag_prefix, strlen(etag_prefix)) == 0) {
17811789
strncpy(headers->etag, buffer + strlen(etag_prefix), n_items - strlen(etag_prefix) - 2); // Remove CRLF
@@ -1798,38 +1806,42 @@ struct llama_model * llama_load_model_from_url(
17981806
if (res != CURLE_OK) {
17991807
curl_easy_cleanup(curl);
18001808
fprintf(stderr, "%s: curl_easy_perform() failed: %s\n", __func__, curl_easy_strerror(res));
1801-
return NULL;
1809+
return false;
18021810
}
18031811

18041812
long http_code = 0;
18051813
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &http_code);
18061814
if (http_code != 200) {
18071815
// HEAD not supported, we don't know if the file has changed
18081816
// force trigger downloading
1809-
file_exists = false;
1817+
force_download = true;
18101818
fprintf(stderr, "%s: HEAD invalid http status code received: %ld\n", __func__, http_code);
18111819
}
18121820
}
18131821

18141822
// If the ETag or the Last-Modified headers are different: trigger a new download
1815-
if (!file_exists || strcmp(etag, headers.etag) != 0 || strcmp(last_modified, headers.last_modified) != 0) {
1816-
char path_model_temporary[LLAMA_CURL_MAX_PATH_LENGTH] = {0};
1817-
snprintf(path_model_temporary, sizeof(path_model_temporary), "%s.downloadInProgress", path_model);
1823+
bool should_download = !file_exists
1824+
|| force_download
1825+
|| (strlen(headers.etag) > 0 && strcmp(etag, headers.etag) != 0)
1826+
|| (strlen(headers.last_modified) > 0 && strcmp(last_modified, headers.last_modified) != 0);
1827+
if (should_download) {
1828+
char path_temporary[PATH_MAX] = {0};
1829+
snprintf(path_temporary, sizeof(path_temporary), "%s.downloadInProgress", path);
18181830
if (file_exists) {
1819-
fprintf(stderr, "%s: deleting previous downloaded model file: %s\n", __func__, path_model);
1820-
if (remove(path_model) != 0) {
1831+
fprintf(stderr, "%s: deleting previous downloaded file: %s\n", __func__, path);
1832+
if (remove(path) != 0) {
18211833
curl_easy_cleanup(curl);
1822-
fprintf(stderr, "%s: unable to delete file: %s\n", __func__, path_model);
1823-
return NULL;
1834+
fprintf(stderr, "%s: unable to delete file: %s\n", __func__, path);
1835+
return false;
18241836
}
18251837
}
18261838

18271839
// Set the output file
1828-
auto * outfile = fopen(path_model_temporary, "wb");
1840+
auto * outfile = fopen(path_temporary, "wb");
18291841
if (!outfile) {
18301842
curl_easy_cleanup(curl);
1831-
fprintf(stderr, "%s: error opening local file for writing: %s\n", __func__, path_model);
1832-
return NULL;
1843+
fprintf(stderr, "%s: error opening local file for writing: %s\n", __func__, path);
1844+
return false;
18331845
}
18341846

18351847
typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * data, size_t size, size_t nmemb, void * fd);
@@ -1844,14 +1856,14 @@ struct llama_model * llama_load_model_from_url(
18441856
curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L);
18451857

18461858
// start the download
1847-
fprintf(stderr, "%s: downloading model from %s to %s (server_etag:%s, server_last_modified:%s)...\n", __func__,
1848-
model_url, path_model, headers.etag, headers.last_modified);
1859+
fprintf(stderr, "%s: downloading from %s to %s (server_etag:%s, server_last_modified:%s)...\n", __func__,
1860+
llama_download_hide_password_in_url(url).c_str(), path, headers.etag, headers.last_modified);
18491861
auto res = curl_easy_perform(curl);
18501862
if (res != CURLE_OK) {
18511863
fclose(outfile);
18521864
curl_easy_cleanup(curl);
18531865
fprintf(stderr, "%s: curl_easy_perform() failed: %s\n", __func__, curl_easy_strerror(res));
1854-
return NULL;
1866+
return false;
18551867
}
18561868

18571869
long http_code = 0;
@@ -1860,7 +1872,7 @@ struct llama_model * llama_load_model_from_url(
18601872
fclose(outfile);
18611873
curl_easy_cleanup(curl);
18621874
fprintf(stderr, "%s: invalid http status code received: %ld\n", __func__, http_code);
1863-
return NULL;
1875+
return false;
18641876
}
18651877

18661878
// Clean up
@@ -1872,7 +1884,7 @@ struct llama_model * llama_load_model_from_url(
18721884
if (etag_file) {
18731885
fputs(headers.etag, etag_file);
18741886
fclose(etag_file);
1875-
fprintf(stderr, "%s: model etag saved %s: %s\n", __func__, etag_path, headers.etag);
1887+
fprintf(stderr, "%s: file etag saved %s: %s\n", __func__, etag_path, headers.etag);
18761888
}
18771889
}
18781890

@@ -1882,20 +1894,116 @@ struct llama_model * llama_load_model_from_url(
18821894
if (last_modified_file) {
18831895
fputs(headers.last_modified, last_modified_file);
18841896
fclose(last_modified_file);
1885-
fprintf(stderr, "%s: model last modified saved %s: %s\n", __func__, last_modified_path,
1897+
fprintf(stderr, "%s: file last modified saved %s: %s\n", __func__, last_modified_path,
18861898
headers.last_modified);
18871899
}
18881900
}
18891901

1890-
if (rename(path_model_temporary, path_model) != 0) {
1902+
if (rename(path_temporary, path) != 0) {
18911903
curl_easy_cleanup(curl);
1892-
fprintf(stderr, "%s: unable to rename file: %s to %s\n", __func__, path_model_temporary, path_model);
1904+
fprintf(stderr, "%s: unable to rename file: %s to %s\n", __func__, path_temporary, path);
1905+
return false;
1906+
}
1907+
}
1908+
1909+
return true;
1910+
}
1911+
1912+
struct llama_model * llama_load_model_from_url(
1913+
const char * model_url,
1914+
const char * path_model,
1915+
const struct llama_model_params & params) {
1916+
// Basic validation of the model_url
1917+
if (!model_url || strlen(model_url) == 0) {
1918+
fprintf(stderr, "%s: invalid model_url\n", __func__);
1919+
return NULL;
1920+
}
1921+
1922+
// Initialize libcurl
1923+
auto * curl = curl_easy_init();
1924+
1925+
if (!curl) {
1926+
fprintf(stderr, "%s: error initializing libcurl\n", __func__);
1927+
return NULL;
1928+
}
1929+
1930+
if (!curl) {
1931+
fprintf(stderr, "%s: error initializing libcurl\n", __func__);
1932+
return NULL;
1933+
}
1934+
1935+
if (!llama_download_file(curl, model_url, path_model)) {
1936+
return NULL;
1937+
}
1938+
1939+
// check for additional GGUFs split to download
1940+
int n_split = 0;
1941+
{
1942+
struct gguf_init_params gguf_params = {
1943+
/*.no_alloc = */ true,
1944+
/*.ctx = */ NULL,
1945+
};
1946+
auto * ctx_gguf = gguf_init_from_file(path_model, gguf_params);
1947+
if (!ctx_gguf) {
1948+
fprintf(stderr, "\n%s: failed to load input GGUF from %s\n", __func__, path_model);
18931949
return NULL;
18941950
}
1951+
1952+
auto key_n_split = gguf_find_key(ctx_gguf, LLM_KV_SPLIT_COUNT);
1953+
if (key_n_split >= 0) {
1954+
n_split = gguf_get_val_u16(ctx_gguf, key_n_split);
1955+
}
1956+
1957+
gguf_free(ctx_gguf);
18951958
}
18961959

18971960
curl_easy_cleanup(curl);
18981961

1962+
if (n_split > 1) {
1963+
char split_prefix[PATH_MAX] = {0};
1964+
char split_url_prefix[PATH_MAX] = {0};
1965+
1966+
// Verify the first split file format
1967+
{
1968+
if (!llama_split_prefix(split_prefix, sizeof(split_prefix), path_model, 0, n_split)) {
1969+
fprintf(stderr, "\n%s: unexpected model file name: %s"
1970+
" n_split=%d\n", __func__, path_model, n_split);
1971+
return NULL;
1972+
}
1973+
1974+
if (!llama_split_prefix(split_url_prefix, sizeof(split_url_prefix), model_url, 0, n_split)) {
1975+
fprintf(stderr, "\n%s: unexpected model url: %s"
1976+
" n_split=%d\n", __func__, model_url, n_split);
1977+
return NULL;
1978+
}
1979+
}
1980+
1981+
// Prepare download in parallel
1982+
std::vector<std::future<bool>> futures_download;
1983+
for (int idx = 1; idx < n_split; idx++) {
1984+
futures_download.push_back(std::async(std::launch::async, [&split_prefix, &split_url_prefix, &n_split](int download_idx) -> bool {
1985+
char split_path[PATH_MAX] = {0};
1986+
llama_split_path(split_path, sizeof(split_path), split_prefix, download_idx, n_split);
1987+
1988+
char split_url[PATH_MAX] = {0};
1989+
llama_split_path(split_url, sizeof(split_url), split_url_prefix, download_idx, n_split);
1990+
1991+
auto * curl = curl_easy_init();
1992+
bool res = llama_download_file(curl, split_url, split_path);
1993+
curl_easy_cleanup(curl);
1994+
1995+
return res;
1996+
}, idx));
1997+
}
1998+
1999+
// Wait for all downloads to complete
2000+
for(auto &f : futures_download) {
2001+
if(!f.get()) {
2002+
return NULL;
2003+
}
2004+
}
2005+
}
2006+
18992007
return llama_load_model_from_file(path_model, params);
19002008
}
19012009

common/common.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -306,3 +306,10 @@ struct llama_control_vector_load_info {
306306
// Load control vectors, scale each by strength, and add them together.
307307
// On error, returns {-1, empty}
308308
llama_control_vector_data llama_control_vector_load(const std::vector<llama_control_vector_load_info> & load_infos);
309+
310+
//
311+
// Split utils
312+
//
313+
static const char * const LLM_KV_SPLIT_NO = "split.no";
314+
static const char * const LLM_KV_SPLIT_COUNT = "split.count";
315+
static const char * const LLM_KV_SPLIT_TENSORS_COUNT = "split.tensors.count";

examples/gguf-split/gguf-split.cpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,6 @@ enum split_operation : uint8_t {
2626
SPLIT_OP_MERGE,
2727
};
2828

29-
static const char * const LLM_KV_SPLIT_NO = "split.no";
30-
static const char * const LLM_KV_SPLIT_COUNT = "split.count";
31-
static const char * const LLM_KV_SPLIT_TENSORS_COUNT = "split.tensors.count";
32-
3329
struct split_params {
3430
split_operation operation = SPLIT_OP_SPLIT;
3531
int n_split_tensors = 128;

llama.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14839,9 +14839,9 @@ int llama_split_prefix(char * dest, size_t maxlen, const char * split_path, int
1483914839
std::string str_postfix(postfix);
1484014840

1484114841
// check if dest ends with postfix
14842-
int size_prefix = str_split_path.size() - str_postfix.size();
14842+
int size_prefix = str_split_path.size() - str_postfix.size() ;
1484314843
if (size_prefix > 0 && str_split_path.find(str_postfix, size_prefix) != std::string::npos) {
14844-
snprintf(dest, std::min((size_t) size_prefix, maxlen), "%s", split_path);
14844+
snprintf(dest, std::min((size_t) size_prefix + 1, maxlen), "%s", split_path);
1484514845
return size_prefix;
1484614846
}
1484714847

0 commit comments

Comments
 (0)