39
39
#endif
40
40
#if defined(LLAMA_USE_CURL)
41
41
#include < curl/curl.h>
42
+ #include < curl/easy.h>
43
+ #include < thread>
44
+ #include < future>
42
45
#endif
43
46
44
47
#if defined(_MSC_VER)
61
64
#else
62
65
#include < sys/syslimits.h>
63
66
#endif
64
- #define LLAMA_CURL_MAX_PATH_LENGTH PATH_MAX
67
+ #define LLAMA_CURL_MAX_URL_LENGTH 2084 // Maximum URL Length in Chrome: 2083
65
68
#define LLAMA_CURL_MAX_HEADER_LENGTH 256
66
69
#endif // LLAMA_USE_CURL
67
70
@@ -1702,27 +1705,13 @@ void llama_batch_add(
1702
1705
1703
1706
#ifdef LLAMA_USE_CURL
1704
1707
1705
- struct llama_model * llama_load_model_from_url (
1706
- const char * model_url,
1707
- const char * path_model,
1708
- const struct llama_model_params & params) {
1709
- // Basic validation of the model_url
1710
- if (!model_url || strlen (model_url) == 0 ) {
1711
- fprintf (stderr, " %s: invalid model_url\n " , __func__);
1712
- return NULL ;
1713
- }
1714
-
1715
- // Initialize libcurl globally
1716
- auto curl = curl_easy_init ();
1717
-
1718
- if (!curl) {
1719
- fprintf (stderr, " %s: error initializing libcurl\n " , __func__);
1720
- return NULL ;
1721
- }
1708
+ static bool llama_download_file (CURL * curl, const char * url, const char * path) {
1709
+ bool force_download = false ;
1722
1710
1723
1711
// Set the URL, allow to follow http redirection
1724
- curl_easy_setopt (curl, CURLOPT_URL, model_url );
1712
+ curl_easy_setopt (curl, CURLOPT_URL, url );
1725
1713
curl_easy_setopt (curl, CURLOPT_FOLLOWLOCATION, 1L );
1714
+
1726
1715
#if defined(_WIN32)
1727
1716
// CURLSSLOPT_NATIVE_CA tells libcurl to use standard certificate store of
1728
1717
// operating system. Currently implemented under MS-Windows.
@@ -1731,24 +1720,24 @@ struct llama_model * llama_load_model_from_url(
1731
1720
1732
1721
// Check if the file already exists locally
1733
1722
struct stat model_file_info;
1734
- auto file_exists = (stat (path_model , &model_file_info) == 0 );
1723
+ auto file_exists = (stat (path , &model_file_info) == 0 );
1735
1724
1736
1725
// If the file exists, check for ${path}.etag or ${path}.lastModified files
1737
1726
char etag[LLAMA_CURL_MAX_HEADER_LENGTH] = {0 };
1738
- char etag_path[LLAMA_CURL_MAX_PATH_LENGTH ] = {0 };
1739
- snprintf (etag_path, sizeof (etag_path), " %s.etag" , path_model );
1727
+ char etag_path[PATH_MAX ] = {0 };
1728
+ snprintf (etag_path, sizeof (etag_path), " %s.etag" , path );
1740
1729
1741
1730
char last_modified[LLAMA_CURL_MAX_HEADER_LENGTH] = {0 };
1742
- char last_modified_path[LLAMA_CURL_MAX_PATH_LENGTH ] = {0 };
1743
- snprintf (last_modified_path, sizeof (last_modified_path), " %s.lastModified" , path_model );
1731
+ char last_modified_path[PATH_MAX ] = {0 };
1732
+ snprintf (last_modified_path, sizeof (last_modified_path), " %s.lastModified" , path );
1744
1733
1745
1734
if (file_exists) {
1746
1735
auto * f_etag = fopen (etag_path, " r" );
1747
1736
if (f_etag) {
1748
1737
if (!fgets (etag, sizeof (etag), f_etag)) {
1749
1738
fprintf (stderr, " %s: unable to read file %s\n " , __func__, etag_path);
1750
1739
} else {
1751
- fprintf (stderr, " %s: previous model file found %s: %s\n " , __func__, etag_path, etag);
1740
+ fprintf (stderr, " %s: previous file found %s: %s\n " , __func__, etag_path, etag);
1752
1741
}
1753
1742
fclose (f_etag);
1754
1743
}
@@ -1758,7 +1747,7 @@ struct llama_model * llama_load_model_from_url(
1758
1747
if (!fgets (last_modified, sizeof (last_modified), f_last_modified)) {
1759
1748
fprintf (stderr, " %s: unable to read file %s\n " , __func__, last_modified_path);
1760
1749
} else {
1761
- fprintf (stderr, " %s: previous model file found %s: %s\n " , __func__, last_modified_path,
1750
+ fprintf (stderr, " %s: previous file found %s: %s\n " , __func__, last_modified_path,
1762
1751
last_modified);
1763
1752
}
1764
1753
fclose (f_last_modified);
@@ -1776,6 +1765,11 @@ struct llama_model * llama_load_model_from_url(
1776
1765
auto header_callback = [](char * buffer, size_t /* size*/ , size_t n_items, void * userdata) -> size_t {
1777
1766
llama_load_model_from_url_headers *headers = (llama_load_model_from_url_headers *) userdata;
1778
1767
1768
+ // Convert header field name to lowercase
1769
+ for (size_t i = 0 ; i < n_items && buffer[i] != ' :' ; ++i) {
1770
+ buffer[i] = tolower (buffer[i]);
1771
+ }
1772
+
1779
1773
const char * etag_prefix = " etag: " ;
1780
1774
if (strncmp (buffer, etag_prefix, strlen (etag_prefix)) == 0 ) {
1781
1775
strncpy (headers->etag , buffer + strlen (etag_prefix), n_items - strlen (etag_prefix) - 2 ); // Remove CRLF
@@ -1798,38 +1792,42 @@ struct llama_model * llama_load_model_from_url(
1798
1792
if (res != CURLE_OK) {
1799
1793
curl_easy_cleanup (curl);
1800
1794
fprintf (stderr, " %s: curl_easy_perform() failed: %s\n " , __func__, curl_easy_strerror (res));
1801
- return NULL ;
1795
+ return false ;
1802
1796
}
1803
1797
1804
1798
long http_code = 0 ;
1805
1799
curl_easy_getinfo (curl, CURLINFO_RESPONSE_CODE, &http_code);
1806
1800
if (http_code != 200 ) {
1807
1801
// HEAD not supported, we don't know if the file has changed
1808
1802
// force trigger downloading
1809
- file_exists = false ;
1803
+ force_download = true ;
1810
1804
fprintf (stderr, " %s: HEAD invalid http status code received: %ld\n " , __func__, http_code);
1811
1805
}
1812
1806
}
1813
1807
1814
1808
// If the ETag or the Last-Modified headers are different: trigger a new download
1815
- if (!file_exists || strcmp (etag, headers.etag ) != 0 || strcmp (last_modified, headers.last_modified ) != 0 ) {
1816
- char path_model_temporary[LLAMA_CURL_MAX_PATH_LENGTH] = {0 };
1817
- snprintf (path_model_temporary, sizeof (path_model_temporary), " %s.downloadInProgress" , path_model);
1809
+ bool should_download = !file_exists
1810
+ || force_download
1811
+ || (strlen (headers.etag ) > 0 && strcmp (etag, headers.etag ) != 0 )
1812
+ || (strlen (headers.last_modified ) > 0 && strcmp (last_modified, headers.last_modified ) != 0 );
1813
+ if (should_download) {
1814
+ char path_temporary[PATH_MAX] = {0 };
1815
+ snprintf (path_temporary, sizeof (path_temporary), " %s.downloadInProgress" , path);
1818
1816
if (file_exists) {
1819
- fprintf (stderr, " %s: deleting previous downloaded model file: %s\n " , __func__, path_model );
1820
- if (remove (path_model ) != 0 ) {
1817
+ fprintf (stderr, " %s: deleting previous downloaded file: %s\n " , __func__, path );
1818
+ if (remove (path ) != 0 ) {
1821
1819
curl_easy_cleanup (curl);
1822
- fprintf (stderr, " %s: unable to delete file: %s\n " , __func__, path_model );
1823
- return NULL ;
1820
+ fprintf (stderr, " %s: unable to delete file: %s\n " , __func__, path );
1821
+ return false ;
1824
1822
}
1825
1823
}
1826
1824
1827
1825
// Set the output file
1828
- auto * outfile = fopen (path_model_temporary , " wb" );
1826
+ auto * outfile = fopen (path_temporary , " wb" );
1829
1827
if (!outfile) {
1830
1828
curl_easy_cleanup (curl);
1831
- fprintf (stderr, " %s: error opening local file for writing: %s\n " , __func__, path_model );
1832
- return NULL ;
1829
+ fprintf (stderr, " %s: error opening local file for writing: %s\n " , __func__, path );
1830
+ return false ;
1833
1831
}
1834
1832
1835
1833
typedef size_t (*CURLOPT_WRITEFUNCTION_PTR)(void * data, size_t size, size_t nmemb, void * fd);
@@ -1843,15 +1841,30 @@ struct llama_model * llama_load_model_from_url(
1843
1841
// display download progress
1844
1842
curl_easy_setopt (curl, CURLOPT_NOPROGRESS, 0L );
1845
1843
1844
+ // helper function to hide password in URL
1845
+ auto llama_download_hide_password_in_url = [](const std::string & url) -> std::string {
1846
+ std::size_t protocol_pos = url.find (" ://" );
1847
+ if (protocol_pos == std::string::npos) {
1848
+ return url; // Malformed URL
1849
+ }
1850
+
1851
+ std::size_t at_pos = url.find (' @' , protocol_pos + 3 );
1852
+ if (at_pos == std::string::npos) {
1853
+ return url; // No password in URL
1854
+ }
1855
+
1856
+ return url.substr (0 , protocol_pos + 3 ) + " ********" + url.substr (at_pos);
1857
+ };
1858
+
1846
1859
// start the download
1847
- fprintf (stderr, " %s: downloading model from %s to %s (server_etag:%s, server_last_modified:%s)...\n " , __func__,
1848
- model_url, path_model , headers.etag , headers.last_modified );
1860
+ fprintf (stderr, " %s: downloading from %s to %s (server_etag:%s, server_last_modified:%s)...\n " , __func__,
1861
+ llama_download_hide_password_in_url (url). c_str (), path , headers.etag , headers.last_modified );
1849
1862
auto res = curl_easy_perform (curl);
1850
1863
if (res != CURLE_OK) {
1851
1864
fclose (outfile);
1852
1865
curl_easy_cleanup (curl);
1853
1866
fprintf (stderr, " %s: curl_easy_perform() failed: %s\n " , __func__, curl_easy_strerror (res));
1854
- return NULL ;
1867
+ return false ;
1855
1868
}
1856
1869
1857
1870
long http_code = 0 ;
@@ -1860,7 +1873,7 @@ struct llama_model * llama_load_model_from_url(
1860
1873
fclose (outfile);
1861
1874
curl_easy_cleanup (curl);
1862
1875
fprintf (stderr, " %s: invalid http status code received: %ld\n " , __func__, http_code);
1863
- return NULL ;
1876
+ return false ;
1864
1877
}
1865
1878
1866
1879
// Clean up
@@ -1872,7 +1885,7 @@ struct llama_model * llama_load_model_from_url(
1872
1885
if (etag_file) {
1873
1886
fputs (headers.etag , etag_file);
1874
1887
fclose (etag_file);
1875
- fprintf (stderr, " %s: model etag saved %s: %s\n " , __func__, etag_path, headers.etag );
1888
+ fprintf (stderr, " %s: file etag saved %s: %s\n " , __func__, etag_path, headers.etag );
1876
1889
}
1877
1890
}
1878
1891
@@ -1882,20 +1895,118 @@ struct llama_model * llama_load_model_from_url(
1882
1895
if (last_modified_file) {
1883
1896
fputs (headers.last_modified , last_modified_file);
1884
1897
fclose (last_modified_file);
1885
- fprintf (stderr, " %s: model last modified saved %s: %s\n " , __func__, last_modified_path,
1898
+ fprintf (stderr, " %s: file last modified saved %s: %s\n " , __func__, last_modified_path,
1886
1899
headers.last_modified );
1887
1900
}
1888
1901
}
1889
1902
1890
- if (rename (path_model_temporary, path_model) != 0 ) {
1903
+ if (rename (path_temporary, path) != 0 ) {
1904
+ curl_easy_cleanup (curl);
1905
+ fprintf (stderr, " %s: unable to rename file: %s to %s\n " , __func__, path_temporary, path);
1906
+ return false ;
1907
+ }
1908
+ }
1909
+
1910
+ return true ;
1911
+ }
1912
+
1913
+ struct llama_model * llama_load_model_from_url (
1914
+ const char * model_url,
1915
+ const char * path_model,
1916
+ const struct llama_model_params & params) {
1917
+ // Basic validation of the model_url
1918
+ if (!model_url || strlen (model_url) == 0 ) {
1919
+ fprintf (stderr, " %s: invalid model_url\n " , __func__);
1920
+ return NULL ;
1921
+ }
1922
+
1923
+ // Initialize libcurl
1924
+ auto * curl = curl_easy_init ();
1925
+
1926
+ if (!curl) {
1927
+ fprintf (stderr, " %s: error initializing libcurl\n " , __func__);
1928
+ return NULL ;
1929
+ }
1930
+
1931
+ if (!curl) {
1932
+ fprintf (stderr, " %s: error initializing libcurl\n " , __func__);
1933
+ return NULL ;
1934
+ }
1935
+
1936
+ if (!llama_download_file (curl, model_url, path_model)) {
1937
+ return NULL ;
1938
+ }
1939
+
1940
+ // check for additional GGUFs split to download
1941
+ int n_split = 0 ;
1942
+ {
1943
+ struct gguf_init_params gguf_params = {
1944
+ /* .no_alloc = */ true ,
1945
+ /* .ctx = */ NULL ,
1946
+ };
1947
+ auto * ctx_gguf = gguf_init_from_file (path_model, gguf_params);
1948
+ if (!ctx_gguf) {
1949
+ fprintf (stderr, " \n %s: failed to load input GGUF from %s\n " , __func__, path_model);
1891
1950
curl_easy_cleanup (curl);
1892
- fprintf (stderr, " %s: unable to rename file: %s to %s\n " , __func__, path_model_temporary, path_model);
1893
1951
return NULL ;
1894
1952
}
1953
+
1954
+ auto key_n_split = gguf_find_key (ctx_gguf, LLM_KV_SPLIT_COUNT);
1955
+ if (key_n_split >= 0 ) {
1956
+ n_split = gguf_get_val_u16 (ctx_gguf, key_n_split);
1957
+ }
1958
+
1959
+ gguf_free (ctx_gguf);
1895
1960
}
1896
1961
1897
1962
curl_easy_cleanup (curl);
1898
1963
1964
+ if (n_split > 1 ) {
1965
+ char split_prefix[PATH_MAX] = {0 };
1966
+ char split_url_prefix[LLAMA_CURL_MAX_URL_LENGTH] = {0 };
1967
+
1968
+ // Verify the first split file format
1969
+ // and extract split URL and PATH prefixes
1970
+ {
1971
+ if (!llama_split_prefix (split_prefix, sizeof (split_prefix), path_model, 0 , n_split)) {
1972
+ fprintf (stderr, " \n %s: unexpected model file name: %s"
1973
+ " n_split=%d\n " , __func__, path_model, n_split);
1974
+ return NULL ;
1975
+ }
1976
+
1977
+ if (!llama_split_prefix (split_url_prefix, sizeof (split_url_prefix), model_url, 0 , n_split)) {
1978
+ fprintf (stderr, " \n %s: unexpected model url: %s"
1979
+ " n_split=%d\n " , __func__, model_url, n_split);
1980
+ return NULL ;
1981
+ }
1982
+ }
1983
+
1984
+ // Prepare download in parallel
1985
+ std::vector<std::future<bool >> futures_download;
1986
+ for (int idx = 1 ; idx < n_split; idx++) {
1987
+ futures_download.push_back (std::async (std::launch::async, [&split_prefix, &split_url_prefix, &n_split](int download_idx) -> bool {
1988
+ char split_path[PATH_MAX] = {0 };
1989
+ llama_split_path (split_path, sizeof (split_path), split_prefix, download_idx, n_split);
1990
+
1991
+ char split_url[LLAMA_CURL_MAX_URL_LENGTH] = {0 };
1992
+ llama_split_path (split_url, sizeof (split_url), split_url_prefix, download_idx, n_split);
1993
+
1994
+ auto * curl = curl_easy_init ();
1995
+ bool res = llama_download_file (curl, split_url, split_path);
1996
+ curl_easy_cleanup (curl);
1997
+
1998
+ return res;
1999
+ }, idx));
2000
+ }
2001
+
2002
+ // Wait for all downloads to complete
2003
+ for (auto & f : futures_download) {
2004
+ if (!f.get ()) {
2005
+ return NULL ;
2006
+ }
2007
+ }
2008
+ }
2009
+
1899
2010
return llama_load_model_from_file (path_model, params);
1900
2011
}
1901
2012
0 commit comments