39
39
#endif
40
40
#if defined(LLAMA_USE_CURL)
41
41
#include < curl/curl.h>
42
+ #include < curl/easy.h>
43
+ #include < thread>
44
+ #include < future>
42
45
#endif
43
46
44
47
#if defined(_MSC_VER)
@@ -1702,27 +1705,27 @@ void llama_batch_add(
1702
1705
1703
1706
#ifdef LLAMA_USE_CURL
1704
1707
1705
- struct llama_model * llama_load_model_from_url (
1706
- const char * model_url,
1707
- const char * path_model,
1708
- const struct llama_model_params & params) {
1709
- // Basic validation of the model_url
1710
- if (!model_url || strlen (model_url) == 0 ) {
1711
- fprintf (stderr, " %s: invalid model_url\n " , __func__);
1712
- return NULL ;
1708
// Mask the userinfo (user:password) portion of a URL so it can be logged
// safely, e.g. "https://user:pass@host/m.gguf" -> "https://********@host/m.gguf".
// Returns the URL unchanged when it has no "://" scheme separator or no
// userinfo component.
static std::string llama_download_hide_password_in_url(const std::string & url) {
    std::size_t protocol_pos = url.find("://");
    if (protocol_pos == std::string::npos) {
        return url; // Malformed URL
    }

    std::size_t at_pos = url.find('@', protocol_pos + 3);
    if (at_pos == std::string::npos) {
        return url; // No password in URL
    }

    // An '@' appearing after the first '/' belongs to the path/query, not to
    // the authority component (RFC 3986) — previously such URLs were corrupted
    // (e.g. "https://host/path@file" became "https://********@file").
    std::size_t path_pos = url.find('/', protocol_pos + 3);
    if (path_pos != std::string::npos && at_pos > path_pos) {
        return url; // '@' is not part of userinfo
    }

    return url.substr(0, protocol_pos + 3) + "********" + url.substr(at_pos);
}
1721
+
1722
+ static bool llama_download_file (CURL * curl, const char * url, const char * path) {
1723
+ bool force_download = false ;
1724
+
1723
1725
// Set the URL, allow to follow http redirection
1724
- curl_easy_setopt (curl, CURLOPT_URL, model_url );
1726
+ curl_easy_setopt (curl, CURLOPT_URL, url );
1725
1727
curl_easy_setopt (curl, CURLOPT_FOLLOWLOCATION, 1L );
1728
+
1726
1729
#if defined(_WIN32)
1727
1730
// CURLSSLOPT_NATIVE_CA tells libcurl to use standard certificate store of
1728
1731
// operating system. Currently implemented under MS-Windows.
@@ -1731,24 +1734,24 @@ struct llama_model * llama_load_model_from_url(
1731
1734
1732
1735
// Check if the file already exists locally
1733
1736
struct stat model_file_info;
1734
- auto file_exists = (stat (path_model , &model_file_info) == 0 );
1737
+ auto file_exists = (stat (path , &model_file_info) == 0 );
1735
1738
1736
1739
// If the file exists, check for ${path_model}.etag or ${path_model}.lastModified files
1737
1740
char etag[LLAMA_CURL_MAX_HEADER_LENGTH] = {0 };
1738
- char etag_path[LLAMA_CURL_MAX_PATH_LENGTH ] = {0 };
1739
- snprintf (etag_path, sizeof (etag_path), " %s.etag" , path_model );
1741
+ char etag_path[PATH_MAX ] = {0 };
1742
+ snprintf (etag_path, sizeof (etag_path), " %s.etag" , path );
1740
1743
1741
1744
char last_modified[LLAMA_CURL_MAX_HEADER_LENGTH] = {0 };
1742
- char last_modified_path[LLAMA_CURL_MAX_PATH_LENGTH ] = {0 };
1743
- snprintf (last_modified_path, sizeof (last_modified_path), " %s.lastModified" , path_model );
1745
+ char last_modified_path[PATH_MAX ] = {0 };
1746
+ snprintf (last_modified_path, sizeof (last_modified_path), " %s.lastModified" , path );
1744
1747
1745
1748
if (file_exists) {
1746
1749
auto * f_etag = fopen (etag_path, " r" );
1747
1750
if (f_etag) {
1748
1751
if (!fgets (etag, sizeof (etag), f_etag)) {
1749
1752
fprintf (stderr, " %s: unable to read file %s\n " , __func__, etag_path);
1750
1753
} else {
1751
- fprintf (stderr, " %s: previous model file found %s: %s\n " , __func__, etag_path, etag);
1754
+ fprintf (stderr, " %s: previous file found %s: %s\n " , __func__, etag_path, etag);
1752
1755
}
1753
1756
fclose (f_etag);
1754
1757
}
@@ -1758,7 +1761,7 @@ struct llama_model * llama_load_model_from_url(
1758
1761
if (!fgets (last_modified, sizeof (last_modified), f_last_modified)) {
1759
1762
fprintf (stderr, " %s: unable to read file %s\n " , __func__, last_modified_path);
1760
1763
} else {
1761
- fprintf (stderr, " %s: previous model file found %s: %s\n " , __func__, last_modified_path,
1764
+ fprintf (stderr, " %s: previous file found %s: %s\n " , __func__, last_modified_path,
1762
1765
last_modified);
1763
1766
}
1764
1767
fclose (f_last_modified);
@@ -1776,6 +1779,11 @@ struct llama_model * llama_load_model_from_url(
1776
1779
auto header_callback = [](char * buffer, size_t /* size*/ , size_t n_items, void * userdata) -> size_t {
1777
1780
llama_load_model_from_url_headers *headers = (llama_load_model_from_url_headers *) userdata;
1778
1781
1782
+ // Convert header field name to lowercase
1783
+ for (size_t i = 0 ; i < n_items && buffer[i] != ' :' ; ++i) {
1784
+ buffer[i] = tolower (buffer[i]);
1785
+ }
1786
+
1779
1787
const char * etag_prefix = " etag: " ;
1780
1788
if (strncmp (buffer, etag_prefix, strlen (etag_prefix)) == 0 ) {
1781
1789
strncpy (headers->etag , buffer + strlen (etag_prefix), n_items - strlen (etag_prefix) - 2 ); // Remove CRLF
@@ -1798,38 +1806,42 @@ struct llama_model * llama_load_model_from_url(
1798
1806
if (res != CURLE_OK) {
1799
1807
curl_easy_cleanup (curl);
1800
1808
fprintf (stderr, " %s: curl_easy_perform() failed: %s\n " , __func__, curl_easy_strerror (res));
1801
- return NULL ;
1809
+ return false ;
1802
1810
}
1803
1811
1804
1812
long http_code = 0 ;
1805
1813
curl_easy_getinfo (curl, CURLINFO_RESPONSE_CODE, &http_code);
1806
1814
if (http_code != 200 ) {
1807
1815
// HEAD not supported, we don't know if the file has changed
1808
1816
// force trigger downloading
1809
- file_exists = false ;
1817
+ force_download = true ;
1810
1818
fprintf (stderr, " %s: HEAD invalid http status code received: %ld\n " , __func__, http_code);
1811
1819
}
1812
1820
}
1813
1821
1814
1822
// If the ETag or the Last-Modified headers are different: trigger a new download
1815
- if (!file_exists || strcmp (etag, headers.etag ) != 0 || strcmp (last_modified, headers.last_modified ) != 0 ) {
1816
- char path_model_temporary[LLAMA_CURL_MAX_PATH_LENGTH] = {0 };
1817
- snprintf (path_model_temporary, sizeof (path_model_temporary), " %s.downloadInProgress" , path_model);
1823
+ bool should_download = !file_exists
1824
+ || force_download
1825
+ || (strlen (headers.etag ) > 0 && strcmp (etag, headers.etag ) != 0 )
1826
+ || (strlen (headers.last_modified ) > 0 && strcmp (last_modified, headers.last_modified ) != 0 );
1827
+ if (should_download) {
1828
+ char path_temporary[PATH_MAX] = {0 };
1829
+ snprintf (path_temporary, sizeof (path_temporary), " %s.downloadInProgress" , path);
1818
1830
if (file_exists) {
1819
- fprintf (stderr, " %s: deleting previous downloaded model file: %s\n " , __func__, path_model );
1820
- if (remove (path_model ) != 0 ) {
1831
+ fprintf (stderr, " %s: deleting previous downloaded file: %s\n " , __func__, path );
1832
+ if (remove (path ) != 0 ) {
1821
1833
curl_easy_cleanup (curl);
1822
- fprintf (stderr, " %s: unable to delete file: %s\n " , __func__, path_model );
1823
- return NULL ;
1834
+ fprintf (stderr, " %s: unable to delete file: %s\n " , __func__, path );
1835
+ return false ;
1824
1836
}
1825
1837
}
1826
1838
1827
1839
// Set the output file
1828
- auto * outfile = fopen (path_model_temporary , " wb" );
1840
+ auto * outfile = fopen (path_temporary , " wb" );
1829
1841
if (!outfile) {
1830
1842
curl_easy_cleanup (curl);
1831
- fprintf (stderr, " %s: error opening local file for writing: %s\n " , __func__, path_model );
1832
- return NULL ;
1843
+ fprintf (stderr, " %s: error opening local file for writing: %s\n " , __func__, path );
1844
+ return false ;
1833
1845
}
1834
1846
1835
1847
typedef size_t (*CURLOPT_WRITEFUNCTION_PTR)(void * data, size_t size, size_t nmemb, void * fd);
@@ -1844,14 +1856,14 @@ struct llama_model * llama_load_model_from_url(
1844
1856
curl_easy_setopt (curl, CURLOPT_NOPROGRESS, 0L );
1845
1857
1846
1858
// start the download
1847
- fprintf (stderr, " %s: downloading model from %s to %s (server_etag:%s, server_last_modified:%s)...\n " , __func__,
1848
- model_url, path_model , headers.etag , headers.last_modified );
1859
+ fprintf (stderr, " %s: downloading from %s to %s (server_etag:%s, server_last_modified:%s)...\n " , __func__,
1860
+ llama_download_hide_password_in_url (url). c_str (), path , headers.etag , headers.last_modified );
1849
1861
auto res = curl_easy_perform (curl);
1850
1862
if (res != CURLE_OK) {
1851
1863
fclose (outfile);
1852
1864
curl_easy_cleanup (curl);
1853
1865
fprintf (stderr, " %s: curl_easy_perform() failed: %s\n " , __func__, curl_easy_strerror (res));
1854
- return NULL ;
1866
+ return false ;
1855
1867
}
1856
1868
1857
1869
long http_code = 0 ;
@@ -1860,7 +1872,7 @@ struct llama_model * llama_load_model_from_url(
1860
1872
fclose (outfile);
1861
1873
curl_easy_cleanup (curl);
1862
1874
fprintf (stderr, " %s: invalid http status code received: %ld\n " , __func__, http_code);
1863
- return NULL ;
1875
+ return false ;
1864
1876
}
1865
1877
1866
1878
// Clean up
@@ -1872,7 +1884,7 @@ struct llama_model * llama_load_model_from_url(
1872
1884
if (etag_file) {
1873
1885
fputs (headers.etag , etag_file);
1874
1886
fclose (etag_file);
1875
- fprintf (stderr, " %s: model etag saved %s: %s\n " , __func__, etag_path, headers.etag );
1887
+ fprintf (stderr, " %s: file etag saved %s: %s\n " , __func__, etag_path, headers.etag );
1876
1888
}
1877
1889
}
1878
1890
@@ -1882,20 +1894,116 @@ struct llama_model * llama_load_model_from_url(
1882
1894
if (last_modified_file) {
1883
1895
fputs (headers.last_modified , last_modified_file);
1884
1896
fclose (last_modified_file);
1885
- fprintf (stderr, " %s: model last modified saved %s: %s\n " , __func__, last_modified_path,
1897
+ fprintf (stderr, " %s: file last modified saved %s: %s\n " , __func__, last_modified_path,
1886
1898
headers.last_modified );
1887
1899
}
1888
1900
}
1889
1901
1890
- if (rename (path_model_temporary, path_model ) != 0 ) {
1902
+ if (rename (path_temporary, path ) != 0 ) {
1891
1903
curl_easy_cleanup (curl);
1892
- fprintf (stderr, " %s: unable to rename file: %s to %s\n " , __func__, path_model_temporary, path_model);
1904
+ fprintf (stderr, " %s: unable to rename file: %s to %s\n " , __func__, path_temporary, path);
1905
+ return false ;
1906
+ }
1907
+ }
1908
+
1909
+ return true ;
1910
+ }
1911
+
1912
+ struct llama_model * llama_load_model_from_url (
1913
+ const char * model_url,
1914
+ const char * path_model,
1915
+ const struct llama_model_params & params) {
1916
+ // Basic validation of the model_url
1917
+ if (!model_url || strlen (model_url) == 0 ) {
1918
+ fprintf (stderr, " %s: invalid model_url\n " , __func__);
1919
+ return NULL ;
1920
+ }
1921
+
1922
+ // Initialize libcurl
1923
+ auto * curl = curl_easy_init ();
1924
+
1925
+ if (!curl) {
1926
+ fprintf (stderr, " %s: error initializing libcurl\n " , __func__);
1927
+ return NULL ;
1928
+ }
1929
+
1930
+ if (!curl) {
1931
+ fprintf (stderr, " %s: error initializing libcurl\n " , __func__);
1932
+ return NULL ;
1933
+ }
1934
+
1935
+ if (!llama_download_file (curl, model_url, path_model)) {
1936
+ return NULL ;
1937
+ }
1938
+
1939
+ // check for additional GGUFs split to download
1940
+ int n_split = 0 ;
1941
+ {
1942
+ struct gguf_init_params gguf_params = {
1943
+ /* .no_alloc = */ true ,
1944
+ /* .ctx = */ NULL ,
1945
+ };
1946
+ auto * ctx_gguf = gguf_init_from_file (path_model, gguf_params);
1947
+ if (!ctx_gguf) {
1948
+ fprintf (stderr, " \n %s: failed to load input GGUF from %s\n " , __func__, path_model);
1893
1949
return NULL ;
1894
1950
}
1951
+
1952
+ auto key_n_split = gguf_find_key (ctx_gguf, LLM_KV_SPLIT_COUNT);
1953
+ if (key_n_split >= 0 ) {
1954
+ n_split = gguf_get_val_u16 (ctx_gguf, key_n_split);
1955
+ }
1956
+
1957
+ gguf_free (ctx_gguf);
1895
1958
}
1896
1959
1897
1960
curl_easy_cleanup (curl);
1898
1961
1962
+ if (n_split > 1 ) {
1963
+ char split_prefix[PATH_MAX] = {0 };
1964
+ char split_url_prefix[PATH_MAX] = {0 };
1965
+
1966
+ // Verify the first split file format
1967
+ {
1968
+ if (!llama_split_prefix (split_prefix, sizeof (split_prefix), path_model, 0 , n_split)) {
1969
+ fprintf (stderr, " \n %s: unexpected model file name: %s"
1970
+ " n_split=%d\n " , __func__, path_model, n_split);
1971
+ return NULL ;
1972
+ }
1973
+
1974
+ if (!llama_split_prefix (split_url_prefix, sizeof (split_url_prefix), model_url, 0 , n_split)) {
1975
+ fprintf (stderr, " \n %s: unexpected model url: %s"
1976
+ " n_split=%d\n " , __func__, model_url, n_split);
1977
+ return NULL ;
1978
+ }
1979
+ }
1980
+
1981
+ // Prepare download in parallel
1982
+ std::vector<std::future<bool >> futures_download;
1983
+ for (int idx = 1 ; idx < n_split; idx++) {
1984
+ futures_download.push_back (std::async (std::launch::async, [&split_prefix, &split_url_prefix, &n_split](int download_idx) -> bool {
1985
+ char split_path[PATH_MAX] = {0 };
1986
+ llama_split_path (split_path, sizeof (split_path), split_prefix, download_idx, n_split);
1987
+
1988
+ char split_url[PATH_MAX] = {0 };
1989
+ llama_split_path (split_url, sizeof (split_url), split_url_prefix, download_idx, n_split);
1990
+
1991
+ auto * curl = curl_easy_init ();
1992
+ bool res = llama_download_file (curl, split_url, split_path);
1993
+ curl_easy_cleanup (curl);
1994
+
1995
+ return res;
1996
+ }, idx));
1997
+ }
1998
+
1999
+ // Wait for all downloads to complete
2000
+ for (auto &f : futures_download) {
2001
+ if (!f.get ()) {
2002
+ return NULL ;
2003
+ }
2004
+ }
2005
+ }
2006
+
1899
2007
return llama_load_model_from_file (path_model, params);
1900
2008
}
1901
2009
0 commit comments