Skip to content

Commit 3feeafe

Browse files
authored
Merge pull request #499 from abetlen/v0.2-wip
llama-cpp-python v0.2.0
2 parents c7901f1 + bcef9ab commit 3feeafe

22 files changed

+242
-2019
lines changed

.github/workflows/build-and-release.yaml

+5-3
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,8 @@ jobs:
2626

2727
- name: Install dependencies
2828
run: |
29-
python -m pip install --upgrade pip pytest cmake scikit-build setuptools
29+
python -m pip install --upgrade pip
30+
python -m pip install -e .[all]
3031
3132
- name: Build wheels
3233
run: python -m cibuildwheel --output-dir wheelhouse
@@ -46,10 +47,11 @@ jobs:
4647
- uses: actions/setup-python@v3
4748
- name: Install dependencies
4849
run: |
49-
python -m pip install --upgrade pip pytest cmake scikit-build setuptools
50+
python -m pip install --upgrade pip build
51+
python -m pip install -e .[all]
5052
- name: Build source distribution
5153
run: |
52-
python setup.py sdist
54+
python -m build --sdist
5355
- uses: actions/upload-artifact@v3
5456
with:
5557
path: ./dist/*.tar.gz

.github/workflows/publish-to-test.yaml

+3-2
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,11 @@ jobs:
1919
python-version: "3.8"
2020
- name: Install dependencies
2121
run: |
22-
python -m pip install --upgrade pip pytest cmake scikit-build setuptools
22+
python3 -m pip install --upgrade pip build
23+
python3 -m pip install -e .[all]
2324
- name: Build source distribution
2425
run: |
25-
python setup.py sdist
26+
python3 -m build --sdist
2627
- name: Publish to Test PyPI
2728
uses: pypa/gh-action-pypi-publish@release/v1
2829
with:

.github/workflows/publish.yaml

+3-2
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,11 @@ jobs:
1919
python-version: "3.8"
2020
- name: Install dependencies
2121
run: |
22-
python -m pip install --upgrade pip pytest cmake scikit-build setuptools
22+
python3 -m pip install --upgrade pip build
23+
python3 -m pip install -e .[all]
2324
- name: Build source distribution
2425
run: |
25-
python setup.py sdist
26+
python3 -m build --sdist
2627
- name: Publish distribution to PyPI
2728
# TODO: move to tag based releases
2829
# if: startsWith(github.ref, 'refs/tags')

.github/workflows/test-pypi.yaml

+3-3
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ jobs:
1818
- name: Install dependencies
1919
run: |
2020
python3 -m pip install --upgrade pip
21-
python3 -m pip install --verbose llama-cpp-python[server,test]
21+
python3 -m pip install --verbose llama-cpp-python[all]
2222
- name: Test with pytest
2323
run: |
2424
python3 -c "import llama_cpp"
@@ -38,7 +38,7 @@ jobs:
3838
- name: Install dependencies
3939
run: |
4040
python3 -m pip install --upgrade pip
41-
python3 -m pip install --verbose llama-cpp-python[server,test]
41+
python3 -m pip install --verbose llama-cpp-python[all]
4242
- name: Test with pytest
4343
run: |
4444
python3 -c "import llama_cpp"
@@ -58,7 +58,7 @@ jobs:
5858
- name: Install dependencies
5959
run: |
6060
python3 -m pip install --upgrade pip
61-
python3 -m pip install --verbose llama-cpp-python[server,test]
61+
python3 -m pip install --verbose llama-cpp-python[all]
6262
- name: Test with pytest
6363
run: |
6464
python3 -c "import llama_cpp"

.github/workflows/test.yaml

+12-12
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ jobs:
1414
runs-on: ubuntu-latest
1515
strategy:
1616
matrix:
17-
python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"]
17+
python-version: ["3.8", "3.9", "3.10", "3.11"]
1818

1919
steps:
2020
- uses: actions/checkout@v3
@@ -26,18 +26,18 @@ jobs:
2626
python-version: ${{ matrix.python-version }}
2727
- name: Install dependencies
2828
run: |
29-
python -m pip install --upgrade pip pytest cmake scikit-build setuptools fastapi sse-starlette httpx uvicorn pydantic-settings
30-
pip install . -v
29+
python3 -m pip install --upgrade pip
30+
python3 -m pip install .[all] -v
3131
- name: Test with pytest
3232
run: |
33-
pytest
33+
python3 -m pytest
3434
3535
build-windows:
3636

3737
runs-on: windows-latest
3838
strategy:
3939
matrix:
40-
python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"]
40+
python-version: ["3.8", "3.9", "3.10", "3.11"]
4141

4242
steps:
4343
- uses: actions/checkout@v3
@@ -49,18 +49,18 @@ jobs:
4949
python-version: ${{ matrix.python-version }}
5050
- name: Install dependencies
5151
run: |
52-
python -m pip install --upgrade pip pytest cmake scikit-build setuptools fastapi sse-starlette httpx uvicorn pydantic-settings
53-
pip install . -v
52+
python3 -m pip install --upgrade pip
53+
python3 -m pip install .[all] -v
5454
- name: Test with pytest
5555
run: |
56-
pytest
56+
python3 -m pytest
5757
5858
build-macos:
5959

6060
runs-on: macos-latest
6161
strategy:
6262
matrix:
63-
python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"]
63+
python-version: ["3.8", "3.9", "3.10", "3.11"]
6464

6565
steps:
6666
- uses: actions/checkout@v3
@@ -72,8 +72,8 @@ jobs:
7272
python-version: ${{ matrix.python-version }}
7373
- name: Install dependencies
7474
run: |
75-
python -m pip install --upgrade pip pytest cmake scikit-build setuptools fastapi sse-starlette httpx uvicorn pydantic-settings
76-
pip install . -v
75+
python3 -m pip install --upgrade pip
76+
python3 -m pip install .[all] --verbose
7777
- name: Test with pytest
7878
run: |
79-
pytest
79+
python3 -m pytest

CMakeLists.txt

+26-22
Original file line numberDiff line numberDiff line change
@@ -2,33 +2,37 @@ cmake_minimum_required(VERSION 3.4...3.22)
22

33
project(llama_cpp)
44

5-
option(FORCE_CMAKE "Force CMake build of Python bindings" OFF)
5+
option(LLAMA_BUILD "Build llama.cpp shared library and install alongside python package" ON)
66

7-
set(FORCE_CMAKE $ENV{FORCE_CMAKE})
87

9-
if (UNIX AND NOT FORCE_CMAKE)
10-
add_custom_command(
11-
OUTPUT ${CMAKE_CURRENT_SOURCE_DIR}/vendor/llama.cpp/libllama.so
12-
COMMAND make libllama.so
13-
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/vendor/llama.cpp
14-
)
15-
add_custom_target(
16-
run ALL
17-
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/vendor/llama.cpp/libllama.so
18-
)
19-
install(
20-
FILES ${CMAKE_CURRENT_SOURCE_DIR}/vendor/llama.cpp/libllama.so
21-
DESTINATION llama_cpp
22-
)
23-
else()
8+
if (LLAMA_BUILD)
249
set(BUILD_SHARED_LIBS "On")
10+
if (APPLE)
11+
# Need to disable these llama.cpp flags on Apple
12+
# otherwise users may encounter invalid instruction errors
13+
set(LLAMA_AVX "Off" CACHE BOOL "llama: enable AVX" FORCE)
14+
set(LLAMA_AVX2 "Off" CACHE BOOL "llama: enable AVX2" FORCE)
15+
set(LLAMA_FMA "Off" CACHE BOOL "llama: enable FMA" FORCE)
16+
set(LLAMA_F16C "Off" CACHE BOOL "llama: enable F16C" FORCE)
17+
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=native -mtune=native")
18+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native -mtune=native")
19+
endif()
2520
add_subdirectory(vendor/llama.cpp)
2621
install(
2722
TARGETS llama
28-
LIBRARY DESTINATION llama_cpp
29-
RUNTIME DESTINATION llama_cpp
30-
ARCHIVE DESTINATION llama_cpp
31-
FRAMEWORK DESTINATION llama_cpp
32-
RESOURCE DESTINATION llama_cpp
23+
LIBRARY DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
24+
RUNTIME DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
25+
ARCHIVE DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
26+
FRAMEWORK DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
27+
RESOURCE DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
28+
)
29+
# Temporary fix for https://github.com/scikit-build/scikit-build-core/issues/374
30+
install(
31+
TARGETS llama
32+
LIBRARY DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
33+
RUNTIME DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
34+
ARCHIVE DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
35+
FRAMEWORK DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
36+
RESOURCE DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
3337
)
3438
endif()

Makefile

+11-7
Original file line numberDiff line numberDiff line change
@@ -5,26 +5,30 @@ update:
55
update.vendor:
66
cd vendor/llama.cpp && git pull origin master
77

8+
deps:
9+
python3 -m pip install pip
10+
python3 -m pip install -e ".[all]"
11+
812
build:
9-
python3 setup.py develop
13+
python3 -m pip install -e .
1014

1115
build.cuda:
12-
CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 python3 setup.py develop
16+
CMAKE_ARGS="-DLLAMA_CUBLAS=on" python3 -m pip install -e .
1317

1418
build.opencl:
15-
CMAKE_ARGS="-DLLAMA_CLBLAST=on" FORCE_CMAKE=1 python3 setup.py develop
19+
CMAKE_ARGS="-DLLAMA_CLBLAST=on" python3 -m pip install -e .
1620

1721
build.openblas:
18-
CMAKE_ARGS="-DLLAMA_OPENBLAS=on" FORCE_CMAKE=1 python3 setup.py develop
22+
CMAKE_ARGS="-DLLAMA_OPENBLAS=on" python3 -m pip install -e .
1923

2024
build.blis:
21-
CMAKE_ARGS="-DLLAMA_OPENBLAS=on -DLLAMA_OPENBLAS_VENDOR=blis" FORCE_CMAKE=1 python3 setup.py develop
25+
CMAKE_ARGS="-DLLAMA_OPENBLAS=on -DLLAMA_OPENBLAS_VENDOR=blis" python3 -m pip install -e .
2226

2327
build.metal:
24-
CMAKE_ARGS="-DLLAMA_METAL=on" FORCE_CMAKE=1 python3 setup.py develop
28+
CMAKE_ARGS="-DLLAMA_METAL=on" python3 -m pip install -e .
2529

2630
build.sdist:
27-
python3 setup.py sdist
31+
python3 -m build --sdist
2832

2933
deploy.pypi:
3034
python3 -m twine upload dist/*

README.md

+9-8
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# 🦙 Python Bindings for `llama.cpp`
1+
# 🦙 Python Bindings for [`llama.cpp`](https://github.com/ggerganov/llama.cpp)
22

33
[![Documentation Status](https://readthedocs.org/projects/llama-cpp-python/badge/?version=latest)](https://llama-cpp-python.readthedocs.io/en/latest/?badge=latest)
44
[![Tests](https://github.com/abetlen/llama-cpp-python/actions/workflows/test.yaml/badge.svg?branch=main)](https://github.com/abetlen/llama-cpp-python/actions/workflows/test.yaml)
@@ -48,7 +48,6 @@ Otherwise, while installing it will build the llama.ccp x86 version which will b
4848
### Installation with Hardware Acceleration
4949

5050
`llama.cpp` supports multiple BLAS backends for faster processing.
51-
Use the `FORCE_CMAKE=1` environment variable to force the use of `cmake` and install the pip package for the desired BLAS backend.
5251

5352
To install with OpenBLAS, set the `LLAMA_BLAS` and `LLAMA_BLAS_VENDOR` environment variables before installing:
5453

@@ -208,24 +207,26 @@ If you find any issues with the documentation, please open an issue or submit a
208207

209208
This package is under active development and I welcome any contributions.
210209

211-
To get started, clone the repository and install the package in development mode:
210+
To get started, clone the repository and install the package in editable / development mode:
212211

213212
```bash
214213
git clone --recurse-submodules https://github.com/abetlen/llama-cpp-python.git
215214
cd llama-cpp-python
216215

216+
# Upgrade pip (required for editable mode)
217+
pip install --upgrade pip
218+
217219
# Install with pip
218220
pip install -e .
219221

220222
# if you want to use the fastapi / openapi server
221223
pip install -e .[server]
222224

223-
# If you're a poetry user, installing will also include a virtual environment
224-
poetry install --all-extras
225-
. .venv/bin/activate
225+
# to install all optional dependencies
226+
pip install -e .[all]
226227

227-
# Will need to be re-run any time vendor/llama.cpp is updated
228-
python3 setup.py develop
228+
# to clear the local build cache
229+
make clean
229230
```
230231

231232
# How does this compare to other Python bindings of `llama.cpp`?

docker/cuda_simple/Dockerfile

+1-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ ENV LLAMA_CUBLAS=1
2121
RUN python3 -m pip install --upgrade pip pytest cmake scikit-build setuptools fastapi uvicorn sse-starlette pydantic-settings
2222

2323
# Install llama-cpp-python (build with cuda)
24-
RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python
24+
RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install llama-cpp-python
2525

2626
# Run the server
2727
CMD python3 -m llama_cpp.server

docker/simple/Dockerfile

+2-2
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,9 @@ RUN mkdir /app
1919
WORKDIR /app
2020
COPY . /app
2121

22-
RUN python3 -m pip install --upgrade pip pytest cmake scikit-build setuptools fastapi uvicorn sse-starlette pydantic-settings
22+
RUN python3 -m pip install --upgrade pip
2323

24-
RUN make build && make clean
24+
RUN make deps && make build && make clean
2525

2626
# Set environment variable for the host
2727
ENV HOST=0.0.0.0

docs/index.md

+4-1
Original file line numberDiff line numberDiff line change
@@ -82,9 +82,12 @@ To get started, clone the repository and install the package in development mode
8282

8383
```bash
8484
git clone git@github.com:abetlen/llama-cpp-python.git
85+
cd llama-cpp-python
8586
git submodule update --init --recursive
8687
# Will need to be re-run any time vendor/llama.cpp is updated
87-
python3 setup.py develop
88+
89+
pip install --upgrade pip
90+
pip install -e .[all]
8891
```
8992

9093
## License

docs/install/macos.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ conda activate llama
3030
*(you needed xcode installed in order pip to build/compile the C++ code)*
3131
```
3232
pip uninstall llama-cpp-python -y
33-
CMAKE_ARGS="-DLLAMA_METAL=on" FORCE_CMAKE=1 pip install -U llama-cpp-python --no-cache-dir
33+
CMAKE_ARGS="-DLLAMA_METAL=on" pip install -U llama-cpp-python --no-cache-dir
3434
pip install 'llama-cpp-python[server]'
3535
3636
# you should now have llama-cpp-python v0.1.62 or higher installed

llama_cpp/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
from .llama_cpp import *
22
from .llama import *
33

4-
from .version import __version__
4+
__version__ = "0.2.0"

0 commit comments

Comments
 (0)