server: ci: windows build and tests (ggml-org#5968)

phymbert · cebtenzzre · hodlen · commit 1b6836560250 · 2024-04-02T00:15:08.000+08:00
* server: ci: windows build and tests

* server: ci: remove tmp push branch

* server: ci: EOF EOL

* Use builti

Co-authored-by: Jared Van Bortel <cebtenzzre@gmail.com>

* server: tests: server graceful shutdown, then kill, then hard kill

* server: tests: remove python2 unicode string

* server: tests: remove wrong comment on server starting, close_fds is always true

* server: tests: server kill, if pid exists

* server: tests: remove dependency to killall

* server: tests: ci windows: pid exists better handling

---------

Co-authored-by: Jared Van Bortel <cebtenzzre@gmail.com>
diff --git a/.github/workflows/server.yml b/.github/workflows/server.yml
@@ -47,6 +47,8 @@ jobs:
       - name: Clone
         id: checkout
         uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
 
       - name: Dependencies
         id: depends
@@ -58,7 +60,6 @@ jobs:
             cmake \
             python3-pip \
             wget \
-            psmisc \
             language-pack-en
 
       - name: Build
@@ -90,3 +91,46 @@ jobs:
         run: |
           cd examples/server/tests
           PORT=8888 ./tests.sh --stop --no-skipped --no-capture --tags slow
+
+
+  server-windows:
+    runs-on: windows-latest
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+
+      - name: Build
+        id: cmake_build
+        run: |
+          mkdir build
+          cd build
+          cmake ..  -DLLAMA_BUILD_SERVER=ON -DCMAKE_BUILD_TYPE=Release ;
+          cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS} --target server
+
+      - name: Python setup
+        id: setup_python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Tests dependencies
+        id: test_dependencies
+        run: |
+          pip install -r examples/server/tests/requirements.txt
+
+      - name: Tests
+        id: server_integration_tests
+        run: |
+          cd examples/server/tests
+          behave.exe --summary --stop --no-capture --exclude 'issues|wrong_usages|passkey' --tags llama.cpp
+
+      - name: Slow tests
+        id: server_integration_tests_slow
+        if: ${{ github.event.schedule != '' || github.event.inputs.slow_tests == 'true' }}
+        run: |
+          cd examples/server/tests
+          behave.exe --stop --no-skipped --no-capture --tags slow
diff --git a/examples/server/tests/features/environment.py b/examples/server/tests/features/environment.py
@@ -1,9 +1,10 @@
+import errno
 import os
 import socket
 import subprocess
 import time
 from contextlib import closing
-from signal import SIGKILL
+import signal
 
 
 def before_scenario(context, scenario):
@@ -29,44 +30,71 @@ def after_scenario(context, scenario):
                     for line in f:
                         print(line)
         if not is_server_listening(context.server_fqdn, context.server_port):
-            print("\x1b[33;101mERROR: Server stopped listening\x1b[0m")
+            print("\x1b[33;101mERROR: Server stopped listening\x1b[0m\n")
 
     if not pid_exists(context.server_process.pid):
         assert False, f"Server not running pid={context.server_process.pid} ..."
 
-    print(f"stopping server pid={context.server_process.pid} ...")
-    context.server_process.kill()
+    server_graceful_shutdown(context)
+
     # Wait few for socket to free up
     time.sleep(0.05)
 
     attempts = 0
-    while is_server_listening(context.server_fqdn, context.server_port):
-        print(f"stopping server pid={context.server_process.pid} ...")
-        os.kill(context.server_process.pid, SIGKILL)
+    while pid_exists(context.server_process.pid) or is_server_listening(context.server_fqdn, context.server_port):
+        server_kill(context)
         time.sleep(0.1)
         attempts += 1
         if attempts > 5:
-            print(f"Server dangling exits, killing all {context.server_path} ...")
-            process = subprocess.run(['killall', '-9', context.server_path],
-                                     stderr=subprocess.PIPE,
-                                     universal_newlines=True)
-            print(process)
+            server_kill_hard(context)
+
+
+def server_graceful_shutdown(context):
+    print(f"shutting down server pid={context.server_process.pid} ...\n")
+    if os.name == 'nt':
+        os.kill(context.server_process.pid, signal.CTRL_C_EVENT)
+    else:
+        os.kill(context.server_process.pid, signal.SIGINT)
+
+
+def server_kill(context):
+    print(f"killing server pid={context.server_process.pid} ...\n")
+    context.server_process.kill()
+
+
+def server_kill_hard(context):
+    pid = context.server_process.pid
+    path = context.server_path
+
+    print(f"Server dangling exits, hard killing force {pid}={path}...\n")
+    if os.name == 'nt':
+        process = subprocess.check_output(['taskkill', '/F', '/pid', str(pid)]).decode()
+        print(process)
+    else:
+        os.kill(-pid, signal.SIGKILL)
 
 
 def is_server_listening(server_fqdn, server_port):
     with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
         result = sock.connect_ex((server_fqdn, server_port))
-        return result == 0
+        _is_server_listening = result == 0
+        if _is_server_listening:
+            print(f"server is listening on {server_fqdn}:{server_port}...\n")
+        return _is_server_listening
 
 
 def pid_exists(pid):
     """Check whether pid exists in the current process table."""
-    import errno
     if pid < 0:
         return False
-    try:
-        os.kill(pid, 0)
-    except OSError as e:
-        return e.errno == errno.EPERM
+    if os.name == 'nt':
+        output = subprocess.check_output(['TASKLIST', '/FI', f'pid eq {pid}']).decode()
+        print(output)
+        return "No tasks are running" not in output
     else:
-        return True
+        try:
+            os.kill(pid, 0)
+        except OSError as e:
+            return e.errno == errno.EPERM
+        else:
+            return True
diff --git a/examples/server/tests/features/server.feature b/examples/server/tests/features/server.feature
@@ -47,7 +47,7 @@ Feature: llama.cpp server
     Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
     """
     And   a completion request with no api error
-    Then  64 tokens are predicted matching fun|Annaks|popcorns
+    Then  64 tokens are predicted matching fun|Annaks|popcorns|pictry
     And   the completion is  truncated
     And   109 prompt tokens are processed
 
diff --git a/examples/server/tests/features/steps/steps.py b/examples/server/tests/features/steps/steps.py