diff --git a/Dockerfile b/Dockerfile
index 1f5376dc..232cbd5a 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,8 +1,10 @@
-FROM mcr.microsoft.com/dotnet/sdk:7.0 AS build
+# Since we're using the nvidia/cuda base image, this requires nvidia-container-toolkit installed on the host system to pass through the drivers to the container.
+# see: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html
+FROM nvidia/cuda:12.3.0-runtime-ubuntu22.04 AS final
WORKDIR /app
# Install Git and Git LFS
-RUN apt-get update && apt-get install -y curl
+RUN apt-get update && apt-get install -y curl wget
RUN curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | bash && apt-get install -y git-lfs
# Clone the Stable Diffusion 1.5 base model
@@ -11,7 +13,37 @@ RUN git clone https://huggingface.co/runwayml/stable-diffusion-v1-5 -b onnx
# Clone the LCM Dreamshaper V7 model
RUN git clone https://huggingface.co/TheyCallMeHex/LCM-Dreamshaper-V7-ONNX
+# Need to install NVIDIA's GPG key before apt search will show up-to-date packages for CUDA
+RUN wget -N -t 5 -T 10 http://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb \
+ && dpkg -i ./cuda-keyring_1.1-1_all.deb
+
+# install CUDA dependencies required according to `ldd libonnxruntime_providers_cuda.so`
+RUN apt-get update \
+ && apt-get install -y libcublaslt11 libcublas11 libcudnn8=8.9.1.23-1+cuda11.8 libcufft10 libcudart11.0
+
+# According to `ldd libortextensions.so` it depends on ssl 1.1 to run, and the dotnet/runtime-deps base image installs it which is why it works inside the dotnet base images.
+# Since we need access to the GPU to use the CUDA execution provider we need to use the nvidia/cuda base image instead.
+# The nvidia/cuda base image doesn't contain SSL 1.1, hence we have to manually install it like this to satisfy the dependency.
+# This fixes the "The ONNX Runtime extensions library was not found" error.
+# See: https://stackoverflow.com/questions/72133316/libssl-so-1-1-cannot-open-shared-object-file-no-such-file-or-directory
+RUN wget http://nz2.archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2.20_amd64.deb && dpkg -i libssl1.1_1.1.1f-1ubuntu2.20_amd64.deb
+
+# Need to install dotnet sdk since we're not using the dotnet/sdk base image.
+# Note: icu is also installed to help with globalization https://learn.microsoft.com/en-us/dotnet/core/extensions/globalization-icu
+RUN apt-get update \
+ && apt-get install -y dotnet-sdk-7.0 icu-devtools
+
+ENV \
+ # Enable detection of running in a container
+ DOTNET_RUNNING_IN_CONTAINER=true \
+ # Do not generate certificate
+ DOTNET_GENERATE_ASPNET_CERTIFICATE=false \
+ # Do not show first run text
+ DOTNET_NOLOGO=true \
+ # Skip extraction of XML docs - generally not useful within an image/container - helps performance
+ NUGET_XMLDOC_MODE=skip
+
COPY . .
RUN dotnet build OnnxStackCore.sln
-ENTRYPOINT ["dotnet", "test", "OnnxStackCore.sln"]
\ No newline at end of file
+ENTRYPOINT ["sh", "-c", "nvidia-smi && dotnet test OnnxStackCore.sln"]
\ No newline at end of file
diff --git a/OnnxStack.IntegrationTests/IntegrationTestCollection.cs b/OnnxStack.IntegrationTests/IntegrationTestCollection.cs
new file mode 100644
index 00000000..ccd0d7bb
--- /dev/null
+++ b/OnnxStack.IntegrationTests/IntegrationTestCollection.cs
@@ -0,0 +1,7 @@
+namespace OnnxStack.IntegrationTests;
+
+///
+/// All integration tests need to go in a single collection, so tests in different classes run sequentially and not in parallel.
+///
+[CollectionDefinition("IntegrationTests")]
+public class IntegrationTestCollection { }
\ No newline at end of file
diff --git a/OnnxStack.IntegrationTests/OnnxStack.IntegrationTests.csproj b/OnnxStack.IntegrationTests/OnnxStack.IntegrationTests.csproj
index 0d8a711a..250812fb 100644
--- a/OnnxStack.IntegrationTests/OnnxStack.IntegrationTests.csproj
+++ b/OnnxStack.IntegrationTests/OnnxStack.IntegrationTests.csproj
@@ -16,7 +16,7 @@
-
+
diff --git a/OnnxStack.IntegrationTests/StableDiffusionTests.cs b/OnnxStack.IntegrationTests/StableDiffusionTests.cs
index 161c7ed9..73d0d9fd 100644
--- a/OnnxStack.IntegrationTests/StableDiffusionTests.cs
+++ b/OnnxStack.IntegrationTests/StableDiffusionTests.cs
@@ -1,4 +1,3 @@
-using System.Security.Cryptography;
using FluentAssertions;
using FluentAssertions.Execution;
using Microsoft.Extensions.DependencyInjection;
@@ -13,13 +12,10 @@
namespace OnnxStack.IntegrationTests;
///
-/// These tests just run on CPU execution provider for now, but could switch it to CUDA and run on GPU
-/// if the necessary work is done to setup the docker container to allow GPU passthrough to the container.
-/// See https://blog.roboflow.com/use-the-gpu-in-docker/ for an example of how to do this.
-///
-/// Can then also setup a self-hosted runner in Github Actions to run the tests on your own GPU as part of the CI/CD pipeline.
+/// These tests could be run via a self-hosted runner in Github Actions to run the tests on your own GPU as part of the CI/CD pipeline.
/// Maybe something like https://www.youtube.com/watch?v=rVq-SCNyxVc
///
+[Collection("IntegrationTests")]
public class StableDiffusionTests
{
private readonly IStableDiffusionService _stableDiffusion;
diff --git a/OnnxStack.IntegrationTests/Usings.cs b/OnnxStack.IntegrationTests/Usings.cs
index 8c927eb7..9af5aa1a 100644
--- a/OnnxStack.IntegrationTests/Usings.cs
+++ b/OnnxStack.IntegrationTests/Usings.cs
@@ -1 +1,4 @@
-global using Xunit;
\ No newline at end of file
+global using Xunit;
+
+// Need all tests to run sequentially, one at a time, so they don't overwhelm the GPU.
+[assembly: CollectionBehavior(DisableTestParallelization = true)]
\ No newline at end of file
diff --git a/OnnxStack.IntegrationTests/appsettings.json b/OnnxStack.IntegrationTests/appsettings.json
index f12635cf..3ba41c45 100644
--- a/OnnxStack.IntegrationTests/appsettings.json
+++ b/OnnxStack.IntegrationTests/appsettings.json
@@ -24,7 +24,7 @@
"InterOpNumThreads": 0,
"IntraOpNumThreads": 0,
"ExecutionMode": "ORT_SEQUENTIAL",
- "ExecutionProvider": "Cpu",
+ "ExecutionProvider": "Cuda",
"ModelConfigurations": [
{
"Type": "Tokenizer",
@@ -65,7 +65,7 @@
"InterOpNumThreads": 0,
"IntraOpNumThreads": 0,
"ExecutionMode": "ORT_SEQUENTIAL",
- "ExecutionProvider": "Cpu",
+ "ExecutionProvider": "Cuda",
"ModelConfigurations": [
{
"Type": "Tokenizer",
diff --git a/OnnxStackCore.sln b/OnnxStackCore.sln
index 810ecdef..f691cc93 100644
--- a/OnnxStackCore.sln
+++ b/OnnxStackCore.sln
@@ -13,6 +13,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "SolutionItems", "SolutionIt
.gitignore = .gitignore
docker-compose.yml = docker-compose.yml
README.md = README.md
+ run-integration-tests-cuda.sh = run-integration-tests-cuda.sh
EndProjectSection
EndProject
Global
diff --git a/README.md b/README.md
index c0556e24..30813e74 100644
--- a/README.md
+++ b/README.md
@@ -157,7 +157,21 @@ Other `Microsoft.ML.OnnxRuntime.*` executors like `Cuda` may work but are untest
`DirectML` > 10GB VRAM
-
+## Troubleshooting
+
+ - I'm running on Linux but it's not working, citing: `The ONNX Runtime extensions library was not found`?
+ - It's having a problem loading `libortextensions.so`
+ - From the project root run `find -name "libortextensions.so"` to locate that file
+ - Then run `ldd libortextensions.so` against it to see what dependencies it needs versus what your system has.
+ - It has a dependency on SSL 1.1 which was removed from Ubuntu based OSes and causes this error.
+ - It can be remedied by manually installing the dependency.
+ - See: https://stackoverflow.com/questions/72133316/libssl-so-1-1-cannot-open-shared-object-file-no-such-file-or-directory
+ - I've installed `Microsoft.ML.OnnxRuntime` and `Microsoft.ML.OnnxRuntime.Gpu` into my project and set the execution provider to `Cuda`, but it's complaining it can't find an entry point for CUDA?
+ - `System.EntryPointNotFoundException : Unable to find an entry point named 'OrtSessionOptionsAppendExecutionProvider_CUDA' in shared library 'onnxruntime'`
+ - Adding both `Microsoft.ML.OnnxRuntime` AND `Microsoft.ML.OnnxRuntime.Gpu` at the same time causes this.
+ - Remove `Microsoft.ML.OnnxRuntime` and try again.
+ - I'm trying to run via CUDA execution provider but it's complaining about missing `libcublaslt11`, `libcublas11`, or `libcudnn8`?
+   - Aside from just the NVIDIA drivers, you also need to install CUDA and cuDNN.
## Contribution
diff --git a/docker-compose.yml b/docker-compose.yml
index 2f93a27f..e02e227a 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -3,5 +3,12 @@ version: '3.7'
services:
app:
build: .
+ deploy:
+ resources:
+ reservations:
+ devices:
+ - driver: nvidia
+ count: all
+ capabilities: [gpu]
volumes:
- "./docker-test-output:/app/OnnxStack.IntegrationTests/bin/Debug/net7.0/images"
\ No newline at end of file
diff --git a/run-integration-tests-cuda.sh b/run-integration-tests-cuda.sh
new file mode 100755
index 00000000..9cc0a5e1
--- /dev/null
+++ b/run-integration-tests-cuda.sh
@@ -0,0 +1,7 @@
+#! /bin/bash
+# running this requires:
+# - nvidia GPU with sufficient VRAM
+# - nvidia drivers installed on the host system
+# - nvidia-container-toolkit installed on the host system (see: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html)
+# - nvidia-smi also reports peak VRAM close to 24GB while running the tests
+docker-compose up --build
\ No newline at end of file