diff --git a/helm/Chart.yaml b/helm/Chart.yaml
index fb905ebc5..9616a22c2 100644
--- a/helm/Chart.yaml
+++ b/helm/Chart.yaml
@@ -15,20 +15,7 @@ type: application
 # This is the chart version. This version number should be incremented each time you make changes
 # to the chart and its templates, including the app version.
 # Versions are expected to follow Semantic Versioning (https://semver.org/)
-version: 0.1.0
-
-keywords:
-  - vllm
-  - lora
-  - adapter
-home: https://github.com/vllm-project/production-stack
-sources:
-  - https://github.com/vllm-project/production-stack
+version: 0.1.1

 maintainers:
   - name: apostac
-
-# Specifies that CRDs should be created/updated first
-annotations:
-  "helm.sh/hook": pre-install,pre-upgrade
-  "helm.sh/hook-weight": "-5"
diff --git a/helm/templates/deployment-lora-controller.yaml b/helm/templates/deployment-lora-controller.yaml
index c89d3143d..c01b4b742 100644
--- a/helm/templates/deployment-lora-controller.yaml
+++ b/helm/templates/deployment-lora-controller.yaml
@@ -1,4 +1,4 @@
-{{- if .Values.loraController.enabled -}}
+{{- if and .Values.loraController .Values.loraController.enabled -}}
 apiVersion: apps/v1
 kind: Deployment
 metadata:
diff --git a/helm/templates/deployment-vllm-multi.yaml b/helm/templates/deployment-vllm-multi.yaml
index 7ca02007b..1aa831c9b 100644
--- a/helm/templates/deployment-vllm-multi.yaml
+++ b/helm/templates/deployment-vllm-multi.yaml
@@ -54,6 +54,7 @@ spec:
       containers:
       - name: "vllm"
         image: "{{ required "Required value 'modelSpec.repository' must be defined !" $modelSpec.repository }}:{{ required "Required value 'modelSpec.tag' must be defined !" $modelSpec.tag }}"
+
         command:
         - "vllm"
         - "serve"
@@ -65,6 +66,13 @@ spec:
         {{- if $modelSpec.enableLoRA }}
         - "--enable-lora"
         {{- end }}
+        {{- if $modelSpec.enableTool }}
+        - "--enable-auto-tool-choice"
+        {{- end }}
+        {{- if $modelSpec.toolCallParser }}
+        - "--tool-call-parser"
+        - {{ $modelSpec.toolCallParser | quote }}
+        {{- end }}
         {{- with $modelSpec.vllmConfig }}
         {{- if hasKey . "enableChunkedPrefill" }}
         - "--enable-chunked-prefill"
@@ -99,7 +107,7 @@ spec:
         {{- end }}
         {{- if $modelSpec.chatTemplate }}
         - "--chat-template"
-        - "/chat_templates/chat-template.jinga"
+        - {{ $modelSpec.chatTemplate | quote }}
         {{- end }}
         {{- if .Values.servingEngineSpec.containerSecurityContext }}
         securityContext:
@@ -194,10 +202,6 @@ spec:
          - name: {{ .Release.Name }}-storage
            mountPath: /data
          {{- end }}
-         {{- if .Values.loraController.enabled }}
-         - name: shared-model-storage
-           mountPath: /models
-         {{- end }}
          {{- with $modelSpec.vllmConfig }}
          {{- if hasKey $modelSpec.vllmConfig "tensorParallelSize"}}
          - name: shm
@@ -205,12 +209,12 @@
          {{- end}}
          {{- end}}
          {{- if $modelSpec.chatTemplate }}
-         - name: {{ .Release.Name }}-chat-templates
-           mountPath: /chat_templates
-         {{- end}}
+         - name: vllm-templates
+           mountPath: /templates
+         {{- end }}
          {{- if hasKey $modelSpec "extraVolumeMounts" }}
          {{- toYaml $modelSpec.extraVolumeMounts | nindent 10 }}
-         {{- end}}
+         {{- end }}
      {{- if $modelSpec.imagePullSecret }}
      imagePullSecrets:
      - name: {{ $modelSpec.imagePullSecret }}
@@ -223,11 +227,6 @@ spec:
          persistentVolumeClaim:
            claimName: "{{ .Release.Name }}-{{$modelSpec.name}}-storage-claim"
        {{- end }}
-       {{- if .Values.loraController.enabled }}
-       - name: shared-model-storage
-         persistentVolumeClaim:
-           claimName: {{ .Release.Name }}-shared-storage-claim
-       {{- end }}
        {{- with $modelSpec.vllmConfig }}
        {{- if hasKey $modelSpec.vllmConfig "tensorParallelSize"}}
        - name: shm
@@ -237,9 +236,15 @@
        {{- end}}
        {{- end}}
        {{- if $modelSpec.chatTemplate}}
+       {{- if hasKey $modelSpec "chatTemplateConfigMap" }}
        - name: {{ .Release.Name }}-chat-templates
          configMap:
            name: "{{ .Release.Name }}-{{$modelSpec.name}}-chat-templates"
+       {{- else }}
+       - name: vllm-templates
+         persistentVolumeClaim:
+           claimName: vllm-templates-pvc
+       {{- end }}
        {{- end}}
        {{- if hasKey $modelSpec "extraVolumes" }}
        {{- toYaml $modelSpec.extraVolumes | nindent 8 }}
@@ -266,7 +271,7 @@ spec:
        {{- toYaml . | nindent 12 }}
        {{- end }}
        {{- end }}
-{{- if $modelSpec.chatTemplate }}
+{{- if and $modelSpec.chatTemplate (hasKey $modelSpec "chatTemplateConfigMap") }}
 ---
 apiVersion: v1
 kind: ConfigMap
@@ -274,8 +279,8 @@ metadata:
   name: "{{ .Release.Name }}-{{$modelSpec.name}}-chat-templates"
   namespace: "{{ .Release.Namespace }}"
 data:
-  chat-template.jinga: |-
-    {{ $modelSpec.chatTemplate}}
+  {{ $modelSpec.chatTemplate }}: |-
+    {{ $modelSpec.chatTemplateConfigMap }}
 {{- end }}
 {{- end }}
 ---
diff --git a/helm/templates/loraadapter-crd.yaml b/helm/templates/loraadapter-crd.yaml
index c08d10dd6..172d985a5 100644
--- a/helm/templates/loraadapter-crd.yaml
+++ b/helm/templates/loraadapter-crd.yaml
@@ -1,4 +1,4 @@
-{{- if .Values.loraController.enabled -}}
+{{- if and .Values.loraController .Values.loraController.enabled -}}
 ---
 apiVersion: apiextensions.k8s.io/v1
 kind: CustomResourceDefinition
diff --git a/helm/templates/rbac-lora-controller.yaml b/helm/templates/rbac-lora-controller.yaml
index 77c2a6e2a..0f4bf27d4 100644
--- a/helm/templates/rbac-lora-controller.yaml
+++ b/helm/templates/rbac-lora-controller.yaml
@@ -1,4 +1,4 @@
-{{- if .Values.loraController.enabled -}}
+{{- if and .Values.loraController .Values.loraController.enabled -}}
 ---
 apiVersion: v1
 kind: ServiceAccount
diff --git a/helm/templates/shared-storage.yaml b/helm/templates/shared-storage.yaml
index 688cfc637..0e769f110 100644
--- a/helm/templates/shared-storage.yaml
+++ b/helm/templates/shared-storage.yaml
@@ -1,4 +1,4 @@
-{{- if or .Values.sharedStorage.enabled .Values.loraController.enabled }}
+{{- if or (and .Values.loraController .Values.loraController.enabled) (and .Values.sharedStorage .Values.sharedStorage.enabled) }}
 ---
 apiVersion: v1
 kind: PersistentVolume
diff --git a/helm/values.yaml b/helm/values.yaml
index ab727ca6e..a8b23b979 100644
--- a/helm/values.yaml
+++ b/helm/values.yaml
@@ -329,41 +329,3 @@ routerSpec:
 #   httpGet:
 #     # -- Path to access on the HTTP server
 #
-
-loraController:
-  enabled: false
-  replicaCount: 1
-  image:
-    repository: lmcache/lora-controller
-    tag: latest
-    pullPolicy: Always
-  resources:
-    limits:
-      cpu: 500m
-      memory: 512Mi
-    requests:
-      cpu: 100m
-      memory: 128Mi
-  podAnnotations: {}
-  podSecurityContext: {}
-  securityContext: {}
-  nodeSelector: {}
-  tolerations: []
-  affinity: {}
-  extraEnv: []
-  extraVolumes: []
-  extraVolumeMounts: []
-  imagePullSecrets: []
-
-# Shared storage configuration for LoRA adapters
-sharedStorage:
-  enabled: false
-  storageClass: "standard"
-  size: "100Gi"
-  accessModes:
-    - ReadWriteMany
-  # Use either hostPath or nfs configuration
-  hostPath: ""
-  nfs:
-    server: ""
-    path: ""
diff --git a/scripts/setup_vllm_templates.sh b/scripts/setup_vllm_templates.sh
new file mode 100755
index 000000000..4c72d4146
--- /dev/null
+++ b/scripts/setup_vllm_templates.sh
@@ -0,0 +1,113 @@
+#!/bin/bash
+
+# Exit on error
+set -e
+
+# Configuration
+PV_NAME="vllm-templates-pv"          # Fixed name for template storage
+PVC_NAME="vllm-templates-pvc"        # Fixed name for template storage
+STORAGE_SIZE="1Gi"
+STORAGE_CLASS="standard"
+HOST_TEMPLATES_DIR="/mnt/templates"  # This is where the PV will be mounted on the host
+TEMP_POD_NAME="vllm-templates-setup"
+
+echo "Setting up vLLM templates..."
+echo "Using PV: $PV_NAME"
+echo "Using PVC: $PVC_NAME"
+
+# Check if host directory exists and create if needed
+echo "Checking host directory at $HOST_TEMPLATES_DIR..."
+if [ ! -d "$HOST_TEMPLATES_DIR" ]; then
+    echo "Creating host directory at $HOST_TEMPLATES_DIR..."
+    sudo mkdir -p "$HOST_TEMPLATES_DIR"
+    sudo chmod 777 "$HOST_TEMPLATES_DIR"  # Allow read/write access
+fi
+
+# Verify directory permissions
+if [ ! -w "$HOST_TEMPLATES_DIR" ]; then
+    echo "Error: No write permission to $HOST_TEMPLATES_DIR"
+    echo "Please ensure you have sudo access or the directory has proper permissions"
+    exit 1
+fi
+
+# Create PersistentVolume with ReadWriteMany
+echo "Creating PersistentVolume..."
+cat <

> **Note**: The tool calling configuration is now simplified:
>
> - `enableTool: true` enables the feature
> - `toolCallParser` specifies how the model's tool calls are parsed (`llama3_json` for Llama-3 models)
> - `chatTemplate` specifies the full path to the template file (templates are mounted at `/templates`)
>
> The chat templates are managed through the PersistentVolume created in step 1, which provides several benefits:
>
> - Templates are downloaded once and stored persistently
> - Templates can be shared across multiple deployments
> - Templates can be updated by editing the files in the PersistentVolume
> - Templates are version-controlled in the vLLM repository

#### 3.2: Deploy the Helm Chart

```bash
# Add the vLLM Helm repository if you haven't already
helm repo add vllm https://vllm-project.github.io/production-stack

# Deploy the vLLM stack with tool calling support using the example configuration
helm install vllm-tool vllm/vllm-stack -f tutorials/assets/values-09-tool-enabled.yaml
```

The deployment will:

1. Use the PersistentVolume created in step 1 to access the templates
2. Mount the templates at `/templates` in the container
3. Configure the model to use the specified template for tool calling

You can verify the deployment with:

```bash
# Check the deployment status
kubectl get deployments

# Check the pods
kubectl get pods

# Check the logs
kubectl logs -f deployment/vllm-tool-llama3-8b-deployment-vllm
```

### 4. Test Tool Calling Setup

Now that the deployment is running, let's test the tool calling functionality using the example script.

#### 4.1: Port-Forward the Router Service

First, set up port forwarding to access the router service:

```bash
# Get the service name
kubectl get svc

# Set up port forwarding to the router service
kubectl port-forward svc/vllm-tool-router-service 8000:80
```

#### 4.2: Run the Example Script

In a new terminal, run the example script to test tool calling:

```bash
# Navigate to the examples directory
cd src/examples

# Run the example script
python tool_calling_example.py
```

The script will:

1. Connect to the vLLM service through the port-forwarded endpoint
2. Send a test query asking about the weather
3. Demonstrate the model's ability to:
   - Understand the available tools
   - Make appropriate tool calls
   - Process the tool responses

Expected output should look something like:

```text
Function called: get_weather
Arguments: {"location": "San Francisco, CA", "unit": "celsius"}
Result: Getting the weather for San Francisco, CA in celsius...
```

This confirms that:

1. The vLLM service is running correctly
2. Tool calling is properly enabled
3. The model can understand and use the defined tools
4. The template system is working as expected

> **Note**: The example uses a mock weather function for demonstration. In a real application, you would replace this with actual API calls to a weather service.
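As an additional check that does not depend on the example script, you can call the OpenAI-compatible endpoint directly with `curl`. This is a minimal sketch: the `get_weather` tool definition here is illustrative, and it assumes the port-forward from step 4.1 is active and that the served model name matches `modelURL` from the values file:

```bash
# Minimal tool-calling request against the port-forwarded router.
# Assumes the served model name matches modelURL in values-09-tool-enabled.yaml;
# the get_weather tool schema below is a hypothetical example.
curl http://localhost:8000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "meta-llama/Llama-3.1-8B-Instruct",
    "messages": [
      {"role": "user", "content": "What is the weather in San Francisco, CA in celsius?"}
    ],
    "tools": [
      {
        "type": "function",
        "function": {
          "name": "get_weather",
          "description": "Get the current weather for a location",
          "parameters": {
            "type": "object",
            "properties": {
              "location": {"type": "string", "description": "City and state, e.g. San Francisco, CA"},
              "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}
            },
            "required": ["location"]
          }
        }
      }
    ],
    "tool_choice": "auto"
  }'
```

If tool calling is working, the assistant message in the response carries a `tool_calls` array (function name plus JSON arguments) instead of plain text content.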
diff --git a/tutorials/assets/values-09-tool-enabled.yaml b/tutorials/assets/values-09-tool-enabled.yaml
new file mode 100644
index 000000000..e8390fedf
--- /dev/null
+++ b/tutorials/assets/values-09-tool-enabled.yaml
@@ -0,0 +1,48 @@
+servingEngineSpec:
+  runtimeClassName: ""
+  modelSpec:
+    - name: "llama3-8b"
+      repository: "vllm/vllm-openai"
+      tag: "latest"
+      modelURL: "meta-llama/Llama-3.1-8B-Instruct"
+
+      # Tool calling configuration
+      enableTool: true
+      toolCallParser: "llama3_json"  # Parser to use for tool calls (e.g., "llama3_json" for Llama models)
+      chatTemplate: "/templates/tool_chat_template_llama3.1_json.jinja"  # Full path to template file
+
+      # Mount Hugging Face credentials and vLLM configuration
+      env:
+        - name: HUGGING_FACE_HUB_TOKEN
+          valueFrom:
+            secretKeyRef:
+              name: huggingface-credentials
+              key: HUGGING_FACE_HUB_TOKEN
+        - name: VLLM_TEMPLATE_DIR
+          value: "/templates"
+
+      replicaCount: 1
+
+      # Resource requirements for Llama-3.1-8B-Instruct
+      # resources:
+      #   requests:
+      #     cpu: 8
+      #     memory: "32Gi"
+      #     nvidia.com/gpu: 1
+      #   limits:
+      #     cpu: 8
+      #     memory: "32Gi"
+      #     nvidia.com/gpu: 1
+      requestCPU: 8
+      requestMemory: "32Gi"
+      requestGPU: 1
+
+      # vLLM configuration
+      vllmConfig:
+        maxModelLen: 4096
+        dtype: "bfloat16"
+        tensorParallelSize: 1
+
+# Disable shared storage
+# sharedStorage:
+#   enabled: false
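Before installing, the values file can be sanity-checked by rendering the chart locally. A small sketch, assuming a local checkout of production-stack with the chart under `helm/`:

```bash
# Render the chart with the tool-calling values and confirm the new
# flags appear in the generated container args (the ./helm chart path
# assumes a local checkout rather than the packaged repo chart).
helm template vllm-tool ./helm -f tutorials/assets/values-09-tool-enabled.yaml \
  | grep -E -A 1 "enable-auto-tool-choice|tool-call-parser"
```

The output should show `--enable-auto-tool-choice` and `--tool-call-parser` followed by `llama3_json`, confirming that `enableTool` and `toolCallParser` are picked up by `deployment-vllm-multi.yaml`.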