Merge pull request #65 from perplexityai/kesku/strip-thinking-tokens

kesku · web-flow · commit c82fcf000535 · 2025-11-09T00:54:21.000Z
diff --git a/README.md b/README.md
@@ -18,6 +18,11 @@ Deep, comprehensive research using the `sonar-deep-research` model. Ideal for th
 ### **perplexity_reason**
 Advanced reasoning and problem-solving using the `sonar-reasoning-pro` model. Perfect for complex analytical tasks.
 
+> [!TIP]
+> Available as an optional parameter for **perplexity_reason** and **perplexity_research**: `strip_thinking`
+>
+> Set to `true` to remove `<think>...</think>` blocks (the tags and their content) from the response, saving context tokens. Default: `false`
+
 ## Configuration
 
 ### Get Your API Key
diff --git a/index.test.ts b/index.test.ts
@@ -626,4 +626,43 @@ describe("Perplexity MCP Server", () => {
       expect(formatted).not.toContain("12345");
     });
   });
+
+  describe("strip_thinking parameter", () => {
+    it("should strip thinking tokens when true and keep them when false", async () => {
+      const mockResponse = {
+        choices: [
+          {
+            message: {
+              content: "<think>This is my reasoning process</think>\n\nThe answer is 4.",
+            },
+          },
+        ],
+      };
+
+      // stripThinking = true: the tags and the reasoning text must be gone, the answer kept
+      global.fetch = vi.fn().mockResolvedValue({
+        ok: true,
+        json: async () => mockResponse,
+      } as Response);
+
+      const messages = [{ role: "user", content: "What is 2+2?" }];
+      const resultStripped = await performChatCompletion(messages, "sonar-reasoning-pro", true);
+
+      expect(resultStripped).not.toContain("<think>");
+      expect(resultStripped).not.toContain("</think>");
+      expect(resultStripped).not.toContain("This is my reasoning process");
+      expect(resultStripped).toContain("The answer is 4.");
+
+      // stripThinking = false: same mocked response, the <think> block must come back intact
+      global.fetch = vi.fn().mockResolvedValue({
+        ok: true,
+        json: async () => mockResponse,
+      } as Response);
+
+      const resultKept = await performChatCompletion(messages, "sonar-reasoning-pro", false);
+
+      expect(resultKept).toContain("<think>This is my reasoning process</think>");
+      expect(resultKept).toContain("The answer is 4.");
+    });
+  });
 });
diff --git a/index.ts b/index.ts
@@ -76,6 +76,10 @@ const PERPLEXITY_RESEARCH_TOOL: Tool = {
         },
         description: "Array of conversation messages",
       },
+      strip_thinking: {
+        type: "boolean",
+        description: "If true, removes <think>...</think> tags and their content from the response to save context tokens. Default is false.",
+      },
     },
     required: ["messages"],
   },
@@ -112,6 +116,10 @@ const PERPLEXITY_REASON_TOOL: Tool = {
         },
         description: "Array of conversation messages",
       },
+      strip_thinking: {
+        type: "boolean",
+        description: "If true, removes <think>...</think> tags and their content from the response to save context tokens. Default is false.",
+      },
     },
     required: ["messages"],
   },
@@ -188,18 +196,31 @@ function validateMessages(messages: any, toolName: string): void {
   }
 }
 
+/**
+ * Removes every <think>...</think> block (tags plus enclosed text, newlines included) and
+ * trims the result; non-greedy matching keeps text between separate blocks. Reduces context usage.
+ *
+ * @param {string} content - The content to process
+ * @returns {string} The content with thinking blocks removed and outer whitespace trimmed
+ */
+function stripThinkingTokens(content: string): string {
+  return content.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
+}
+
 /**
  * Performs a chat completion by sending a request to the Perplexity API.
  * Appends citations to the returned message content if they exist.
  *
  * @param {Array<{ role: string; content: string }>} messages - An array of message objects.
  * @param {string} model - The model to use for the completion.
+ * @param {boolean} stripThinking - If true, removes <think>...</think> tags and their content from the response.
  * @returns {Promise<string>} The chat completion result with appended citations.
  * @throws Will throw an error if the API request fails.
  */
 export async function performChatCompletion(
   messages: Array<{ role: string; content: string }>,
-  model: string = "sonar-pro"
+  model: string = "sonar-pro",
+  stripThinking: boolean = false
 ): Promise<string> {
   // Read timeout fresh each time to respect env var changes
   const TIMEOUT_MS = parseInt(process.env.PERPLEXITY_TIMEOUT_MS || "300000", 10);
@@ -271,6 +292,11 @@ export async function performChatCompletion(
   // Directly retrieve the main message content from the response
   let messageContent = firstChoice.message.content;
 
+  // Strip thinking tokens if requested
+  if (stripThinking) {
+    messageContent = stripThinkingTokens(messageContent);
+  }
+
   // If citations are provided, append them to the message content
   if (data.citations && Array.isArray(data.citations) && data.citations.length > 0) {
     messageContent += "\n\nCitations:\n";
@@ -433,7 +459,8 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
       case "perplexity_research": {
         validateMessages(args.messages, "perplexity_research");
         const messages = args.messages as Array<{ role: string; content: string }>;
-        const result = await performChatCompletion(messages, "sonar-deep-research");
+        const stripThinking = typeof args.strip_thinking === "boolean" ? args.strip_thinking : false;
+        const result = await performChatCompletion(messages, "sonar-deep-research", stripThinking);
         return {
           content: [{ type: "text", text: result }],
           isError: false,
@@ -442,7 +469,8 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
       case "perplexity_reason": {
         validateMessages(args.messages, "perplexity_reason");
         const messages = args.messages as Array<{ role: string; content: string }>;
-        const result = await performChatCompletion(messages, "sonar-reasoning-pro");
+        const stripThinking = typeof args.strip_thinking === "boolean" ? args.strip_thinking : false;
+        const result = await performChatCompletion(messages, "sonar-reasoning-pro", stripThinking);
         return {
           content: [{ type: "text", text: result }],
           isError: false,