diff --git a/apisix/cli/ngx_tpl.lua b/apisix/cli/ngx_tpl.lua index 454f67b13d44..1129900f666c 100644 --- a/apisix/cli/ngx_tpl.lua +++ b/apisix/cli/ngx_tpl.lua @@ -807,6 +807,7 @@ http { {% end %} set $llm_content_risk_level ''; + set $apisix_upstream_response_time $upstream_response_time; set $request_type 'traditional_http'; set $request_llm_model ''; diff --git a/apisix/core/ctx.lua b/apisix/core/ctx.lua index dcd50962ba58..9b80aa74ec25 100644 --- a/apisix/core/ctx.lua +++ b/apisix/core/ctx.lua @@ -236,6 +236,7 @@ do llm_content_risk_level = true, request_type = true, + apisix_upstream_response_time = true, llm_time_to_first_token = true, request_llm_model = true, llm_model = true, diff --git a/apisix/init.lua b/apisix/init.lua index d56fbc3e1860..020a85040ebc 100644 --- a/apisix/init.lua +++ b/apisix/init.lua @@ -957,6 +957,15 @@ end function _M.http_log_phase() + local api_ctx = ngx.ctx.api_ctx + if not api_ctx then + return + end + + if not api_ctx.var.apisix_upstream_response_time or + api_ctx.var.apisix_upstream_response_time == "" then + api_ctx.var.apisix_upstream_response_time = ngx.var.upstream_response_time + end local api_ctx = common_phase("log") if not api_ctx then return diff --git a/apisix/plugins/ai-drivers/openai-base.lua b/apisix/plugins/ai-drivers/openai-base.lua index d660de68d021..fcf956ba0ddf 100644 --- a/apisix/plugins/ai-drivers/openai-base.lua +++ b/apisix/plugins/ai-drivers/openai-base.lua @@ -90,6 +90,8 @@ local function read_response(ctx, res) local contents = {} while true do local chunk, err = body_reader() -- will read chunk by chunk + ctx.var.apisix_upstream_response_time = math.floor((ngx_now() - + ctx.llm_request_start_time) * 1000) if err then core.log.warn("failed to read response chunk: ", err) return handle_error(err) @@ -158,6 +160,7 @@ local function read_response(ctx, res) end ngx.status = res.status ctx.var.llm_time_to_first_token = math.floor((ngx_now() - ctx.llm_request_start_time) * 1000) + ctx.var.apisix_upstream_response_time = ctx.var.llm_time_to_first_token local res_body, err = core.json.decode(raw_res_body) if err then core.log.warn("invalid response body from ai service: ", raw_res_body, " err: ", err, diff --git a/apisix/plugins/ai-proxy/base.lua b/apisix/plugins/ai-proxy/base.lua index 944d977f5552..4fc032e47f0e 100644 --- a/apisix/plugins/ai-proxy/base.lua +++ b/apisix/plugins/ai-proxy/base.lua @@ -32,6 +32,7 @@ function _M.set_logging(ctx, summaries, payloads) duration = ctx.var.llm_time_to_first_token, prompt_tokens = ctx.var.llm_prompt_tokens, completion_tokens = ctx.var.llm_completion_tokens, + upstream_response_time = ctx.var.apisix_upstream_response_time, } end if payloads then diff --git a/t/APISIX.pm b/t/APISIX.pm index 305044e4c382..1bf43b888796 100644 --- a/t/APISIX.pm +++ b/t/APISIX.pm @@ -674,7 +674,7 @@ _EOC_ require("apisix").http_exit_worker() } - log_format main escape=default '\$remote_addr - \$remote_user [\$time_local] \$http_host "\$request" \$status \$body_bytes_sent \$request_time "\$http_referer" "\$http_user_agent" \$upstream_addr \$upstream_status \$upstream_response_time "\$upstream_scheme://\$upstream_host\$upstream_uri" \$request_llm_model \$llm_model \$llm_time_to_first_token \$llm_prompt_tokens \$llm_completion_tokens'; + log_format main escape=default '\$remote_addr - \$remote_user [\$time_local] \$http_host "\$request" \$status \$body_bytes_sent \$request_time "\$http_referer" "\$http_user_agent" \$upstream_addr \$upstream_status \$apisix_upstream_response_time "\$upstream_scheme://\$upstream_host\$upstream_uri" \$request_llm_model \$llm_model \$llm_time_to_first_token \$llm_prompt_tokens \$llm_completion_tokens'; # fake server, only for test server { @@ -866,6 +866,7 @@ _EOC_ set \$llm_prompt_tokens '0'; set \$llm_completion_tokens '0'; + set \$apisix_upstream_response_time \$upstream_response_time; access_log $apisix_home/t/servroot/logs/access.log main; access_by_lua_block { diff --git a/t/core/ctx4.t b/t/core/ctx4.t new file mode 100644 index 000000000000..ffd2e55abc9c --- /dev/null +++ b/t/core/ctx4.t @@ -0,0 +1,97 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +use t::APISIX 'no_plan'; + +repeat_each(1); +log_level('warn'); +no_long_string(); +no_root_location(); + +add_block_preprocessor(sub { + my ($block) = @_; + + if (!$block->request) { + $block->set_value("request", "GET /t"); + } + + if (!$block->response_body) { + $block->set_value("response_body", "passed\n"); + } + + if (!$block->no_error_log && !$block->error_log) { + $block->set_value("no_error_log", "[error]\n[alert]"); + } +}); + +run_tests; + +__DATA__ + +=== TEST 1: set route with serverless function to verify apisix_upstream_response_time +--- config + location /t { + content_by_lua_block { + local t = require("lib.test_admin").test + local code, body = t('/apisix/admin/routes/1', + ngx.HTTP_PUT, + [[{ + "uri": "/hello", + "upstream": { + "type": "roundrobin", + "nodes": { + "127.0.0.1:1980": 1 + } + }, + "plugins": { + "serverless-post-function": { + "phase": "log", + "functions": [ + "return function(conf, ctx) + local apisix_urt = ctx.var.apisix_upstream_response_time + local ngx_urt = ngx.var.upstream_response_time + if apisix_urt and ngx_urt and apisix_urt == ngx_urt then + ngx.log(ngx.WARN, 'SUCCESS: apisix_upstream_response_time matches') + else + ngx.log(ngx.ERR, 'ERROR: apisix_upstream_response_time mismatch. APISIX: ', tostring(apisix_urt), ' NGX: ', tostring(ngx_urt)) + end + end" + ] + } + } + }]] + ) + + if code >= 300 then + ngx.status = code + end + ngx.say(body) + } + } +--- response_body +passed + + + +=== TEST 2: verify apisix_upstream_response_time matches ngx.upstream_response_time +--- request +GET /hello +--- response_body +hello world +--- error_log +SUCCESS: apisix_upstream_response_time matches +--- no_error_log +ERROR: apisix_upstream_response_time mismatch diff --git a/t/plugin/ai-proxy3.t b/t/plugin/ai-proxy3.t index 4f13823718f8..2813e981edef 100644 --- a/t/plugin/ai-proxy3.t +++ b/t/plugin/ai-proxy3.t @@ -176,7 +176,7 @@ POST /anything --- response_body eval qr/.*completion_tokens.*/ --- access_log eval -qr/.*gpt-4 gpt-3.5-turbo \d+ 10 20.*/ +qr/.*[\d.]+ \"http:\/\/localhost\" gpt-4 gpt-3.5-turbo \d+ 10 20.*/ @@ -328,4 +328,4 @@ passed --- response_body_like eval qr/6data: \[DONE\]\n\n/ --- access_log eval -qr/.*gpt-4 gpt-3.5-turbo 2\d\d 15 20.*/ +qr/.*[\d.]+ \"http:\/\/localhost:1984\" gpt-4 gpt-3.5-turbo 2\d\d 15 20.*/