# Copyright (c) Alibaba, Inc. and its affiliates.
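# Example OpenAI-compatible client demonstrating two-turn tool calling,
# in both non-streaming and streaming modes, against a locally deployed server.
# Assumption: an OpenAI-compatible endpoint (e.g. one started with `swift deploy`)
# is already listening on http://127.0.0.1:8000/v1.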
import os
from openai import OpenAI
os.environ['CUDA_VISIBLE_DEVICES'] = '0'


def get_infer_request():
    # A single-turn user query plus one tool definition (JSON Schema parameters).
    messages = [{'role': 'user', 'content': "How's the weather in Beijing today?"}]
    tools = [{
        'name': 'get_current_weather',
        'description': 'Get the current weather in a given location',
        'parameters': {
            'type': 'object',
            'properties': {
                'location': {
                    'type': 'string',
                    'description': 'The city and state, e.g. San Francisco, CA'
                },
                'unit': {
                    'type': 'string',
                    'enum': ['celsius', 'fahrenheit']
                }
            },
            'required': ['location']
        }
    }]
    return messages, tools


def infer(client, model: str, messages, tools):
    messages = messages.copy()
    query = messages[0]['content']
    # First turn: the model sees the tool definition and may emit a tool call.
    resp = client.chat.completions.create(model=model, messages=messages, tools=tools, max_tokens=512, temperature=0)
    response = resp.choices[0].message.content
    print(f'query: {query}')
    print(f'response: {response}')
    print(f'tool_calls: {resp.choices[0].message.tool_calls}')

    # Simulated tool result; in a real application this would come from actually
    # executing get_current_weather.
    tool = '{"temperature": 32, "condition": "Sunny", "humidity": 50}'
    print(f'tool_response: {tool}')
    messages += [{'role': 'assistant', 'content': response}, {'role': 'tool', 'content': tool}]
    # Second turn: the model turns the tool result into a final answer.
    resp = client.chat.completions.create(model=model, messages=messages, tools=tools, max_tokens=512, temperature=0)
    response2 = resp.choices[0].message.content
    print(f'response2: {response2}')


# Streaming variant of the same two-turn tool-calling flow.
def infer_stream(client, model: str, messages, tools):
    messages = messages.copy()
    query = messages[0]['content']
    gen = client.chat.completions.create(
        model=model, messages=messages, tools=tools, max_tokens=512, temperature=0, stream=True)
    response = ''
    print(f'query: {query}\nresponse: ', end='')
    for chunk in gen:
        delta = chunk.choices[0].delta.content or ''  # content may be None on tool-call chunks
        response += delta
        print(delta, end='', flush=True)
    print()
    # The tool call is read from the last streamed chunk.
    print(f'tool_calls: {chunk.choices[0].delta.tool_calls}')

    # Simulated tool result, as in the non-streaming case.
    tool = '{"temperature": 32, "condition": "Sunny", "humidity": 50}'
    print(f'tool_response: {tool}')
    messages += [{'role': 'assistant', 'content': response}, {'role': 'tool', 'content': tool}]
    gen = client.chat.completions.create(
        model=model, messages=messages, tools=tools, max_tokens=512, temperature=0, stream=True)
    print(f'query: {query}\nresponse2: ', end='')
    for chunk in gen:
        print(chunk.choices[0].delta.content or '', end='', flush=True)
    print()


if __name__ == '__main__':
    host: str = '127.0.0.1'
    port: int = 8000
    client = OpenAI(
        api_key='EMPTY',
        base_url=f'http://{host}:{port}/v1',
    )
    # Use the first model exposed by the server.
    model = client.models.list().data[0].id
    print(f'model: {model}')

    messages, tools = get_infer_request()
    infer(client, model, messages, tools)
    infer_stream(client, model, messages, tools)