utils.py
import codecs
import json

import requests
import yaml


class ollama_chatter:
    """Chat client for a local Ollama server; keeps multi-turn message history."""

    def __init__(self, host='http://localhost:11434'):
        self.host = host
        self.msg_history = []
        self.headers = {
            "Content-Type": "application/json"
        }

    def communicate(self, prompt, greedy=True, reset=True, max_tokens=4096):
        # Log every turn of the conversation to a local file.
        f = codecs.open("conversation_history.txt", "a", "utf-8")
        if reset:
            self.msg_history = []
        self.msg_history.append({"role": "user", "content": prompt})
        f.write("=" * 10 + "\n")
        for item in self.msg_history:
            f.write(item['role'] + ":\n" + str(item['content']) + "\n")
        # Configure the request data
        data = {
            "model": "qwen2.5-7b",  # This can be made configurable
            "messages": self.msg_history,
            "stream": False,
            "options": {
                "num_predict": max_tokens,  # Ollama's name for max_tokens
                "temperature": 0 if greedy else 0.001,
            }
        }
        # Make the API call
        response = requests.post(f"{self.host}/api/chat", headers=self.headers, json=data)
        # Extract the response content
        # Note: Ollama's response format might need adjustment depending on the actual response structure
        answer = json.loads(response.content.decode())['message']['content']
        f.write("-" * 10 + "\n" + "response:\n")
        f.write(answer + "\n")
        f.close()
        self.msg_history.append({"role": "assistant", "content": answer})
        # Process special formats (JSON/YAML)
        assistant_message = answer
        # Extract JSON if present
        if "```json" in assistant_message:
            json_start = assistant_message.index("```json") + len("```json")
            json_end = assistant_message.rindex("```", json_start)
            json_str = assistant_message[json_start:json_end].strip()
            try:
                assistant_message = json.loads(json_str)
            except json.JSONDecodeError:
                print("Warning: Failed to parse JSON. Returning original message.")
                print("-=-=-=-=\nJSON string:\n")
                print(json_str)
                print("-=-=-=-=")
        # Extract YAML if present
        elif "```yaml" in assistant_message or "```yml" in assistant_message:
            if "```yaml" in assistant_message:
                yaml_start = assistant_message.index("```yaml") + len("```yaml")
            else:
                yaml_start = assistant_message.index("```yml") + len("```yml")
            yaml_end = assistant_message.rindex("```", yaml_start)
            yaml_str = assistant_message[yaml_start:yaml_end].strip()
            try:
                assistant_message = yaml.safe_load(yaml_str)
            except yaml.YAMLError:
                print("Warning: Failed to parse YAML. Returning original message.")
                print("-=-=-=-=\nYAML string:\n")
                print(yaml_str)
                print("-=-=-=-=")
        return assistant_message
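

# A minimal usage sketch for ollama_chatter, assuming an Ollama server is
# running on localhost:11434 with the "qwen2.5-7b" model pulled. Prompting
# for a fenced ```json block exercises the parsing branch above, so the
# return value becomes a dict instead of a raw string.
#
#     chatter = ollama_chatter()
#     parsed = chatter.communicate(
#         'Reply with a ```json code block containing {"status": "ok"}.')
#     print(type(parsed))  # dict if the JSON block parsed, str otherwise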


class llm_chatter:
    """Chat client for an OpenAI-compatible /v1/chat/completions endpoint."""

    def __init__(self, host='http://127.0.0.1:1234/v1/chat/completions'):
        self.host = host
        self.msg_history = []
        self.headers = {
            "Content-Type": "application/json"
        }

    def communicate(self, prompt, greedy=True, reset=False, max_tokens=2048, template="Llama-v3"):
        if reset:
            self.msg_history = []
        self.msg_history.append({"role": "user", "content": prompt})
        data = {
            "mode": "instruct",
            "max_tokens": max_tokens,
            "instruction_template": template,
            "messages": self.msg_history
        }
        if greedy:
            data['temperature'] = 0  # Deterministic decoding
        # verify=False skips TLS certificate checks for local/self-signed hosts
        response = requests.post(self.host, headers=self.headers, json=data, verify=False)
        answer = response.json()['choices'][0]['message']['content']
        self.msg_history.append({"role": "assistant", "content": answer})
        return answer
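

# A minimal usage sketch for llm_chatter, assuming an OpenAI-compatible chat
# endpoint listening at 127.0.0.1:1234 (the extra "mode" and
# "instruction_template" fields suggest a text-generation-webui backend).
# Because reset defaults to False, the second call sees the first exchange
# as conversational context.
#
#     chat = llm_chatter()
#     a1 = chat.communicate("Name one prime number.")
#     a2 = chat.communicate("Now name a larger one.")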


class llm_completion:
    """Client for an OpenAI-compatible /v1/completions (raw text) endpoint."""

    def __init__(self, host='http://127.0.0.1:5000/v1/completions'):
        self.host = host
        self.headers = {
            "Content-Type": "application/json"
        }

    def complete(self, prompt, greedy=False, max_tokens=2048):
        data = {
            "max_tokens": max_tokens,
            "prompt": prompt,
            "temperature": 1,
            "top_p": 0.9,
        }
        if greedy:
            data['temperature'] = 0  # Override sampling with greedy decoding
        response = requests.post(self.host, headers=self.headers, json=data, verify=False)
        answer = response.json()['choices'][0]['text']
        return answer
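

# A minimal usage sketch for llm_completion, assuming an OpenAI-style
# completions endpoint at 127.0.0.1:5000. With greedy=True the temperature
# is forced to 0; otherwise it samples at temperature 1 with top_p 0.9.
#
#     completer = llm_completion()
#     text = completer.complete("def fibonacci(n):", greedy=True, max_tokens=128)
#     print(text)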