Skip to content

Commit 713fbb0

Browse files
Add paper demo files
1 parent 0a501c9 commit 713fbb0

File tree

3 files changed

+204
-0
lines changed

3 files changed

+204
-0
lines changed

tutorials/demo_paper/1.agent.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
from sdialog.agents import Agent
2+
from sdialog.personas import SupportAgent
3+
4+
5+
def verify_account(customer_id: str) -> dict:
    """Verify customer account details and status.
    Args:
        customer_id: The customer's unique id.
    Returns:
        JSON with customer id and existence flag.
    """
    # Demo stub: no lookup is performed, every id is reported as existing.
    # NOTE(review): this function is passed to the agent as a tool, so the
    # docstring above is presumably shown to the LLM as the tool description;
    # confirm before rewording it.
    account_status = dict(customer_id=customer_id, exists=True)
    return account_status
13+
14+
15+
def update_billing_address(customer_id: str, new_address: str) -> dict:
    """Update the billing address for a customer account.
    Args:
        customer_id: The customer's unique id.
        new_address: The new billing address.
    Returns:
        JSON with update status.
    """
    # Demo stub: new_address is accepted but not stored or echoed back; the
    # update is always reported as successful.
    # NOTE(review): this function is passed to the agent as a tool, so the
    # docstring above is presumably shown to the LLM as the tool description;
    # confirm before rewording it.
    update_status = {"customer_id": customer_id}
    update_status["address_updated"] = True
    return update_status
24+
25+
26+
def get_service_plans() -> dict:
    """Get available service plans and pricing information.
    Returns:
        JSON with available plans.
    """
    # Fixed demo catalogue, listed as (plan name, price) pairs.
    # NOTE(review): this function is passed to the agent as a tool, so the
    # docstring above is presumably shown to the LLM as the tool description;
    # confirm before rewording it.
    catalogue = (
        ("Basic", "$29.99/month"),
        ("Premium", "$49.99/month"),
        ("Enterprise", "$99.99/month"),
    )
    return {
        "plans": [
            {"name": plan_name, "price": plan_price}
            for plan_name, plan_price in catalogue
        ]
    }
38+
39+
40+
# Persona given to the support agent built by build_my_agent(): named
# "Michael", high politeness, with two behavioural rules (one per line).
support_persona = SupportAgent(
    name="Michael",
    politeness="high",
    rules="\n".join([
        "- Make sure to always verify the account when required.",
        "- Make sure to introduce yourself and the company.",
    ]),
)
46+
47+
48+
def build_my_agent(model_name) -> Agent:
    """Create the demo support agent running on the given model.

    The agent uses the module-level ``support_persona`` and is given the three
    stub tools defined in this module.

    Args:
        model_name: Value forwarded unchanged as the ``model`` argument of
            ``Agent`` (e.g. ``"ollama:qwen3:8b"``).
    Returns:
        The configured ``Agent`` instance.
    """
    available_tools = [
        verify_account,
        update_billing_address,
        get_service_plans,
    ]
    return Agent(
        name="Support Agent",
        persona=support_persona,
        model=model_name,
        context="Synergy Communications call center office",
        tools=available_tools,
        think=True,  # presumably enables the model's reasoning mode; confirm in sdialog docs
    )
57+
58+
59+
if __name__ == "__main__":
    # Script entry point: build the agent on the "ollama:qwen3:8b" model and
    # serve it on port 1333.
    build_my_agent("ollama:qwen3:8b").serve(port=1333)
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
import os
2+
import sdialog
3+
4+
from tqdm.auto import tqdm
5+
6+
from sdialog.personas import Customer
7+
from sdialog.agents import Agent
8+
from sdialog.generators import PersonaGenerator
9+
10+
from agent import build_my_agent
11+
12+
# LLM configured globally for sdialog (presumably the default model used by
# components that are not given an explicit one; confirm in sdialog docs).
sdialog.config.llm("openai:gpt-4.1")

# Model tags tried as the support agent's backend; each one is passed to
# build_my_agent() by the driver loop.
LLMS = [f"qwen3:{size}" for size in ("0.6b", "1.7b", "8b", "14b")]
NUM_CUSTOMERS = 10  # customer personas generated per case
NUM_DIALOGS = 100  # dialogs requested per generate_dialogs() call

# Case A: requiring verification
base_customer_v = Customer(issue="Need to update billing address")

# Case B: not requiring verification
base_customer_no_v = Customer(
    rules="Ask general questions about services",
    issue="Want to learn about service plans",
)
26+
27+
def generate_customers(base_customer, n, save_folder):
    """Return ``n`` customer personas, reusing any already saved on disk.

    For each index ``ix`` in ``range(n)`` the persona stored at
    ``<save_folder>/customer_<ix>.json`` is loaded when that file exists;
    otherwise a new persona is generated from ``base_customer`` and saved to
    that path, so repeated runs work with the same customers.

    Args:
        base_customer: Persona handed to ``PersonaGenerator`` as the starting
            point for generation.
        n: Number of customers to return.
        save_folder: Folder holding the ``customer_<ix>.json`` files.
    Returns:
        List of ``n`` customer personas, in index order.
    """
    persona_gen = PersonaGenerator(base_customer)
    # Presumably restricts the generated politeness attribute to one of these
    # values; confirm against the PersonaGenerator documentation.
    persona_gen.set(politeness=["rude", "neutral", "high"])

    def load_or_create(index):
        # Reuse the persona saved for this index, or generate and save a new one.
        persona_path = os.path.join(save_folder, f"customer_{index}.json")
        if os.path.exists(persona_path):
            return Customer.from_file(persona_path)
        persona = persona_gen.generate()
        persona.to_file(persona_path)
        return persona

    return [
        load_or_create(index)
        for index in tqdm(range(n), desc="Generating customers")
    ]
46+
47+
48+
def generate_dialogs(llm_name, customer, n, save_folder, file_prefix="dialog"):
    """Generate up to ``n`` dialogs between the support agent and one customer.

    Dialogs are written to ``<save_folder>/<file_prefix>_<ix>.json`` for
    ``ix`` in ``range(n)``. Files that already exist are skipped so an
    interrupted run can be resumed without regenerating finished dialogs.

    Args:
        llm_name: Model name forwarded to ``build_my_agent``.
        customer: Customer persona; it is wrapped in an ``Agent`` named
            "Customer" that talks with the support agent.
        n: Number of dialogs wanted for this call.
        save_folder: Folder the dialog files are written to.
        file_prefix: File-name prefix (default ``"dialog"``, which gives the
            original ``dialog_<ix>.json`` names). Calls that share a
            ``save_folder`` must use distinct prefixes; otherwise the files
            of the first call make every later call skip all its dialogs.
    """
    dialog_paths = [
        os.path.join(save_folder, f"{file_prefix}_{ix}.json") for ix in range(n)
    ]
    pending_paths = [path for path in dialog_paths if not os.path.exists(path)]
    if not pending_paths:
        # Everything already on disk: avoid building the agents for nothing.
        return

    agent = build_my_agent(llm_name)
    # Separate name so the persona argument is not shadowed by its Agent wrapper.
    customer_agent = Agent(
        persona=customer,
        name="Customer"
    )

    for path in tqdm(pending_paths, desc="Generating dialogs"):
        dialog = agent.talk_with(customer_agent)
        dialog.to_file(path)


# Case A: requiring verification
customers_v = generate_customers(base_customer_v, NUM_CUSTOMERS,
                                 "output/requires_verification/customers")
# Case B: not requiring verification
customers_no_v = generate_customers(base_customer_no_v, NUM_CUSTOMERS,
                                    "output/no_verification/customers")

for llm in tqdm(LLMS, desc="Processing LLMs"):
    # All customers of one LLM share an output folder, so each customer gets
    # its own file prefix. With the shared default "dialog_<ix>.json" names,
    # only the first customer would ever produce dialogs.

    # Case A: requiring verification
    for cix, customer in enumerate(tqdm(customers_v, desc=f"Customers (verification) - {llm}")):
        generate_dialogs(llm, customer, NUM_DIALOGS, f"output/requires_verification/{llm}/",
                         file_prefix=f"customer_{cix}_dialog")
    # Case B: not requiring verification
    for cix, customer in enumerate(tqdm(customers_no_v, desc=f"Customers (no verification) - {llm}")):
        generate_dialogs(llm, customer, NUM_DIALOGS, f"output/no_verification/{llm}/",
                         file_prefix=f"customer_{cix}_dialog")
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
import warnings
2+
import sdialog
3+
4+
from sdialog import Dialog
5+
from sdialog.evaluation import LLMJudgeYesNo, ToolSequenceValidator
6+
from sdialog.evaluation import FrequencyEvaluator
7+
from sdialog.evaluation import Comparator
8+
9+
# Hide all UserWarnings
warnings.filterwarnings("ignore", category=UserWarning)

sdialog.config.llm("openai:gpt-4.1")
sdialog.config.cache(True)

# Models whose generated dialogs are compared; also used as the labels of the
# comparator inputs and as the per-model sub-folder names.
LLMS = ["qwen3:0.6b", "qwen3:1.7b", "qwen3:8b", "qwen3:14b"]


def _load_dialogs_per_llm(case_folder):
    """Map every name in LLMS to the dialogs in ``<case_folder>/<name>/``."""
    return {llm: Dialog.from_folder(f"{case_folder}/{llm}/") for llm in LLMS}


# --- Dialog Metrics ----
# 1) Did the agent ask for verification?
# NOTE(review): the question has a grammar slip ("Did ... tried"). It is left
# untouched because it is the prompt sent to the judge and caching is enabled;
# rewording it would change the evaluated prompt.
judge_ask_v = LLMJudgeYesNo("Did the support agent tried verifying the customer's "
                            "account by asking for the account ID in this dialog?",
                            reason=True)

# 2) Did the agent call the right tools?
# Case A: first verify then update
tool_seq_v = ToolSequenceValidator(["verify_account", "update_billing_address"])
# Case B: do not verify and get plans
tool_seq_no_v = ToolSequenceValidator(["not:verify_account", "get_service_plans"])

# --- Dataset Evaluators ----
freq_judge_ask_v = FrequencyEvaluator(judge_ask_v,
                                      name="Ask-Verify",
                                      plot_title="Account Verification Request Rate (LLM Judge)",
                                      plot_xlabel="LLM Model",
                                      plot_ylabel="Verification Requested (%)")
freq_tool_seq_v = FrequencyEvaluator(tool_seq_v,
                                     name="Tools-OK",
                                     plot_title="Tool Usage Evaluation",
                                     plot_xlabel="LLM Model",
                                     plot_ylabel="Success (%)")
freq_tool_seq_no_v = FrequencyEvaluator(tool_seq_no_v,
                                        name="Tools-OK",
                                        plot_title="Tool Usage Evaluation",
                                        plot_xlabel="LLM Model",
                                        plot_ylabel="Success (%)")

# --- Dataset Comparator ----
# Case A: requiring verification
comparator_v = Comparator(evaluators=[freq_judge_ask_v, freq_tool_seq_v])
print("\nResults - Requires Verification")
comparator_v(_load_dialogs_per_llm("output/requires_verification"))
comparator_v.plot(save_folder_path="output/requires_verification")

# Case B: not requiring verification
comparator_no_v = Comparator(evaluators=[freq_judge_ask_v, freq_tool_seq_no_v])
print("\nResults - No Verification Required")
comparator_no_v(_load_dialogs_per_llm("output/no_verification"))
comparator_no_v.plot(save_folder_path="output/no_verification")

0 commit comments

Comments
 (0)