Add paper demo files

sergioburdisso · sergioburdisso · commit 713fbb0c76c9 · 2025-12-02T12:57:06.000+01:00
diff --git a/tutorials/demo_paper/1.agent.py b/tutorials/demo_paper/1.agent.py
@@ -0,0 +1,61 @@
+from sdialog.agents import Agent
+from sdialog.personas import SupportAgent
+
+
+def verify_account(customer_id: str) -> dict:
+    """Verify customer account details and status.
+    Args:
+        customer_id: The customer's unique id.
+    Returns:
+        JSON with customer id and existence flag.
+    """
+    return dict(customer_id=customer_id, exists=True)  # demo stub: every id is reported as existing
+
+
+def update_billing_address(customer_id: str, new_address: str) -> dict:
+    """Update the billing address for a customer account.
+    Args:
+        customer_id: The customer's unique id.
+        new_address: The new billing address.
+    Returns:
+        JSON with update status.
+    """
+    return dict(customer_id=customer_id, address_updated=True)  # demo stub: new_address is not stored
+
+
+def get_service_plans() -> dict:
+    """Get available service plans and pricing information.
+    Returns:
+        JSON with available plans.
+    """
+    # Static demo catalog as (plan name, monthly price) pairs.
+    catalog = (
+        ("Basic", "$29.99/month"),
+        ("Premium", "$49.99/month"),
+        ("Enterprise", "$99.99/month"),
+    )
+    return {"plans": [{"name": name, "price": price} for name, price in catalog]}
+
+
+support_persona = SupportAgent(  # persona handed to every agent created by build_my_agent
+    rules="\n".join(["- Make sure to always verify the account when required.",
+                     "- Make sure to introduce yourself and the company."]),
+    politeness="high",
+    name="Michael",
+)
+
+
+def build_my_agent(model_name) -> Agent:
+    """Create the "Support Agent" running on `model_name`, with the three demo tools attached."""
+    toolbox = [verify_account, update_billing_address, get_service_plans]
+    return Agent(
+        model=model_name,
+        name="Support Agent",
+        context="Synergy Communications call center office",
+        tools=toolbox, think=True, persona=support_persona,
+    )
+
+
+if __name__ == "__main__":
+    # Script entry point: build the agent on ollama:qwen3:8b and call serve() on port 1333.
+    build_my_agent("ollama:qwen3:8b").serve(port=1333)
diff --git a/tutorials/demo_paper/2.dialog_generation.py b/tutorials/demo_paper/2.dialog_generation.py
@@ -0,0 +1,76 @@
+import os
+import sdialog
+
+from tqdm.auto import tqdm
+
+from sdialog.personas import Customer
+from sdialog.agents import Agent
+from sdialog.generators import PersonaGenerator
+
+from agent import build_my_agent
+
+sdialog.config.llm("openai:gpt-4.1")  # presumably the default LLM for agents built without `model` — confirm
+
+LLMS = ["qwen3:0.6b", "qwen3:1.7b", "qwen3:8b", "qwen3:14b"]  # model names handed to build_my_agent
+NUM_CUSTOMERS = 10  # personas requested per case from generate_customers
+NUM_DIALOGS = 100  # `n` passed to every generate_dialogs call
+
+# Case A: requiring verification
+base_customer_v = Customer(issue="Need to update billing address")
+
+# Case B: not requiring verification
+base_customer_no_v = Customer(
+    issue="Want to learn about service plans",
+    rules="Ask general questions about services"
+)
+
+def generate_customers(base_customer, n, save_folder):
+    """Return `n` customer personas, reusing any already saved in `save_folder`.
+
+    Personas missing on disk are produced by a PersonaGenerator seeded with `base_customer`
+    (politeness set to the rude/neutral/high list) and written to `customer_<ix>.json`.
+    """
+    generator = PersonaGenerator(base_customer)
+    generator.set(politeness=["rude", "neutral", "high"])
+
+    personas = []
+    for ix in tqdm(range(n), desc="Generating customers"):
+        persona_file = os.path.join(save_folder, f"customer_{ix}.json")
+        if os.path.exists(persona_file):
+            personas.append(Customer.from_file(persona_file))  # saved by an earlier run
+            continue
+        persona = generator.generate()
+        persona.to_file(persona_file)
+        personas.append(persona)
+    return personas
+
+
+def generate_dialogs(llm_name, customer, n, save_folder):
+    """Save up to `n` dialogs between the `llm_name` support agent and `customer`.
+
+    NOTE(review): files are keyed by index only (`dialog_<ix>.json`), so any index
+    already present in `save_folder` is skipped, even if written for another customer.
+    """
+    support = build_my_agent(llm_name)
+    caller = Agent(persona=customer, name="Customer")
+    for ix in tqdm(range(n), desc="Generating dialogs"):
+        target = os.path.join(save_folder, f"dialog_{ix}.json")
+        if os.path.exists(target):
+            continue
+        support.talk_with(caller).to_file(target)
+
+
+# Personas for both cases are generated once (or reloaded from disk) up front.
+# Case A: requiring verification
+customers_v = generate_customers(
+    base_customer_v, NUM_CUSTOMERS, "output/requires_verification/customers")
+# Case B: not requiring verification
+customers_no_v = generate_customers(
+    base_customer_no_v, NUM_CUSTOMERS, "output/no_verification/customers")
+
+for llm in tqdm(LLMS, desc="Processing LLMs"):
+    # Case A first, then Case B; dialogs go to output/<case folder>/<llm>/
+    for case, label, personas in (("requires_verification", "verification", customers_v),
+                                  ("no_verification", "no verification", customers_no_v)):
+        for customer in tqdm(personas, desc=f"Customers ({label}) - {llm}"):
+            generate_dialogs(llm, customer, NUM_DIALOGS, f"output/{case}/{llm}/")
diff --git a/tutorials/demo_paper/3.evaluation.py b/tutorials/demo_paper/3.evaluation.py
@@ -0,0 +1,67 @@
+import warnings
+import sdialog
+
+from sdialog import Dialog
+from sdialog.evaluation import LLMJudgeYesNo, ToolSequenceValidator
+from sdialog.evaluation import FrequencyEvaluator
+from sdialog.evaluation import Comparator
+
+# Hide all UserWarnings
+warnings.filterwarnings("ignore", category=UserWarning)
+
+sdialog.config.llm("openai:gpt-4.1")  # presumably the LLM used by the LLM judge below — confirm
+sdialog.config.cache(True)  # turns on sdialog's cache setting (what gets cached is not visible here)
+
+LLMS = ["qwen3:0.6b", "qwen3:1.7b", "qwen3:8b", "qwen3:14b"]  # model names whose dialogs are compared below
+
+# --- Dialog Metrics ----
+# 1) Yes/no LLM judge (with reason=True): did the agent ask for verification?
+ask_v_question = ("Did the support agent tried verifying the customer's "
+                  "account by asking for the account ID in this dialog?")
+judge_ask_v = LLMJudgeYesNo(ask_v_question, reason=True)
+
+# 2) Did the agent call the right tools?
+# Case A: first verify then update
+tool_seq_v = ToolSequenceValidator(["verify_account", "update_billing_address"])
+# Case B: do not verify and get plans
+tool_seq_no_v = ToolSequenceValidator(["not:verify_account", "get_service_plans"])
+
+# --- Dataset Evaluators ----
+# Each evaluator wraps one dialog metric; the keyword arguments name it and label its plot.
+freq_judge_ask_v = FrequencyEvaluator(
+    judge_ask_v,
+    name="Ask-Verify",
+    plot_title="Account Verification Request Rate (LLM Judge)",
+    plot_xlabel="LLM Model",
+    plot_ylabel="Verification Requested (%)",
+)
+
+# The two tool-sequence evaluators share the same name and plot labels.
+tool_eval_kwargs = dict(name="Tools-OK",
+                        plot_title="Tool Usage Evaluation",
+                        plot_xlabel="LLM Model", plot_ylabel="Success (%)")
+freq_tool_seq_v = FrequencyEvaluator(tool_seq_v, **tool_eval_kwargs)
+freq_tool_seq_no_v = FrequencyEvaluator(tool_seq_no_v, **tool_eval_kwargs)
+
+# --- Dataset Comparator ----
+# Both cases map each model name in LLMS to the dialogs loaded from that model's
+# output folder, call the comparator on that mapping, then call plot() on it.
+
+# Case A: requiring verification
+comparator_v = Comparator(evaluators=[freq_judge_ask_v, freq_tool_seq_v])
+print("\nResults - Requires Verification")
+comparator_v({
+    llm: Dialog.from_folder(f"output/requires_verification/{llm}/")
+    for llm in LLMS
+})
+comparator_v.plot(save_folder_path="output/requires_verification")
+
+# Case B: not requiring verification
+# (same LLM-judge evaluator as Case A, paired with the Case B tool-sequence evaluator)
+comparator_no_v = Comparator(evaluators=[freq_judge_ask_v, freq_tool_seq_no_v])
+print("\nResults - No Verification Required")
+comparator_no_v({
+    llm: Dialog.from_folder(f"output/no_verification/{llm}/")
+    for llm in LLMS
+})
+comparator_no_v.plot(save_folder_path="output/no_verification")