Skip to content

Commit 5b8932c

Browse files
Improve JSON serialization with UTF-8 support (#114)
- Add `ensure_ascii` parameter to handle non-ASCII characters
- Use UTF-8 JSON in prompts to reduce token count and improve readability for the LLM
1 parent ee76b04 commit 5b8932c

File tree

1 file changed

+8
-6
lines changed

1 file changed

+8
-6
lines changed

src/sdialog/__init__.py

Lines changed: 8 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -451,9 +451,9 @@ def description(self, turn_template: str = None):
451451

452452
def prompt(self) -> str:
453453
"""Generates a prompt string for the entire dialogue."""
454-
return json.dumps(self.json()["turns"], indent=2)
454+
return json.dumps(self.json()["turns"], indent=2, ensure_ascii=False)
455455

456-
def json(self, string: bool = False, indent: int = 2,ensure_ascii: bool = False):
456+
def json(self, string: bool = False, indent: int = 2, ensure_ascii: bool = False):
457457
"""
458458
Serializes the dialogue to JSON.
459459
@@ -483,7 +483,9 @@ def print(self, *a, **kw):
483483
"""
484484
_print_dialog(self, *a, **kw)
485485

486-
def to_file(self, path: str = None, type: str = "auto", makedir: bool = True, overwrite: bool = True, human_readable: bool = False):
486+
def to_file(self, path: str = None, type: str = "auto",
487+
makedir: bool = True, overwrite: bool = True,
488+
ensure_ascii: bool = False):
487489
"""
488490
Saves the dialogue to a file in JSON, CSV, or plain text format.
489491
@@ -495,8 +497,8 @@ def to_file(self, path: str = None, type: str = "auto", makedir: bool = True, ov
495497
:type makedir: bool
496498
:param overwrite: If False and the file exists, raise FileExistsError instead of overwriting.
497499
:type overwrite: bool
498-
:param human_readable: If True and type is "json", pretty-print the JSON output.
499-
:type human_readable: bool
500+
:param ensure_ascii: If True and type is "json", escape non-ASCII characters in the output.
501+
:type ensure_ascii: bool
500502
"""
501503
if not path:
502504
if self._path:
@@ -518,7 +520,7 @@ def to_file(self, path: str = None, type: str = "auto", makedir: bool = True, ov
518520

519521
with open(path, "w", newline='', encoding='utf-8') as writer:
520522
if type == "json":
521-
writer.write(self.json(string=True, ensure_ascii=not human_readable))
523+
writer.write(self.json(string=True, ensure_ascii=ensure_ascii))
522524
elif type in ["csv", "tsv"]:
523525
# set delimiter based on desired type
524526
delimiter = {"csv": ",", "tsv": "\t"}[type]

0 commit comments

Comments
 (0)