From 3045f32a8a41b190aa6ebcb070f0043f76831305 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mart=C3=ADn=20Pozo?= Date: Wed, 24 Jul 2024 19:24:48 +0200 Subject: [PATCH 1/3] feat!: orjson instead of simplejson to load and save JSON objects --- lab/tools.py | 49 +++++++++++++++++++------------------------------ setup.py | 2 +- 2 files changed, 20 insertions(+), 31 deletions(-) diff --git a/lab/tools.py b/lab/tools.py index 9cd7d2034..3b85d5650 100644 --- a/lab/tools.py +++ b/lab/tools.py @@ -13,19 +13,7 @@ import sys from pathlib import Path -# Use simplejson where it's available, because it is compatible (just separately -# maintained), puts no blanks at line endings and loads json much faster: -# json_dump: 44.41s, simplejson_dump: 45.90s -# json_load: 7.32s, simplejson_load: 2.92s -# We cannot use cjson or ujson for dumping, because the resulting files are -# hard to read for humans (cjson_dump: 5.78, ujson_dump: 2.44). Using ujson for -# loading might be feasible, but it would only result in a very small speed gain -# (ujson_load: 2.49). cjson loads even slower than simplejson (cjson_load: 3.28). -try: - import simplejson as json -except ImportError: - import json - +import orjson as json DEFAULT_ENCODING = "utf-8" @@ -260,19 +248,18 @@ def compute_log_score(success, value, lower_bound, upper_bound): class Properties(dict): - class _PropertiesEncoder(json.JSONEncoder): - def default(self, o): - if isinstance(o, Path): - return str(o) - else: - return super().default(o) + def default(self, o): + if isinstance(o, Path): + return str(o) + else: + return super().default(o) - JSON_ARGS = { - "cls": _PropertiesEncoder, - "indent": 2, - "separators": (",", ": "), - "sort_keys": True, - } + # JSON_ARGS = { + # "cls": _PropertiesEncoder, + # "indent": 2, + # "separators": (",", ": "), + # "sort_keys": True, + # } """Transparently handle properties files compressed with xz.""" @@ -289,14 +276,16 @@ def __init__(self, filename=None): dict.__init__(self) def __str__(self): - return json.dumps(self, **self.JSON_ARGS) + return json.dumps( + self, default=self.default, option=json.OPT_INDENT_2 | json.OPT_SORT_KEYS + ) def load(self, filename): path = Path(filename) open_func = lzma.open if path.suffix == ".xz" else open - with open_func(path) as f: + with open_func(path, "rb") as f: try: - self.update(json.load(f)) + self.update(json.loads(f.read())) except ValueError as e: logging.critical(f"JSON parse error in file '{path}': {e}") @@ -308,8 +297,8 @@ def write(self): assert self.path self.path.parent.mkdir(parents=True, exist_ok=True) open_func = lzma.open if self.path.suffix == ".xz" else open - with open_func(self.path, "w") as f: - json.dump(self, f, **self.JSON_ARGS) + with open_func(self.path, "wb") as f: + f.write(self.__str__()) class RunFilter: diff --git a/setup.py b/setup.py index 235158c44..44d04ee6e 100644 --- a/setup.py +++ b/setup.py @@ -36,7 +36,7 @@ ], install_requires=[ "matplotlib", # for scatter plots - "simplejson", # optional, speeds up reading properties files + "orjson", "txt2tags>=3.6", # for HTML and Latex reports ], python_requires=">=3.7", From 85ea338fa20bc679e1043c78bf8be4c74df29b2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mart=C3=ADn=20Pozo?= Date: Tue, 30 Jul 2024 10:59:49 +0200 Subject: [PATCH 2/3] fix: error due to using bytes instead of str in __str__ --- lab/tools.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lab/tools.py b/lab/tools.py index 3b85d5650..3c9042a4b 100644 --- a/lab/tools.py +++ b/lab/tools.py @@ -276,6 +276,9 @@ def __init__(self, filename=None): dict.__init__(self) def __str__(self): + return self.to_bytes().decode() + + def to_bytes(self): return json.dumps( self, default=self.default, option=json.OPT_INDENT_2 | json.OPT_SORT_KEYS ) @@ -298,7 +301,7 @@ def write(self): self.path.parent.mkdir(parents=True, exist_ok=True) open_func = lzma.open if self.path.suffix == ".xz" else open with open_func(self.path, "wb") as f: - f.write(self.__str__()) + f.write(self.to_bytes()) class RunFilter: From ece2e286101dfa0463d30ac78705fc3748786fca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mart=C3=ADn=20Pozo?= Date: Sat, 21 Dec 2024 19:35:17 +0100 Subject: [PATCH 3/3] refactor: orjson code style requested changes --- lab/tools.py | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/lab/tools.py b/lab/tools.py index 3c9042a4b..9493987cc 100644 --- a/lab/tools.py +++ b/lab/tools.py @@ -13,7 +13,7 @@ import sys from pathlib import Path -import orjson as json +import orjson DEFAULT_ENCODING = "utf-8" @@ -248,18 +248,11 @@ def compute_log_score(success, value, lower_bound, upper_bound): class Properties(dict): - def default(self, o): - if isinstance(o, Path): - return str(o) + def default(self, obj): + if isinstance(obj, Path): + return str(obj) else: - return super().default(o) - - # JSON_ARGS = { - # "cls": _PropertiesEncoder, - # "indent": 2, - # "separators": (",", ": "), - # "sort_keys": True, - # } + return obj """Transparently handle properties files compressed with xz.""" @@ -279,8 +272,10 @@ def __str__(self): return self.to_bytes().decode() def to_bytes(self): - return json.dumps( - self, default=self.default, option=json.OPT_INDENT_2 | json.OPT_SORT_KEYS + return orjson.dumps( + self, + default=self.default, + option=orjson.OPT_INDENT_2 | orjson.OPT_SORT_KEYS, ) def load(self, filename): @@ -288,7 +283,7 @@ def load(self, filename): open_func = lzma.open if path.suffix == ".xz" else open with open_func(path, "rb") as f: try: - self.update(json.loads(f.read())) + self.update(orjson.loads(f.read())) except ValueError as e: logging.critical(f"JSON parse error in file '{path}': {e}")