|
7 | 7 | # pyright: reportImportCycles=false |
8 | 8 |
|
9 | 9 | from pathlib import Path |
10 | | -from typing import TYPE_CHECKING, ClassVar |
| 10 | +from typing import TYPE_CHECKING, ClassVar, cast |
11 | 11 | from typing_extensions import override |
12 | 12 |
|
13 | | -from ._constants import MAX_RECORD_SIZE |
| 13 | +from ._constants import JSONLT_VERSION, MAX_RECORD_SIZE |
14 | 14 | from ._encoding import validate_no_surrogates |
15 | 15 | from ._exceptions import ( |
16 | 16 | ConflictError, |
|
20 | 20 | TransactionError, |
21 | 21 | ) |
22 | 22 | from ._filesystem import FileSystem, RealFileSystem |
23 | | -from ._header import serialize_header |
| 23 | +from ._header import Header, serialize_header |
24 | 24 | from ._json import serialize_json, utf8_byte_length |
25 | 25 | from ._keys import ( |
26 | 26 | Key, |
|
36 | 36 | from ._state import compute_logical_state |
37 | 37 |
|
38 | 38 | if TYPE_CHECKING: |
39 | | - from ._header import Header |
40 | | - from ._json import JSONObject |
| 39 | + from collections.abc import Iterable, Mapping |
| 40 | + |
| 41 | + from ._json import JSONObject, JSONValue |
41 | 42 | from ._transaction import Transaction |
42 | 43 |
|
43 | 44 | __all__ = ["Table"] |
@@ -144,6 +145,176 @@ def __init__( |
144 | 145 | # Initial load |
145 | 146 | self._load(key) |
146 | 147 |
|
| 148 | + @classmethod |
| 149 | + def from_records( # noqa: PLR0913 |
| 150 | + cls, |
| 151 | + path: "Path | str", |
| 152 | + records: "Mapping[str, object] | Iterable[Mapping[str, object]]", |
| 153 | + key: KeySpecifier, |
| 154 | + *, |
| 155 | + auto_reload: bool = True, |
| 156 | + lock_timeout: float | None = None, |
| 157 | + max_file_size: int | None = None, |
| 158 | + _fs: "FileSystem | None" = None, |
| 159 | + ) -> "Table": |
| 160 | + """Create a table from a list of records. |
| 161 | +
|
| 162 | + Creates a new file at the specified path with the given records. |
| 163 | + All records are validated before writing, and the file is written |
| 164 | + atomically. If any record is invalid, no file is written. |
| 165 | +
|
| 166 | + A header with the key specifier is always written, making the |
| 167 | + file self-describing. |
| 168 | +
|
| 169 | + Args: |
| 170 | + path: Path to create the JSONLT file at. |
| 171 | + records: A single record dict or iterable of record dicts. |
| 172 | + key: Key specifier for the table. |
| 173 | + auto_reload: If True (default), check for file changes before |
| 174 | + each read operation and reload if necessary. |
| 175 | + lock_timeout: Maximum seconds to wait for file lock on write |
| 176 | + operations. None means wait indefinitely. |
| 177 | + max_file_size: Maximum allowed file size in bytes when loading. |
| 178 | + If the file exceeds this limit, LimitError is raised. |
| 179 | + _fs: Internal filesystem abstraction for testing. Do not use. |
| 180 | +
|
| 181 | + Returns: |
| 182 | + A new Table instance backed by the created file. |
| 183 | +
|
| 184 | + Raises: |
| 185 | + InvalidKeyError: If any record is missing required key fields, |
| 186 | + has invalid key values, or contains $-prefixed fields. |
| 187 | + LimitError: If any key exceeds 1024 bytes or any record exceeds 1 MiB. |
| 188 | + FileError: If the file cannot be created. |
| 189 | +
|
| 190 | + Example: |
| 191 | + >>> table = Table.from_records( |
| 192 | + ... "users.jsonlt", |
| 193 | + ... [ |
| 194 | + ... {"id": "alice", "role": "admin"}, |
| 195 | + ... {"id": "bob", "role": "user"}, |
| 196 | + ... ], |
| 197 | + ... key="id", |
| 198 | + ... ) |
| 199 | + >>> table.count() |
| 200 | + 2 |
| 201 | + """ |
| 202 | + file_path = Path(path) if isinstance(path, str) else path |
| 203 | + fs = RealFileSystem() if _fs is None else _fs |
| 204 | + normalized_key = normalize_key_specifier(key) |
| 205 | + |
| 206 | + # Normalize records: single dict -> list |
| 207 | + if isinstance(records, dict): |
| 208 | + record_list = cast("list[Mapping[str, object]]", [records]) |
| 209 | + else: |
| 210 | + record_list = cast("list[Mapping[str, object]]", list(records)) |
| 211 | + |
| 212 | + # Build lines: header + validated records |
| 213 | + lines: list[str] = [ |
| 214 | + serialize_header(Header(version=JSONLT_VERSION, key=normalized_key)) |
| 215 | + ] |
| 216 | + |
| 217 | + for index, record in enumerate(record_list): |
| 218 | + try: |
| 219 | + record_value = cast("JSONValue", record) |
| 220 | + record_obj = cast("JSONObject", record) |
| 221 | + |
| 222 | + validate_no_surrogates(record_value) |
| 223 | + validate_record(record_obj, normalized_key) |
| 224 | + extracted_key = extract_key(record_obj, normalized_key) |
| 225 | + validate_key_length(extracted_key) |
| 226 | + |
| 227 | + serialized = serialize_json(record) |
| 228 | + if utf8_byte_length(serialized) > MAX_RECORD_SIZE: |
| 229 | + msg = f"record size exceeds maximum {MAX_RECORD_SIZE}" |
| 230 | + raise LimitError(msg) # noqa: TRY301 |
| 231 | + |
| 232 | + lines.append(serialized) |
| 233 | + except (InvalidKeyError, LimitError) as e: # noqa: PERF203 |
| 234 | + msg = f"record at index {index}: {e}" |
| 235 | + raise type(e)(msg) from e |
| 236 | + |
| 237 | + fs.ensure_parent_dir(file_path) |
| 238 | + fs.atomic_replace(file_path, lines) |
| 239 | + |
| 240 | + return cls( |
| 241 | + file_path, |
| 242 | + key=normalized_key, |
| 243 | + auto_reload=auto_reload, |
| 244 | + lock_timeout=lock_timeout, |
| 245 | + max_file_size=max_file_size, |
| 246 | + _fs=_fs, |
| 247 | + ) |
| 248 | + |
| 249 | + @classmethod |
| 250 | + def from_file( |
| 251 | + cls, |
| 252 | + path: "Path | str", |
| 253 | + key: "KeySpecifier | None" = None, |
| 254 | + *, |
| 255 | + auto_reload: bool = True, |
| 256 | + lock_timeout: float | None = None, |
| 257 | + max_file_size: int | None = None, |
| 258 | + _fs: "FileSystem | None" = None, |
| 259 | + ) -> "Table": |
| 260 | + """Load a table from an existing file. |
| 261 | +
|
| 262 | + Opens an existing JSONLT file. If the file has a header with a |
| 263 | + key specifier, uses that key. An explicit key parameter can be |
| 264 | + provided to override or when the file has no header. |
| 265 | +
|
| 266 | + This method is semantically equivalent to the Table constructor |
| 267 | + but explicitly indicates the intent to load an existing file |
| 268 | + (as opposed to potentially creating a new one). |
| 269 | +
|
| 270 | + Args: |
| 271 | + path: Path to the existing JSONLT file. |
| 272 | + key: Optional key specifier. If None, auto-detected from the |
| 273 | + file header. If provided, must match the header key (if any). |
| 274 | + auto_reload: If True (default), check for file changes before |
| 275 | + each read operation and reload if necessary. |
| 276 | + lock_timeout: Maximum seconds to wait for file lock on write |
| 277 | + operations. None means wait indefinitely. |
| 278 | + max_file_size: Maximum allowed file size in bytes when loading. |
| 279 | + If the file exceeds this limit, LimitError is raised. |
| 280 | + _fs: Internal filesystem abstraction for testing. Do not use. |
| 281 | +
|
| 282 | + Returns: |
| 283 | + A Table instance backed by the file. |
| 284 | +
|
| 285 | + Raises: |
| 286 | + FileError: If the file does not exist or cannot be read. |
| 287 | + InvalidKeyError: If no key specifier can be determined (file |
| 288 | + has no header and key not provided), or if the provided |
| 289 | + key doesn't match the header key. |
| 290 | + ParseError: If the file contains invalid content. |
| 291 | +
|
| 292 | + Example: |
| 293 | + >>> # File has header with key |
| 294 | + >>> table = Table.from_file("users.jsonlt") |
| 295 | + >>> table.key_specifier |
| 296 | + 'id' |
| 297 | +
|
| 298 | + >>> # File without header, provide key explicitly |
| 299 | + >>> table = Table.from_file("data.jsonlt", key="name") |
| 300 | + """ |
| 301 | + file_path = Path(path) if isinstance(path, str) else path |
| 302 | + fs = RealFileSystem() if _fs is None else _fs |
| 303 | + |
| 304 | + stats = fs.stat(file_path) |
| 305 | + if not stats.exists: |
| 306 | + msg = f"file not found: {file_path}" |
| 307 | + raise FileError(msg) |
| 308 | + |
| 309 | + return cls( |
| 310 | + file_path, |
| 311 | + key=key, |
| 312 | + auto_reload=auto_reload, |
| 313 | + lock_timeout=lock_timeout, |
| 314 | + max_file_size=max_file_size, |
| 315 | + _fs=_fs, |
| 316 | + ) |
| 317 | + |
147 | 318 | def _load(self, caller_key: "KeySpecifier | None" = None) -> None: |
148 | 319 | """Load or reload the table from disk. |
149 | 320 |
|
|
0 commit comments