"""Parsers used to convert raw LLM responses into structured payloads."""
- from __future__ import annotations
- import json
- from dataclasses import dataclass
- from typing import Any, Dict, Optional
@dataclass
class ParsedResult:
    """Common container that every parser's ``parse`` call hands back."""

    # The parsed payload itself; its concrete type depends on the parser
    # (e.g. decoded JSON for ``JsonParser``, a string for ``TextParser``).
    data: Any
    # Extra information about the parse; the parsers in this module store
    # their own name under the "parser" key.
    metadata: Dict[str, Any]
class BaseParser:
    """Common ancestor of all parsers; subclasses override :meth:`parse`."""

    # Identifier of the parser; subclasses replace it with their own name.
    name: str = "base"

    def parse(self, text: str, **_: Any) -> ParsedResult:  # pragma: no cover - abstract
        """Turn *text* into a ``ParsedResult``.

        Extra keyword arguments are accepted and ignored. The base
        implementation is a placeholder and always raises
        ``NotImplementedError``.
        """
        raise NotImplementedError
class JsonParser(BaseParser):
    """Parser that tries to coerce the response into JSON.

    The response may be bare JSON or JSON wrapped in a Markdown code fence
    (three backticks, optionally followed by a language tag such as
    ``json``). An empty response is treated as an empty JSON object.
    """

    name = "json"

    def __init__(self, root_key: Optional[str] = None) -> None:
        """Create the parser.

        Args:
            root_key: When set and the decoded payload is a dict containing
                this key, the value stored under the key is returned
                instead of the whole payload.
        """
        self.root_key = root_key

    @staticmethod
    def _strip_code_fence(content: str) -> str:
        """Return the body of a fenced block whose first line is the fence."""
        body = content.splitlines()[1:]  # drop the opening fence line
        # Only drop the last line when it really is a closing fence. LLM
        # output is often truncated before the fence is closed, and blindly
        # discarding the final line would throw away (part of) the payload.
        if body and body[-1].strip().startswith("```"):
            body = body[:-1]
        return "\n".join(body).strip()

    def parse(self, text: str, **_: Any) -> ParsedResult:
        """Decode *text* as JSON and wrap it in a ``ParsedResult``.

        Args:
            text: Raw response, optionally wrapped in a Markdown code fence.
            **_: Ignored; accepted for signature compatibility.

        Returns:
            A ``ParsedResult`` whose ``data`` is the decoded JSON value (or
            the value under ``root_key`` when applicable) and whose
            ``metadata`` records this parser's name under ``"parser"``.

        Raises:
            json.JSONDecodeError: If the remaining content is not valid JSON.
        """
        content = text.strip()
        if content.startswith("```"):
            content = self._strip_code_fence(content)
        # A leftover "json" language tag (e.g. on its own line inside the
        # fence) is not part of the payload.
        if content.startswith("json"):
            content = content[4:].lstrip()
        payload = json.loads(content or "{}")
        if self.root_key and isinstance(payload, dict):
            # Fall back to the whole payload when the key is absent.
            payload = payload.get(self.root_key, payload)
        return ParsedResult(data=payload, metadata={"parser": self.name})
class TextParser(BaseParser):
    """Parser that passes the response through as plain text."""

    name = "text"

    def parse(self, text: str, **_: Any) -> ParsedResult:
        """Wrap *text*, minus leading/trailing whitespace, in a ``ParsedResult``.

        Extra keyword arguments are accepted and ignored.
        """
        cleaned = text.strip()
        info = {"parser": self.name}
        return ParsedResult(data=cleaned, metadata=info)
# Lookup table from each parser's ``name`` attribute to its class.
PARSER_REGISTRY = {
    parser_cls.name: parser_cls for parser_cls in (JsonParser, TextParser)
}
|