parsers.py 1.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061
  1. """Parsers used to convert raw LLM responses into structured payloads."""
  2. from __future__ import annotations
  3. import json
  4. from dataclasses import dataclass
  5. from typing import Any, Dict, Optional
  6. @dataclass
  7. class ParsedResult:
  8. """Normalized structure returned by every parser."""
  9. data: Any
  10. metadata: Dict[str, Any]
  11. class BaseParser:
  12. """Base class for all parsers."""
  13. name: str = "base"
  14. def parse(self, text: str, **_: Any) -> ParsedResult: # pragma: no cover - abstract
  15. raise NotImplementedError
  16. class JsonParser(BaseParser):
  17. """Parser that tries to coerce the response into JSON."""
  18. name = "json"
  19. def __init__(self, root_key: Optional[str] = None) -> None:
  20. self.root_key = root_key
  21. def parse(self, text: str, **_: Any) -> ParsedResult:
  22. content = text.strip()
  23. if content.startswith("```"):
  24. lines = content.splitlines()[1:-1]
  25. content = "\n".join(lines)
  26. if content.startswith("json"):
  27. content = content[4:].lstrip()
  28. payload = json.loads(content or "{}")
  29. if self.root_key and isinstance(payload, dict):
  30. payload = payload.get(self.root_key, payload)
  31. return ParsedResult(data=payload, metadata={"parser": self.name})
  32. class TextParser(BaseParser):
  33. """Return the verbatim text result."""
  34. name = "text"
  35. def parse(self, text: str, **_: Any) -> ParsedResult:
  36. return ParsedResult(data=text.strip(), metadata={"parser": self.name})
  37. PARSER_REGISTRY = {
  38. JsonParser.name: JsonParser,
  39. TextParser.name: TextParser,
  40. }