# field_similarity.py
  1. def _longest_common_substring_length(a, b):
  2. if not a or not b:
  3. return 0
  4. prev = [0] * (len(b) + 1)
  5. max_len = 0
  6. for i in range(1, len(a) + 1):
  7. curr = [0] * (len(b) + 1)
  8. for j in range(1, len(b) + 1):
  9. if a[i - 1] == b[j - 1]:
  10. curr[j] = prev[j - 1] + 1
  11. if curr[j] > max_len:
  12. max_len = curr[j]
  13. prev = curr
  14. return max_len
  15. def analyze_fields(question, field_list):
  16. if question is None or field_list is None:
  17. return {}
  18. fields = field_list.split("@@")
  19. ratios = []
  20. field_count = 0
  21. for field in fields:
  22. if not field:
  23. continue
  24. field_count += 1
  25. lcs_len = _longest_common_substring_length(question, field)
  26. ratios.append(lcs_len / len(field))
  27. if field_count == 0:
  28. return {}
  29. non_zero = sum(1 for r in ratios if r != 0)
  30. avg = sum(ratios) / field_count
  31. max_ratio = max(ratios) if ratios else 0
  32. return {
  33. "field_count": field_count,
  34. "non_zero_count": non_zero,
  35. "avg_ratio": avg,
  36. "max_ratio": max_ratio,
  37. }