embedding.py 884 B

123456789101112131415161718192021222324252627282930313233
  1. from typing import List
  2. import numpy as np
  3. from text2vec import SentenceModel
  4. import config.config
  # Model location read from the project config module; get_embedding_model()
  # passes it as the first argument to SentenceModel.
  5. MODEL_PATH = config.config.MODEL_PATH
  # Module-level cache for the loaded model. Starts as None and is assigned
  # by get_embedding_model() the first time that function runs.
  6. _EMBEDDING_MODEL = None
  7. def get_embedding_model():
  8. global _EMBEDDING_MODEL
  9. if _EMBEDDING_MODEL is None:
  10. _EMBEDDING_MODEL = SentenceModel(MODEL_PATH, device="cpu")
  11. return _EMBEDDING_MODEL
  12. def compute_embedding(text: str) -> List[float]:
  13. model = get_embedding_model()
  14. vector = model.encode(text or "")
  15. if hasattr(vector, "tolist"):
  16. return vector.tolist()
  17. return list(vector)
  18. def cosine_similarity(vec_a: List[float], vec_b: List[float]) -> float:
  19. if not vec_a or not vec_b:
  20. return 0.0
  21. a = np.array(vec_a, dtype=float)
  22. b = np.array(vec_b, dtype=float)
  23. denom = np.linalg.norm(a) * np.linalg.norm(b)
  24. if denom == 0:
  25. return 0.0
  26. return float(np.dot(a, b) / denom)