| 123456789101112131415161718192021222324252627282930313233 |
- from typing import List
- import numpy as np
- from text2vec import SentenceModel
- import config.config
# Model location, taken from the project configuration; it is passed
# unchanged as the first argument to SentenceModel below.
MODEL_PATH = config.config.MODEL_PATH

# Module-level cache for the embedding model. Stays None until the first
# call to get_embedding_model().
_EMBEDDING_MODEL = None


def get_embedding_model():
    """Return the shared embedding model, building it on first use.

    The model is constructed once as ``SentenceModel(MODEL_PATH,
    device="cpu")`` and stored in the module-level ``_EMBEDDING_MODEL``;
    every later call returns that same cached object.
    """
    global _EMBEDDING_MODEL
    if _EMBEDDING_MODEL is not None:
        # Already built by an earlier call: reuse it.
        return _EMBEDDING_MODEL
    _EMBEDDING_MODEL = SentenceModel(MODEL_PATH, device="cpu")
    return _EMBEDDING_MODEL
def compute_embedding(text: str) -> List[float]:
    """Encode *text* with the shared embedding model and return a list.

    A falsy *text* (``None`` or ``""``) is encoded as the empty string.
    If the encoder output exposes ``tolist()`` its result is returned;
    otherwise the output is materialized with ``list()``.
    """
    encoder = get_embedding_model()
    encoded = encoder.encode(text if text else "")
    if not hasattr(encoded, "tolist"):
        # Generic iterable without a tolist() method.
        return list(encoded)
    return encoded.tolist()
def cosine_similarity(vec_a: List[float], vec_b: List[float]) -> float:
    """Return the cosine similarity of two vectors.

    Args:
        vec_a: First vector; a list, tuple or numpy array of numbers.
        vec_b: Second vector, same length as *vec_a*.

    Returns:
        ``dot(a, b) / (|a| * |b|)`` as a Python float, or ``0.0`` when
        either input is ``None``, empty, or has zero norm.

    Raises:
        ValueError: Propagated from ``np.dot`` when the two vectors have
            incompatible lengths.
    """
    # Explicit None / size checks instead of truthiness: ``not ndarray``
    # raises ValueError for arrays with more than one element, so the
    # truthiness form breaks on numpy inputs.
    if vec_a is None or vec_b is None:
        return 0.0
    a = np.asarray(vec_a, dtype=float)
    b = np.asarray(vec_b, dtype=float)
    if a.size == 0 or b.size == 0:
        return 0.0
    denom = np.linalg.norm(a) * np.linalg.norm(b)
    if denom == 0:
        # At least one zero vector: similarity is undefined, report 0.0.
        return 0.0
    return float(np.dot(a, b) / denom)
|