-
Notifications
You must be signed in to change notification settings - Fork 282
/
Copy path17_embeddings.py
41 lines (32 loc) · 1.07 KB
/
17_embeddings.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
"""
Obtain data from https://sectors.app
Accompanying course material: https://sectors.app/bulletin/ai-search
"""
import numpy as np
from dotenv import load_dotenv
load_dotenv()
from langchain_openai import OpenAIEmbeddings
# Embedding client constructed with its default settings.
# NOTE(review): presumably reads the OpenAI credentials from the environment
# populated by load_dotenv() above -- confirm.
embed = OpenAIEmbeddings()

# Free-text question that will be matched against the document names below.
input_text = "supertype financial statements?"

# Candidate document names to rank against the query.
input_docs = [
    "financial_algoritma",
    "annual_report_algoritma",
    "financial_supertype",
    "agm_supertype",
    "egm_supertype",
    "financial_hypergrowth",
    "annual_report_hypergrowth",
    "agm_sectors",
    "financial_statements_template",
]

# Embed the query and the candidates. The ranking code further down indexes
# input_docs by position in docs, so the two are expected to stay aligned.
query = embed.embed_query(input_text)
docs = embed.embed_documents(input_docs)
def cosine_similarity(vec1, vec2):
    """Return the cosine similarity between two vectors.

    Args:
        vec1: First vector (any sequence or array accepted by ``np.dot``).
        vec2: Second vector, same length as ``vec1``.

    Returns:
        ``dot(vec1, vec2) / (||vec1|| * ||vec2||)``. When either vector has
        zero norm the similarity is undefined; ``0.0`` is returned instead of
        NaN so that a degenerate vector can never win an ``argmax`` ranking.
    """
    dot_product = np.dot(vec1, vec2)
    norm_product = np.linalg.norm(vec1) * np.linalg.norm(vec2)
    # Guard against 0/0: NumPy would yield NaN (plus a RuntimeWarning), and
    # np.argmax treats NaN as the maximum value.
    if norm_product == 0:
        return 0.0
    return dot_product / norm_product
# Score every document embedding against the query embedding.
similarities = [
    cosine_similarity(query, doc)
    for doc in docs
]

# Report each score, keyed by the document's position in the list.
for i, sim in enumerate(similarities):
    print(f"Cosine Similarity with doc {i}: {sim}")

# Position of the highest score, mapped back to the matching document name.
most_similar_index = np.argmax(similarities)
print(f"Document: {input_docs[most_similar_index]}")