jina name
Browse files- configs/datasets.yaml +2 -2
- src/paper_manager.py +1 -1
- src/utils/hash.py +1 -1
- src/utils/paper_retriever.py +1 -1
configs/datasets.yaml
CHANGED
|
@@ -5,7 +5,7 @@ DEFAULT:
|
|
| 5 |
log_dir: ./log
|
| 6 |
# embedding: sentence-transformers/all-MiniLM-L6-v2
|
| 7 |
# embedding: BAAI/llm-embedder
|
| 8 |
-
embedding: jina-embeddings-v3
|
| 9 |
embedding_task: text-matching # ONLY FOR JINA_v3, retrieval.passage, text-matching, retrieval.query
|
| 10 |
embedding_database: text-matching # ONLY FOR JINA_v3, retrieval.passage, text-matching, retrieval.query
|
| 11 |
|
|
@@ -39,4 +39,4 @@ RETRIEVE:
|
|
| 39 |
s_summary: 0.0
|
| 40 |
s_abstract: 0.0
|
| 41 |
similarity_threshold: 0.95
|
| 42 |
-
# similarity_threshold: 0.55
|
|
|
|
| 5 |
log_dir: ./log
|
| 6 |
# embedding: sentence-transformers/all-MiniLM-L6-v2
|
| 7 |
# embedding: BAAI/llm-embedder
|
| 8 |
+
embedding: jinaai/jina-embeddings-v3
|
| 9 |
embedding_task: text-matching # ONLY FOR JINA_v3, retrieval.passage, text-matching, retrieval.query
|
| 10 |
embedding_database: text-matching # ONLY FOR JINA_v3, retrieval.passage, text-matching, retrieval.query
|
| 11 |
|
|
|
|
| 39 |
s_summary: 0.0
|
| 40 |
s_abstract: 0.0
|
| 41 |
similarity_threshold: 0.95
|
| 42 |
+
# similarity_threshold: 0.55
|
src/paper_manager.py
CHANGED
|
@@ -664,7 +664,7 @@ class PaperManager:
|
|
| 664 |
postfix_set = {
|
| 665 |
"sentence-transformers/all-MiniLM-L6-v2": "",
|
| 666 |
"BAAI/llm-embedder": "_llm_embedder",
|
| 667 |
-
"jina-embeddings-v3": "_jina_v3"
|
| 668 |
}
|
| 669 |
postfix = postfix_set[self.config.DEFAULT.embedding]
|
| 670 |
if "jina" in postfix:
|
|
|
|
| 664 |
postfix_set = {
|
| 665 |
"sentence-transformers/all-MiniLM-L6-v2": "",
|
| 666 |
"BAAI/llm-embedder": "_llm_embedder",
|
| 667 |
+
"jinaai/jina-embeddings-v3": "_jina_v3"
|
| 668 |
}
|
| 669 |
postfix = postfix_set[self.config.DEFAULT.embedding]
|
| 670 |
if "jina" in postfix:
|
src/utils/hash.py
CHANGED
|
@@ -32,7 +32,7 @@ def check_embedding(repo_id):
|
|
| 32 |
"vocab.txt",
|
| 33 |
]
|
| 34 |
elif repo_id in [
|
| 35 |
-
"jina-embeddings-v3",
|
| 36 |
]:
|
| 37 |
files_to_download = [
|
| 38 |
"model.safetensors",
|
|
|
|
| 32 |
"vocab.txt",
|
| 33 |
]
|
| 34 |
elif repo_id in [
|
| 35 |
+
"jinaai/jina-embeddings-v3",
|
| 36 |
]:
|
| 37 |
files_to_download = [
|
| 38 |
"model.safetensors",
|
src/utils/paper_retriever.py
CHANGED
|
@@ -110,7 +110,7 @@ class Retriever(object):
|
|
| 110 |
self.embedding_postfix = ""
|
| 111 |
elif self.config.DEFAULT.embedding == "BAAI/llm-embedder":
|
| 112 |
self.embedding_postfix = "_llm_embedder"
|
| 113 |
-
elif self.config.DEFAULT.embedding == "jina-embeddings-v3":
|
| 114 |
self.embedding_postfix = "_jina_v3"
|
| 115 |
if self.config.DEFAULT.embedding_database == "text-matching":
|
| 116 |
self.embedding_postfix += "_text_matching"
|
|
|
|
| 110 |
self.embedding_postfix = ""
|
| 111 |
elif self.config.DEFAULT.embedding == "BAAI/llm-embedder":
|
| 112 |
self.embedding_postfix = "_llm_embedder"
|
| 113 |
+
elif self.config.DEFAULT.embedding == "jinaai/jina-embeddings-v3":
|
| 114 |
self.embedding_postfix = "_jina_v3"
|
| 115 |
if self.config.DEFAULT.embedding_database == "text-matching":
|
| 116 |
self.embedding_postfix += "_text_matching"
|