thankrandomness committed
Commit 7cae346 · 1 Parent(s): 8735941

max embeddings length 512

Files changed (1)
app.py +2 -2
app.py CHANGED
@@ -20,11 +20,11 @@ model = AutoModel.from_pretrained("neuml/pubmedbert-base-embeddings-matryoshka")
 
 # Function to embed text using mean pooling
 def embed_text(text):
-    inputs = tokenizer(text, padding=True, truncation=True, return_tensors='pt')
+    inputs = tokenizer(text, padding=True, truncation=True, max_length=512, return_tensors='pt')
     with torch.no_grad():
         output = model(**inputs)
     embeddings = meanpooling(output, inputs['attention_mask'])
-    return embeddings[:, :256].numpy()
+    return embeddings.numpy()
 
 # Initialize ChromaDB client
 client = chromadb.Client()
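
For reference, a minimal runnable sketch of the updated embed_text path. The model name and the tokenizer/pooling call pattern come from the diff above; loading the tokenizer from the same checkpoint and the body of the meanpooling helper are assumptions for illustration, not the app's exact implementation.

import torch
from transformers import AutoTokenizer, AutoModel

# Model name is taken from the diff; loading the tokenizer from the same
# checkpoint is an assumption.
tokenizer = AutoTokenizer.from_pretrained("neuml/pubmedbert-base-embeddings-matryoshka")
model = AutoModel.from_pretrained("neuml/pubmedbert-base-embeddings-matryoshka")

def meanpooling(output, attention_mask):
    # Illustrative mean pooling: average the token embeddings, weighted by the
    # attention mask so padding tokens are ignored. The app's actual helper may differ.
    token_embeddings = output[0]                     # (batch, seq_len, hidden)
    mask = attention_mask.unsqueeze(-1).float()      # (batch, seq_len, 1)
    return (token_embeddings * mask).sum(1) / mask.sum(1).clamp(min=1e-9)

# Updated embed_text from the commit: inputs are capped at 512 tokens,
# the model's maximum sequence length, and the full embedding is returned.
def embed_text(text):
    inputs = tokenizer(text, padding=True, truncation=True, max_length=512, return_tensors='pt')
    with torch.no_grad():
        output = model(**inputs)
    embeddings = meanpooling(output, inputs['attention_mask'])
    return embeddings.numpy()

embedding = embed_text("metformin is used to treat type 2 diabetes")
print(embedding.shape)  # (1, hidden_size), e.g. (1, 768) for a BERT-base model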