# Spaces: Sleeping
import duckdb
import polars as pl
from datasets import load_dataset
from model2vec import StaticModel

# Load a model from the HuggingFace hub (in this case the potion-base-8M model)
model_name = "minishlab/potion-base-8M"
model = StaticModel.from_pretrained(model_name)

# Make embeddings: encode every value of the "act" column and store the
# result next to the original rows as a new "embeddings" column.
ds = load_dataset("fka/awesome-chatgpt-prompts")
df = ds["train"].to_polars()
# Hand the encoder a plain list of strings rather than a polars Series.
embeddings = model.encode(df["act"].to_list())
df = df.with_columns(pl.Series(embeddings).alias("embeddings"))

# Encode the search phrase with the same model so it is comparable to the
# stored embeddings.
vector = model.encode("An Ethereum Developer", show_progress_bar=True)

# Take the array width for the SQL cast from the query vector itself instead
# of hard-coding it, so switching `model_name` to a model with a different
# embedding size does not break the query.
dim = len(vector)

# `FROM df` refers to the polars DataFrame defined above; rows are ordered by
# cosine distance to the query vector and the 10 closest are printed.
duckdb.sql(
    query=f"""
    SELECT *
    FROM df
    ORDER BY array_cosine_distance(embeddings, {vector.tolist()}::FLOAT[{dim}])
    LIMIT 10
    """
).show()