Spaces:
Running
Running
| from typing import Any | |
| from langflow.custom import Component | |
| from langflow.inputs.inputs import DictInput, DropdownInput, MessageTextInput, SecretStrInput | |
| from langflow.template.field.base import Output | |
class AstraVectorizeComponent(Component):
    """Deprecated component that assembles Astra Vectorize configuration.

    The single output, ``config``, is produced by :meth:`build_options` and is
    a plain dictionary describing the server-side embedding provider, the
    model, the authentication parameters and any extra model parameters.
    """

    display_name: str = "Astra Vectorize [DEPRECATED]"
    description: str = (
        "Configuration options for Astra Vectorize server-side embeddings. "
        "This component is deprecated. Please use the Astra DB Component directly."
    )
    documentation: str = "https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html"
    icon = "AstraDB"
    name = "AstraVectorize"

    # Display name -> [provider identifier sent to Astra, models listed in the help text].
    VECTORIZE_PROVIDERS_MAPPING = {
        "Azure OpenAI": ["azureOpenAI", ["text-embedding-3-small", "text-embedding-3-large", "text-embedding-ada-002"]],
        "Hugging Face - Dedicated": ["huggingfaceDedicated", ["endpoint-defined-model"]],
        "Hugging Face - Serverless": [
            "huggingface",
            [
                "sentence-transformers/all-MiniLM-L6-v2",
                "intfloat/multilingual-e5-large",
                "intfloat/multilingual-e5-large-instruct",
                "BAAI/bge-small-en-v1.5",
                "BAAI/bge-base-en-v1.5",
                "BAAI/bge-large-en-v1.5",
            ],
        ],
        "Jina AI": [
            "jinaAI",
            [
                "jina-embeddings-v2-base-en",
                "jina-embeddings-v2-base-de",
                "jina-embeddings-v2-base-es",
                "jina-embeddings-v2-base-code",
                "jina-embeddings-v2-base-zh",
            ],
        ],
        "Mistral AI": ["mistral", ["mistral-embed"]],
        "NVIDIA": ["nvidia", ["NV-Embed-QA"]],
        "OpenAI": ["openai", ["text-embedding-3-small", "text-embedding-3-large", "text-embedding-ada-002"]],
        "Upstage": ["upstageAI", ["solar-embedding-1-large"]],
        "Voyage AI": [
            "voyageAI",
            ["voyage-large-2-instruct", "voyage-law-2", "voyage-code-2", "voyage-large-2", "voyage-2"],
        ],
    }

    # One "<provider>: <model>, <model>, ..." paragraph per provider, embedded
    # in the "Model Name" help text below.
    VECTORIZE_MODELS_STR = "\n\n".join(
        f"{provider}: {', '.join(models[1])}" for provider, models in VECTORIZE_PROVIDERS_MAPPING.items()
    )

    inputs = [
        DropdownInput(
            name="provider",
            display_name="Provider",
            # Pass a concrete list of option strings rather than a live dict view.
            options=list(VECTORIZE_PROVIDERS_MAPPING),
            value="",
            required=True,
        ),
        MessageTextInput(
            name="model_name",
            display_name="Model Name",
            info="The embedding model to use for the selected provider. Each provider has a different set of models "
            f"available (full list at https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html):\n\n{VECTORIZE_MODELS_STR}",
            required=True,
        ),
        MessageTextInput(
            name="api_key_name",
            display_name="API Key name",
            info="The name of the embeddings provider API key stored on Astra. "
            "If set, it will override the 'ProviderKey' in the authentication parameters.",
        ),
        # Declared exactly once: build_options reads it as self.authentication.
        DictInput(
            name="authentication",
            display_name="Authentication Parameters",
            is_list=True,
            advanced=True,
        ),
        SecretStrInput(
            name="provider_api_key",
            display_name="Provider API Key",
            info="An alternative to the Astra Authentication that passes an API key for the provider with each request "
            "to Astra DB. "
            "This may be used when Vectorize is configured for the collection, "
            "but no corresponding provider secret is stored within Astra's key management system.",
            advanced=True,
        ),
        DictInput(
            name="model_parameters",
            display_name="Model Parameters",
            advanced=True,
            is_list=True,
        ),
    ]

    outputs = [
        Output(display_name="Vectorize", name="config", method="build_options", types=["dict"]),
    ]

    def build_options(self) -> dict[str, Any]:
        """Build the Vectorize configuration dictionary from the component inputs.

        Returns:
            A dictionary with two entries:

            * ``"collection_vector_service_options"`` - the provider identifier,
              model name, authentication mapping and model parameters.
            * ``"collection_embedding_api_key"`` - the value of the
              ``provider_api_key`` input.

        Raises:
            KeyError: If ``self.provider`` is not one of the keys of
                ``VECTORIZE_PROVIDERS_MAPPING`` (including the empty default).
        """
        # Translate the display name picked in the dropdown into the provider identifier.
        provider_value = self.VECTORIZE_PROVIDERS_MAPPING[self.provider][0]

        # Work on a copy so the component's own input value is never mutated.
        authentication = {**(self.authentication or {})}
        if self.api_key_name:
            # An explicit key name takes precedence over any "providerKey" entry.
            authentication["providerKey"] = self.api_key_name

        return {
            # must match astrapy.info.CollectionVectorServiceOptions
            "collection_vector_service_options": {
                "provider": provider_value,
                "modelName": self.model_name,
                "authentication": authentication,
                "parameters": self.model_parameters or {},
            },
            "collection_embedding_api_key": self.provider_api_key,
        }