from fastapi import FastAPI, Request, Query
import src.Paraphrase as Paraphrase
import src.Translate
from typing import Optional
from fastapi_mcp import FastApiMCP
from huggingface_hub import hf_hub_download, list_repo_files
from sentence_transformers import SentenceTransformer
app = FastAPI()
# app = FastAPI(docs_url="/docs")
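# Translation and paraphrase models on the Hugging Face Hub, keyed by short alias.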
MODELS = {'benro': 'BlackKakapo/opus-mt-en-ro',
          'broen': 'BlackKakapo/opus-mt-ro-en',
          'mttcbig': 'Helsinki-NLP/opus-mt-tc-big-en-ro',
          'gemma': 'Gargaz/gemma-2b-romanian-better',
          'mbartenro': 'ancebuc/mbart-translation-en-ro',
          't5enro': 'ancebuc/t5-translation-en-ro',
          'pegasus': 'ancebuc/pegasus-translation-en-ro',
          'mbart': 'facebook/mbart-large-cc25',
          'paraphrase': 'tuner007/pegasus_paraphrase'}
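# Multilingual sentence-embedding models mapped to their output embedding dimension.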
EMBEDDING_MODELS = {"all-MiniLM-L6-v2":384,
"sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2":384,
"sentence-transformers/distiluse-base-multilingual-cased-v2":512,
"sentence-transformers/stsb-xlm-r-multilingual":768,
"sentence-transformers/use-cmlm-multilingual":768,
"sentence-transformers/paraphrase-multilingual-mpnet-base-v2":768}
EMBEDDING_MODEL = "sentence-transformers/distiluse-base-multilingual-cased-v2"
@app.get("/")
def index(request: Request):
    from fastapi.responses import HTMLResponse
    host_url = "https://" + request.url.netloc
    mcp_config = '''{"mcpServers": {"fastapi-mcp": {"url": "https://tiberiucristianleon-fastapimt.hf.space/mcp"}}}'''
    html_content = f'''
    <html>
        <head>
            <title>FastAPI with MCP</title>
        </head>
        <body>
            <h2>FastAPI URLS</h2>
            <p><a href="{host_url}" target="_blank">Host URL:</a> {host_url}</p>
            <p><a href="{host_url}/docs" target="_blank">DOCS</a></p>
            <p><a href="{host_url}/redoc" target="_blank">REDOC</a></p>
            <p><a href="{host_url}/openapi.json" target="_blank">openapi.json</a></p>
            <p><a href="{host_url}/mcp" target="_blank">MCP</a></p>
            <p>MCP configuration: {mcp_config}</p>
            <p>MODELS: {list(MODELS.values())}</p>
        </body>
    </html>
    '''
    return HTMLResponse(content=html_content)
# @app.get("/")
# async def get_host_url(request: Request):
# host_url = request.url.scheme + "s://" + request.url.netloc
# return {"host_url": host_url, 'endpoints': ['/paraphrase', '/translate', f'{host_url}/docs', f'{host_url}/redoc', f'{host_url}/openapi.json'], 'models': MODELS}
@app.get("/paraphrase", operation_id="get_paraphrase", description="Paraphrase text", tags=["paraphrase"], summary="Paraphrase text")
def paraphrase(text: str, model: str = MODELS['paraphrase']):
    resultValue, exception = Paraphrase.paraphraseParaphraseMethod(text, model)
    return {"input": text, "result": resultValue, "exception": exception}
@app.get("/listmodels", operation_id="list_models", description="List models", tags=["listmodels"], summary="List models")
def listmodels():
return {"MODELS": MODELS, "EMBEDDING_MODELS": EMBEDDING_MODELS}
# model: Optional[str] = MODELS['benro']
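# Example: https://tiberiucristianleon-fastapimt.hf.space/translate?input_text=Hello&sl=en&tl=ro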
@app.get("/translate", operation_id="get_translate", description="Translate text", tags=["translate"], summary="Translate text")
def translate(input_text: str, model_name: str = MODELS['mttcbig'], sl: str = 'en', tl: str = 'ro'):
    message = f'Translated from {sl} to {tl} with {model_name}'
    if 'BlackKakapo' in model_name:
        translation, model_name = src.Translate.paraphraseTranslateMethod(input_text, model_name)
    elif 'Helsinki-NLP' in model_name:
        translation, message = src.Translate.Translators(model_name, sl, tl, input_text).HelsinkiNLP_mulroa()
    # text2textgenerationpipe, translationpipe
    # elif model_name == MODELS['mbartenro']:
    #     translation, message = src.Translate.Translators(model_name, sl, tl, input_text).text2textgenerationpipe()
    elif model_name == MODELS['t5enro'] or model_name == MODELS['pegasus'] or model_name == MODELS['mbartenro']:
        translation, message = src.Translate.Translators(model_name, sl, tl, input_text).translationpipe()
    elif model_name == MODELS['mbart']:
        translation, message = src.Translate.Translators(model_name, sl, tl, input_text).mbartlarge()
    else:
        translation: str = src.Translate.gemma_direct(input_text, model_name)
    return {"input_text": input_text, "translation": translation, "model_name": model_name, "message": message}
# Keep track of installed (src, tgt) pairs
installed_pairs = set()
# https://tiberiucristianleon-fastapimt.hf.space/bergamot?input_text=das%20ist%20keine%20gute%20Frau&input_text=das%20ist%20eine%20gute%20Nachricht&sl=de&tl=en&model=bergamot
@app.get("/bergamot", operation_id="get_bergamot", description="Translate text with Bergamot", tags=["bergamot"], summary="Translate text with Bergamot")
def bergamot(input_text: list[str] = Query(description="Input string or list of strings"), sl: str = 'de', tl: str = 'en', model_name: Optional[str] = 'base/deen'):
"""
Translates the input text from the source language to the target language using a specified model.
Parameters:
input_text (str | list[str]): The source text to be translated, can be either a string or a list of strings
sl (str): The source language of the input text
tl (str): The target language into which the input text is translated
model_name (str): The selected translation model name
Returns:
dict:
input_text(str): The input text in the source language
translated_text(str): The input text translated into the selected target language
message_text(str): A descriptive message summarizing the translation process. Example: "Translated from English to German with base/ende."
Example:
>>> bergamot("Hello world", "en", "de", "base/ende")
{"input_text": "Hello world", "translated_text": "Hallo Welt", "message_text": "Translated from English to German with base/ende."}
"""
    try:
        import bergamot
        # print(type(input_text))
        # input_text = [input_text] if isinstance(input_text, str) else input_text
        config = bergamot.ServiceConfig(numWorkers=4)
        service = bergamot.Service(config)
        repo_id = "TiberiuCristianLeon/Bergamot"
        branches = ['base', 'base-memory', 'tiny']
        subfolder = f"{sl}{tl}"
        localfolder = f"{subfolder}/{model_name}"
        # List all files in the repo
        all_files = list_repo_files(repo_id, repo_type='model')
        print('input text type:', type(input_text), len(all_files), 'installed_pairs', installed_pairs, 'defaultlocalfolder', localfolder)
        for branch in branches:
            branch_files = [f for f in all_files if f.startswith(branch)]
            fullmodel_files = [f for f in branch_files if f.startswith(model_name)]
            print('branch_files', len(branch_files), 'fullmodel_files', fullmodel_files)
            model_files = [f.split(f'{model_name}/')[1] for f in fullmodel_files]
            print('branch_files', len(branch_files), 'model_files', model_files)
            for file_path in model_files:
                if localfolder not in installed_pairs:
                    # local_files_only (bool, optional, defaults to False) — If True, avoid downloading the file and return the path to the local cached file if it exists.
                    # dry_run (bool, optional, defaults to False) — If True, perform a dry run without actually downloading the file. Returns a DryRunFileInfo object containing information about what would be downloaded.
                    local_path = hf_hub_download(repo_id=repo_id, subfolder=model_name, filename=file_path, local_dir=subfolder)
                    print(f"Downloaded to: {local_path}")  # Downloaded to: deen/base/deen/config.yml
                    # localfolder = local_path.rsplit('/', 1)[0]
        installed_pairs.add(localfolder)
        try:
            dry_run = hf_hub_download(repo_id=repo_id, subfolder=model_name, filename='config.yml', local_dir=subfolder)
            print('installed_pairs', installed_pairs, 'localfolder', localfolder, 'dry_run', dry_run)
        except Exception as dryrunerror:
            print('installed_pairs', installed_pairs, 'localfolder', localfolder, 'dry_run', dryrunerror)
        model = service.modelFromConfigPath(f"{localfolder}/config.yml")
        # model = service.modelFromConfig(localfolder)
        options = bergamot.ResponseOptions(alignment=False, sentenceMappings=False, qualityScores=False, HTML=False)
        rawresponse = service.translate(model, bergamot.VectorString(input_text), options)
        response: list | str = [r.target.text for r in rawresponse] if len(rawresponse) > 1 else next(iter(rawresponse)).target.text
        print(type(input_text), len(input_text), len(rawresponse), type(response), response)
        # response = [r.target.text for r in model_response][0] if isinstance(response, bergamot._bergamot.VectorResponse) else next(iter(response)).target.text
        # response is of type bergamot._bergamot.VectorResponse, an iterable of bergamot._bergamot.Response
        message_text = f"Translated from {sl} to {tl} with {model_name}."
    except Exception as generalerror:
        response, message_text = str(generalerror), f"Error translating from {sl} to {tl} with {model_name}: {generalerror}."
        print(generalerror)
    return {"input": input_text, "translated_text": response, "message_text": message_text}
@app.get("/embed", operation_id="get_embeddings", description="Embed text", tags=["embed"], summary="Embed text")
def embed(text: str, model: str = EMBEDDING_MODEL):
    model = SentenceTransformer(model)
    embeddings = model.encode(text)
    print(embeddings.shape, len(embeddings))
    # similarities = model.similarity(embeddings, embeddings)
    return {"input": text, "embeddings": embeddings.tolist(), "shape": embeddings.shape}
# Create an MCP server based on this app
mcp = FastApiMCP(
    app,
    name="Translate and paraphrase FASTAPI MCP",
    description="MCP server to translate and paraphrase text",
    describe_all_responses=True,
    describe_full_response_schema=True,
    include_operations=["get_translate", "get_paraphrase"],
    include_tags=["paraphrase", "translate", "bergamot"]
)
# Mount the MCP server directly to the FASTAPI app
mcp.mount()
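# Assumed run command on Hugging Face Spaces (not part of this file): uvicorn app:app --host 0.0.0.0 --port 7860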