from fastapi import FastAPI, Request, Query
import src.Paraphrase as Paraphrase
import src.Translate
from typing import Optional
from fastapi_mcp import FastApiMCP
from huggingface_hub import hf_hub_download, list_repo_files
from sentence_transformers import SentenceTransformer
app = FastAPI()
# app = FastAPI(docs_url="/docs")
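# Translation and paraphrase models on the Hugging Face Hub, keyed by short alias.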
MODELS = {'benro': 'BlackKakapo/opus-mt-en-ro',
          'broen': 'BlackKakapo/opus-mt-ro-en',
          'mttcbig': 'Helsinki-NLP/opus-mt-tc-big-en-ro',
          'gemma': 'Gargaz/gemma-2b-romanian-better',
          'mbartenro': 'ancebuc/mbart-translation-en-ro',
          't5enro': 'ancebuc/t5-translation-en-ro',
          'pegasus': 'ancebuc/pegasus-translation-en-ro',
          'mbart': 'facebook/mbart-large-cc25',
          'paraphrase': 'tuner007/pegasus_paraphrase'}
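# Multilingual sentence-embedding models mapped to their output embedding dimension.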
EMBEDDING_MODELS = {"all-MiniLM-L6-v2":384,
"sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2":384,
"sentence-transformers/distiluse-base-multilingual-cased-v2":512,
"sentence-transformers/stsb-xlm-r-multilingual":768,
"sentence-transformers/use-cmlm-multilingual":768,
"sentence-transformers/paraphrase-multilingual-mpnet-base-v2":768}
EMBEDDING_MODEL = "sentence-transformers/distiluse-base-multilingual-cased-v2"
@app.get("/")
def index(request: Request):
    from fastapi.responses import HTMLResponse
    host_url = "https://" + request.url.netloc
    mcp_config = '''{"mcpServers": {"fastapi-mcp": {"url": "https://tiberiucristianleon-fastapimt.hf.space/mcp"}}}'''
    html_content = f'''
    <html>
        <head>
            <title>FastAPI with MCP</title>
        </head>
        <body>
            <h2>FastAPI URLS</h2>
            <p><a href="{host_url}" target="_blank">Host URL:</a> {host_url}</p>
            <p><a href="{host_url}/docs" target="_blank">DOCS</a></p>
            <p><a href="{host_url}/redoc" target="_blank">REDOC</a></p>
            <p><a href="{host_url}/openapi.json" target="_blank">openapi.json</a></p>
            <p><a href="{host_url}/mcp" target="_blank">MCP</a></p>
            <p>MCP configuration: {mcp_config}</p>
            <p>MODELS: {list(MODELS.values())}</p>
        </body>
    </html>
    '''
    return HTMLResponse(content=html_content)
# @app.get("/")
# async def get_host_url(request: Request):
# host_url = request.url.scheme + "s://" + request.url.netloc
# return {"host_url": host_url, 'endpoints': ['/paraphrase', '/translate', f'{host_url}/docs', f'{host_url}/redoc', f'{host_url}/openapi.json'], 'models': MODELS}
@app.get("/paraphrase", operation_id="get_paraphrase", description="Paraphrase text", tags=["paraphrase"], summary="Paraphrase text")
def paraphrase(text: str, model: str = MODELS['paraphrase']):
    resultValue, exception = Paraphrase.paraphraseParaphraseMethod(text, model)
    return {"input": text, "result": resultValue, "exception": exception}
@app.get("/listmodels", operation_id="list_models", description="List models", tags=["listmodels"], summary="List models")
def listmodels():
return {"MODELS": MODELS, "EMBEDDING_MODELS": EMBEDDING_MODELS}
# model: Optional[str] = MODELS['benro']
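# Example: https://tiberiucristianleon-fastapimt.hf.space/translate?input_text=Hello&sl=en&tl=ro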
@app.get("/translate", operation_id="get_translate", description="Translate text", tags=["translate"], summary="Translate text")
def translate(input_text: str, model_name: str = MODELS['mttcbig'], sl: str = 'en', tl: str = 'ro'):
    message = f'Translated from {sl} to {tl} with {model_name}'
    if 'BlackKakapo' in model_name:
        translation, model_name = src.Translate.paraphraseTranslateMethod(input_text, model_name)
    elif 'Helsinki-NLP' in model_name:
        translation, message = src.Translate.Translators(model_name, sl, tl, input_text).HelsinkiNLP_mulroa()
    # text2textgenerationpipe, translationpipe
    # elif model_name == MODELS['mbartenro']:
    #     translation, message = src.Translate.Translators(model_name, sl, tl, input_text).text2textgenerationpipe()
    elif model_name == MODELS['t5enro'] or model_name == MODELS['pegasus'] or model_name == MODELS['mbartenro']:
        translation, message = src.Translate.Translators(model_name, sl, tl, input_text).translationpipe()
    elif model_name == MODELS['mbart']:
        translation, message = src.Translate.Translators(model_name, sl, tl, input_text).mbartlarge()
    else:
        translation: str = src.Translate.gemma_direct(input_text, model_name)
    return {"input_text": input_text, "translation": translation, "model_name": model_name, "message": message}
# Keep track of installed (src, tgt) pairs
installed_pairs = set()
# https://tiberiucristianleon-fastapimt.hf.space/bergamot?input_text=das%20ist%20keine%20gute%20Frau&input_text=das%20ist%20eine%20gute%20Nachricht&sl=de&tl=en&model=bergamot
@app.get("/bergamot", operation_id="get_bergamot", description="Translate text with Bergamot", tags=["bergamot"], summary="Translate text with Bergamot")
def bergamot(input_text: list[str] = Query(description="Input string or list of strings"), sl: str = 'de', tl: str = 'en', model_name: Optional[str] = 'base/deen'):
"""
Translates the input text from the source language to the target language using a specified model.
Parameters:
input_text (str | list[str]): The source text to be translated, can be either a string or a list of strings
sl (str): The source language of the input text
tl (str): The target language into which the input text is translated
model_name (str): The selected translation model name
Returns:
dict:
input_text(str): The input text in the source language
translated_text(str): The input text translated into the selected target language
message_text(str): A descriptive message summarizing the translation process. Example: "Translated from English to German with base/ende."
Example:
>>> bergamot("Hello world", "en", "de", "base/ende")
{"input_text": "Hello world", "translated_text": "Hallo Welt", "message_text": "Translated from English to German with base/ende."}
"""
    try:
        import bergamot
        # print(type(input_text))
        # input_text = [input_text] if isinstance(input_text, str) else input_text
        config = bergamot.ServiceConfig(numWorkers=4)
        service = bergamot.Service(config)
        repo_id = "TiberiuCristianLeon/Bergamot"
        branches = ['base', 'base-memory', 'tiny']
        subfolder = f"{sl}{tl}"
        localfolder = f"{subfolder}/{model_name}"
        # List all files in the repo
        all_files = list_repo_files(repo_id, repo_type='model')
        print('input text type:', type(input_text), len(all_files), 'installed_pairs', installed_pairs, 'defaultlocalfolder', localfolder)
        for branch in branches:
            branch_files = [f for f in all_files if f.startswith(branch)]
            fullmodel_files = [f for f in branch_files if f.startswith(model_name)]
            print('branch_files', len(branch_files), 'fullmodel_files', fullmodel_files)
            model_files = [f.split(f'{model_name}/')[1] for f in fullmodel_files]
            print('branch_files', len(branch_files), 'model_files', model_files)
            for file_path in model_files:
                if localfolder not in installed_pairs:
                    # local_files_only (bool, optional, defaults to False) — If True, avoid downloading the file and return the path to the local cached file if it exists.
                    # dry_run (bool, optional, defaults to False) — If True, perform a dry run without actually downloading the file. Returns a DryRunFileInfo object containing information about what would be downloaded.
                    local_path = hf_hub_download(repo_id=repo_id, subfolder=model_name, filename=file_path, local_dir=subfolder)
                    print(f"Downloaded to: {local_path}")  # Downloaded to: deen/base/deen/config.yml
                    # localfolder = local_path.rsplit('/', 1)[0]
        installed_pairs.add(localfolder)
        try:
            dry_run = hf_hub_download(repo_id=repo_id, subfolder=model_name, filename='config.yml', local_dir=subfolder)
            print('installed_pairs', installed_pairs, 'localfolder', localfolder, 'dry_run', dry_run)
        except Exception as dryrunerror:
            print('installed_pairs', installed_pairs, 'localfolder', localfolder, 'dry_run', dryrunerror)
        model = service.modelFromConfigPath(f"{localfolder}/config.yml")
        # model = service.modelFromConfig(localfolder)
        options = bergamot.ResponseOptions(alignment=False, sentenceMappings=False, qualityScores=False, HTML=False)
        rawresponse = service.translate(model, bergamot.VectorString(input_text), options)
        response: list | str = [r.target.text for r in rawresponse] if len(rawresponse) > 1 else next(iter(rawresponse)).target.text
        print(type(input_text), len(input_text), len(rawresponse), type(response), response)
        # response = [r.target.text for r in model_response][0] if isinstance(response, bergamot._bergamot.VectorResponse) else next(iter(response)).target.text
        # response is of type bergamot._bergamot.VectorResponse, an iterable of bergamot._bergamot.Response
        message_text = f"Translated from {sl} to {tl} with {model_name}."
    except Exception as generalerror:
        response, message_text = str(generalerror), f"Error translating from {sl} to {tl} with {model_name}: {generalerror}."
        print(generalerror)
    return {"input": input_text, "translated_text": response, "message_text": message_text}
@app.get("/embed", operation_id="get_embeddings", description="Embed text", tags=["embed"], summary="Embed text")
def embed(text: str, model: str = EMBEDDING_MODEL):
    model = SentenceTransformer(model)
    embeddings = model.encode(text)
    print(embeddings.shape, len(embeddings))
    # similarities = model.similarity(embeddings, embeddings)
    return {"input": text, "embeddings": embeddings.tolist(), "shape": embeddings.shape}
# Create an MCP server based on this app
mcp = FastApiMCP(
    app,
    name="Translate and paraphrase FASTAPI MCP",
    description="MCP server to translate and paraphrase text",
    describe_all_responses=True,
    describe_full_response_schema=True,
    include_operations=["get_translate", "get_paraphrase"],
    include_tags=["paraphrase", "translate", "bergamot"]
)
# Mount the MCP server directly to the FASTAPI app
mcp.mount()
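# Assumed run command on Hugging Face Spaces (not part of this file): uvicorn app:app --host 0.0.0.0 --port 7860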