File size: 8,497 Bytes
918a36b
9b5b26a
 
 
c19d193
6aae614
8fe992b
9b5b26a
 
fa475b4
 
 
 
 
 
5df72d6
9b5b26a
3d1237b
9b5b26a
8fbbdda
9b5b26a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8c01ffb
 
fa475b4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6aae614
ae7a494
 
 
 
e121372
bf6d34c
 
29ec968
fe328e0
13d500a
8c01ffb
 
9b5b26a
 
8c01ffb
861422e
 
9b5b26a
8c01ffb
8fe992b
8fbbdda
8c01ffb
 
 
 
 
 
861422e
8fe992b
 
9b5b26a
8c01ffb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
from smolagents import CodeAgent,DuckDuckGoSearchTool,HfApiModel,load_tool,tool
import datetime
import requests
import pytz
import yaml
from tools.final_answer import FinalAnswerTool

from Gradio_UI import GradioUI

import sys
import urllib.parse
import time
import re
import argparse

# Below is an example of a tool that does nothing. Amaze us with your creativity !
@tool
def my_custom_tool(arg1: str, arg2: int) -> str:  # the return type annotation is important here
    # Keep the description / Args / per-argument layout of the docstring below;
    # everything else about the tool may be changed freely.
    """A tool that does nothing yet [BLAST???]
    Args:
        arg1: the first argument
        arg2: the second argument
    """
    # Placeholder reply: neither argument is used yet.
    placeholder_reply = "What magic will you build ?"
    return placeholder_reply

@tool
def get_current_time_in_timezone(timezone: str) -> str:
    """A tool that fetches the current local time in a specified timezone.
    Args:
        timezone: A string representing a valid timezone (e.g., 'America/New_York').
    """
    try:
        # Resolve the zone name, read the clock in that zone, then render it
        # as "YYYY-MM-DD HH:MM:SS".
        zone = pytz.timezone(timezone)
        now_in_zone = datetime.datetime.now(zone)
        formatted = now_in_zone.strftime("%Y-%m-%d %H:%M:%S")
    except Exception as exc:
        # Any failure (e.g. an unknown zone name) is reported as text rather
        # than raised, so the caller always receives a string.
        return f"Error fetching time for timezone '{timezone}': {exc!s}"
    else:
        return f"The current local time in {timezone} is: {formatted}"


@tool
def run_blast(arguments_string: str) -> str:
    """
    Submits a BLAST job to NCBI and retrieves the results.
    Parses arguments from a string input.  Accepts a sequence directly, not a file.

    Args:
        arguments_string: String containing program, database, and query sequence separated by
            whitespace, for example "blastn nt ATGCGTAGCTAGCTAG".  Everything after the database
            name is sent as the query, so the sequence is given directly in the string.

    Returns:
        str: BLAST results in text format on success, or an error message string on failure.
             Possible error messages include:
                "Error: Invalid arguments"
                "Error submitting request: {error_details}"
                "Error: Could not parse RID or RTOE from BLAST response."
                "Search {rid} failed; please report to blast-help@ncbi.nlm.nih.gov."
                "Search {rid} expired."
                "No hits found."
                "Unknown error during polling."
                "Error polling for results: {error_details}"
                "Error retrieving results: {error_details}"
    """
    api_url = 'https://blast.ncbi.nlm.nih.gov/blast/Blast.cgi'
    # Per-request timeout (seconds) so a stalled connection cannot hang the caller forever.
    request_timeout = 60
    # Delay (seconds) between two status polls.
    poll_interval = 5

    # Split off program and database only; the remainder is the query and is kept
    # verbatim (apart from surrounding whitespace), so multi-line input survives.
    parts = arguments_string.split(maxsplit=2) if isinstance(arguments_string, str) else []
    if len(parts) < 3:
        return "Error: Invalid arguments. Usage: program database query_sequence"
    program, database, query_sequence = parts
    query_sequence = query_sequence.strip()

    # build the request
    # NOTE: values are passed raw. requests form-encodes the payload itself, so
    # pre-quoting the query would double-encode it.
    payload = {
        'CMD': 'Put',
        'PROGRAM': program,
        'DATABASE': database,
        'QUERY': query_sequence,
    }
    # megablast / rpsblast are selected through their own form fields; embedding
    # "&MEGABLAST=on" inside the PROGRAM value would just be escaped by the encoder.
    if program == "megablast":
        payload['PROGRAM'] = 'blastn'
        payload['MEGABLAST'] = 'on'
    elif program == "rpsblast":
        payload['PROGRAM'] = 'blastp'
        payload['SERVICE'] = 'rpsblast'

    try:
        response = requests.post(api_url, data=payload, timeout=request_timeout)
        response.raise_for_status()  # Raise HTTPError for bad responses (4xx or 5xx)
    except requests.exceptions.RequestException as e:
        return f"Error submitting request: {e}"

    response_content = response.text

    # The submission response carries the request id (RID) and the estimated
    # time to completion in seconds (RTOE). Requiring digits for RTOE means the
    # int() conversion below cannot fail.
    rid_match = re.search(r"RID = (\S+)", response_content)
    rtoe_match = re.search(r"RTOE = (\d+)", response_content)
    if not (rid_match and rtoe_match):
        return "Error: Could not parse RID or RTOE from BLAST response.\nResponse content:\n" + response_content

    rid = rid_match.group(1)
    rtoe = int(rtoe_match.group(1))

    # wait for the estimated completion time before the first poll
    time.sleep(rtoe)

    # poll for results; the loop only continues while the search is reported as WAITING
    poll_params = {'CMD': 'Get', 'FORMAT_OBJECT': 'SearchInfo', 'RID': rid}
    while True:
        time.sleep(poll_interval)
        try:
            response = requests.get(api_url, params=poll_params, timeout=request_timeout)
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            return f"Error polling for results: {e}"

        status_content = response.text

        if re.search(r"\s+Status=WAITING", status_content):
            continue
        if re.search(r"\s+Status=FAILED", status_content):
            return f"Search {rid} failed; please report to blast-help@ncbi.nlm.nih.gov."
        if re.search(r"\s+Status=UNKNOWN", status_content):
            return f"Search {rid} expired."
        if re.search(r"\s+Status=READY", status_content):
            if re.search(r"\s+ThereAreHits=yes", status_content):
                break  # search complete, go fetch the report
            return "No hits found."
        # if we get here, something unexpected happened.
        return "Unknown error during polling.\nStatus response content:\n" + status_content

    # retrieve the results as plain text
    result_params = {'CMD': 'Get', 'FORMAT_TYPE': 'Text', 'RID': rid}
    try:
        result_response = requests.get(api_url, params=result_params, timeout=request_timeout)
        result_response.raise_for_status()
    except requests.exceptions.RequestException as e:
        return f"Error retrieving results: {e}"

    return result_response.text

def blast_exit_code(results: str) -> int:
    """Map a run_blast() result string to a process exit code.

    Args:
        results: The string returned by run_blast().

    Returns:
        0 for a successful report, 1 for invalid arguments, 2 when there are
        no hits, 3 when the search expired, 4 when the search failed, and 5
        for any other error message.
    """
    if results.startswith("Error: Invalid arguments"):
        return 1
    if results == "No hits found.":
        return 2
    if results.startswith("Search ") and results.endswith("expired."):
        return 3
    if results.startswith("Search ") and results.endswith("failed; please report to blast-help@ncbi.nlm.nih.gov."):
        return 4
    # Remaining failures: "Error: ...", "Error submitting request: ...",
    # "Error polling for results: ...", "Error retrieving results: ..." and
    # "Unknown error during polling." -- note that not all of them contain "Error:".
    if results.startswith(("Error", "Unknown error")):
        return 5
    return 0


# Command-line mode is entered only when arguments are supplied. With no
# arguments the script falls through to the code below this block instead of
# exiting, so running the file bare is not aborted with a usage error.
if __name__ == "__main__" and len(sys.argv) > 1:
    if len(sys.argv) < 4:
        print("Usage: python blast_tool.py program database query_sequence", file=sys.stderr)
        print("       query_sequence should be the sequence itself, not a file.", file=sys.stderr)
        sys.exit(1)

    arguments_string = " ".join(sys.argv[1:]) # Reconstruct arguments string from command line
    results = run_blast(arguments_string)

    exit_code = blast_exit_code(results)
    # Failures go to stderr, the BLAST report goes to stdout.
    print(results, file=sys.stderr if exit_code else sys.stdout)
    sys.exit(exit_code)


# Tool the agent calls to hand back its final answer.
final_answer = FinalAnswerTool()

# If the agent does not answer, the model is overloaded, please use another model or the following Hugging Face Endpoint that also contains qwen2.5 coder:
# model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud' 

model = HfApiModel(
    max_tokens=2096,
    temperature=0.5,
    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',  # it is possible that this model may be overloaded
    custom_role_conversions=None,
)


# Import tool from Hub
# NOTE(review): trust_remote_code=True runs code downloaded from the Hub repo;
# keep it only for repositories you trust.
image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)

# Explicit encoding so the prompt templates load the same way on every platform.
with open("prompts.yaml", 'r', encoding="utf-8") as stream:
    prompt_templates = yaml.safe_load(stream)

agent = CodeAgent(
    model=model,
    # Register the tools defined in this file; a @tool function that is not
    # listed here is never offered to the agent. (don't remove final answer)
    tools=[
        DuckDuckGoSearchTool(),
        get_current_time_in_timezone,
        run_blast,
        final_answer,
    ],
    max_steps=6,
    verbosity_level=1,
    grammar=None,
    planning_interval=None,
    name=None,
    description=None,
    prompt_templates=prompt_templates
)


GradioUI(agent).launch()