jlgaralc committed on
Commit
fa475b4
·
verified ·
1 Parent(s): 8fbbdda

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +151 -0
app.py CHANGED
@@ -7,6 +7,12 @@ from tools.final_answer import FinalAnswerTool
7
 
8
  from Gradio_UI import GradioUI
9
 
 
 
 
 
 
 
10
  # Below is an example of a tool that does nothing. Amaze us with your creativity !
11
  @tool
12
  def my_custom_tool(arg1:str, arg2:int)-> str: #it's import to specify the return type
@@ -34,6 +40,151 @@ def get_current_time_in_timezone(timezone: str) -> str:
34
  return f"Error fetching time for timezone '{timezone}': {str(e)}"
35
 
36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  final_answer = FinalAnswerTool()
38
 
39
  # If the agent does not answer, the model is overloaded, please use another model or the following Hugging Face Endpoint that also contains qwen2.5 coder:
 
7
 
8
  from Gradio_UI import GradioUI
9
 
10
+ import sys
11
+ import urllib.parse
12
+ import time
13
+ import re
14
+ import argparse
15
+
16
  # Below is an example of a tool that does nothing. Amaze us with your creativity !
17
  @tool
18
  def my_custom_tool(arg1:str, arg2:int)-> str: #it's import to specify the return type
 
40
  return f"Error fetching time for timezone '{timezone}': {str(e)}"
41
 
42
 
43
@tool
def run_blast(arguments_string: str) -> str:
    """
    Submits a BLAST job to NCBI and retrieves the results.
    Parses arguments from a string input. Accepts a sequence directly, not a file.

    Args:
        arguments_string: String containing program, database, and query sequence,
            for example "blastp nr ATGCGTAGCTAGCTAG...". The sequence must be given
            directly in the string; whitespace-separated pieces are rejoined.

    Returns:
        str: BLAST results in text format on success, or an error message string on failure.
            Possible error messages include:
            "Error: Invalid arguments. Usage: program database query_sequence"
            "Error: Argument parsing failed."
            "Error submitting request: {error_details}"
            "Error: Could not parse RID or RTOE from BLAST response."
            "Search {rid} failed; please report to blast-help@ncbi.nlm.nih.gov."
            "Search {rid} expired."
            "No hits found."
            "Unknown error during polling."
            "Error polling for results: {error_details}"
            "Error retrieving results: {error_details}"
    """
    api_url = 'https://blast.ncbi.nlm.nih.gov/blast/Blast.cgi'
    request_timeout = 60          # seconds allowed for each individual HTTP call
    poll_interval = 5             # seconds between two status checks
    max_poll_seconds = 30 * 60    # give up on a search that is still WAITING after this long

    parser = argparse.ArgumentParser(
        description="Submit and retrieve BLAST jobs from NCBI with a direct sequence input.",
        formatter_class=argparse.RawTextHelpFormatter,  # keep help message formatting intact
    )
    parser.add_argument(
        "program",
        help="BLAST program (e.g., megablast, blastn, blastp, rpsblast, blastx, tblastn, tblastx)",
    )
    parser.add_argument("database", help="BLAST database name")
    parser.add_argument("query_sequence", nargs='+', help="Query sequence (directly input as string)")

    try:
        args = parser.parse_args(arguments_string.split())
    except SystemExit as e:
        # argparse calls sys.exit() on bad input; turn that into an error string
        # so the caller never sees the interpreter exit.
        if e.code == 2:  # 2 is argparse's exit code for incorrect usage
            return "Error: Invalid arguments. Usage: program database query_sequence"
        return "Error: Argument parsing failed."

    # The sequence may have been split on whitespace by the parser; rejoin it.
    query_sequence = " ".join(args.query_sequence)

    # The payload is form-encoded by requests, so the query must be passed raw
    # (pre-quoting it would double-encode it) and every option must be its own
    # field (an '&' inside a value would be escaped, not treated as a separator).
    payload = {
        'CMD': 'Put',
        'PROGRAM': args.program,
        'DATABASE': args.database,
        'QUERY': query_sequence,
    }
    if args.program == "megablast":
        payload['PROGRAM'] = 'blastn'
        payload['MEGABLAST'] = 'on'
    elif args.program == "rpsblast":
        payload['PROGRAM'] = 'blastp'
        payload['SERVICE'] = 'rpsblast'

    # submit the search
    try:
        response = requests.post(api_url, data=payload, timeout=request_timeout)
        response.raise_for_status()  # raise HTTPError for bad responses (4xx or 5xx)
    except requests.exceptions.RequestException as e:
        return f"Error submitting request: {e}"

    response_content = response.text

    # Require a non-empty request id and a numeric estimated time; an empty RID
    # or a non-numeric RTOE is reported as a parse error instead of being polled.
    rid_match = re.search(r"RID = (\S+)", response_content)
    rtoe_match = re.search(r"RTOE = (\d+)", response_content)
    if not (rid_match and rtoe_match):
        return "Error: Could not parse RID or RTOE from BLAST response.\nResponse content:\n" + response_content

    rid = rid_match.group(1)
    rtoe = int(rtoe_match.group(1))

    # wait for the estimated time to completion before the first status check
    time.sleep(rtoe)

    # poll for results
    deadline = time.monotonic() + max_poll_seconds
    while True:
        time.sleep(poll_interval)
        try:
            response = requests.get(
                api_url,
                params={'CMD': 'Get', 'FORMAT_OBJECT': 'SearchInfo', 'RID': rid},
                timeout=request_timeout,
            )
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            return f"Error polling for results: {e}"

        status_content = response.text

        if re.search(r"\s+Status=WAITING", status_content):
            if time.monotonic() >= deadline:
                return (
                    f"Error polling for results: search {rid} still running "
                    f"after {max_poll_seconds} seconds."
                )
            continue
        if re.search(r"\s+Status=FAILED", status_content):
            return f"Search {rid} failed; please report to blast-help@ncbi.nlm.nih.gov."
        if re.search(r"\s+Status=UNKNOWN", status_content):
            return f"Search {rid} expired."
        if re.search(r"\s+Status=READY", status_content):
            if re.search(r"\s+ThereAreHits=yes", status_content):
                break
            return "No hits found."
        # none of the known statuses matched
        return "Unknown error during polling.\nStatus response content:\n" + status_content

    # retrieve the finished report as plain text
    try:
        result_response = requests.get(
            api_url,
            params={'CMD': 'Get', 'FORMAT_TYPE': 'Text', 'RID': rid},
            timeout=request_timeout,
        )
        result_response.raise_for_status()
    except requests.exceptions.RequestException as e:
        return f"Error retrieving results: {e}"

    return result_response.text
161
+
162
def classify_blast_result(results: str) -> int:
    """Map a string returned by run_blast to a process exit code.

    Args:
        results: The text returned by run_blast (a report or an error message).

    Returns:
        0 for a successful report, 1 for invalid arguments, 2 when no hits were
        found, 3 when the search expired, 4 when the search failed, and 5 for
        any other error message.
    """
    if results == "Error: Invalid arguments. Usage: program database query_sequence":
        return 1
    if results == "No hits found.":
        return 2
    if results.startswith("Search ") and results.endswith("expired."):
        return 3
    if results.startswith("Search ") and results.endswith("failed; please report to blast-help@ncbi.nlm.nih.gov."):
        return 4
    # Not every error message has a colon right after "Error"
    # (e.g. "Error submitting request: ..."), so match on the bare prefix.
    if results.startswith(("Error", "Unknown error")):
        return 5
    return 0


# Command-line mode is entered only when arguments are supplied. Without this
# condition, running the file with no arguments would print the usage text and
# exit here, and the module-level code that follows would never run.
if __name__ == "__main__" and len(sys.argv) > 1:
    if len(sys.argv) < 3:
        print(f"Usage: python {sys.argv[0]} program database query_sequence", file=sys.stderr)
        print("       query_sequence should be the sequence itself, not a file.", file=sys.stderr)
        sys.exit(1)

    arguments_string = " ".join(sys.argv[1:])  # rebuild the argument string from the command line
    results = run_blast(arguments_string)

    exit_code = classify_blast_result(results)
    # Reports go to stdout; every failure message goes to stderr.
    print(results, file=sys.stdout if exit_code == 0 else sys.stderr)
    sys.exit(exit_code)
186
+
187
+
188
  final_answer = FinalAnswerTool()
189
 
190
  # If the agent does not answer, the model is overloaded, please use another model or the following Hugging Face Endpoint that also contains qwen2.5 coder: