Spaces:
Running
on
L4
Running
on
L4
Commit
·
9d2e7c6
1
Parent(s):
3c09cd6
fix wrong chains and error for seq alignment
Browse files
- __pycache__/msa.cpython-310.pyc +0 -0
- app.py +1 -1
- msa.py +43 -44
__pycache__/msa.cpython-310.pyc
ADDED
|
Binary file (6.83 kB). View file
|
|
|
app.py
CHANGED
|
@@ -128,7 +128,7 @@ with gr.Blocks() as blocks:
|
|
| 128 |
|
| 129 |
gr.Examples([
|
| 130 |
["TOP7",{"chains": [{"class": "protein","sequence": "MGDIQVQVNIDDNGKNFDYTYTVTTESELQKVLNELMDYIKKQGAKRVRISITARTKKEAEKFAAILIKVFAELGYNDINVTFDGDTVTVEGQLEGGSLEHHHHHH","chain": "A"}], "covMods":[]}],
|
| 131 |
-
["ApixacabanBinder", {"chains": [{"class": "protein","sequence": "SVKSEYAEAAAVGQEAVAVFNTMKAAFQNGDKEAVAQYLARLASLYTRHEELLNRILEKARREGNKEAVTLMNEFTATFQTGKSIFNAMVAAFKNGDDDSFESYLQALEKVTAKGETLADQIAKAL","chain": "A"}, {"class":"ligand", "smiles":"COc1ccc(cc1)n2c3c(c(n2)C(=O)N)CCN(C3=O)c4ccc(cc4)N5CCCCC5=O", "chain": "
|
| 132 |
],
|
| 133 |
inputs = [jobname, inp]
|
| 134 |
)
|
|
|
|
| 128 |
|
| 129 |
gr.Examples([
|
| 130 |
["TOP7",{"chains": [{"class": "protein","sequence": "MGDIQVQVNIDDNGKNFDYTYTVTTESELQKVLNELMDYIKKQGAKRVRISITARTKKEAEKFAAILIKVFAELGYNDINVTFDGDTVTVEGQLEGGSLEHHHHHH","chain": "A"}], "covMods":[]}],
|
| 131 |
+
["ApixacabanBinder", {"chains": [{"class": "protein","sequence": "SVKSEYAEAAAVGQEAVAVFNTMKAAFQNGDKEAVAQYLARLASLYTRHEELLNRILEKARREGNKEAVTLMNEFTATFQTGKSIFNAMVAAFKNGDDDSFESYLQALEKVTAKGETLADQIAKAL","chain": "A"}, {"class":"ligand", "smiles":"COc1ccc(cc1)n2c3c(c(n2)C(=O)N)CCN(C3=O)c4ccc(cc4)N5CCCCC5=O", "sdf":"","name":"","chain": "B"}], "covMods":[]}]
|
| 132 |
],
|
| 133 |
inputs = [jobname, inp]
|
| 134 |
)
|
msa.py
CHANGED
|
@@ -148,55 +148,54 @@ def run_mmseqs2(x, prefix, use_env=True, use_filter=True,
|
|
| 148 |
[seqs_unique.append(x) for x in seqs if x not in seqs_unique]
|
| 149 |
Ms = [N + seqs_unique.index(seq) for seq in seqs]
|
| 150 |
# lets do it!
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 158 |
out = submit(seqs_unique, mode, N)
|
| 159 |
-
while out["status"] in ["UNKNOWN", "RATELIMIT"]:
|
| 160 |
-
sleep_time = 5 + random.randint(0, 5)
|
| 161 |
-
logger.error(f"Sleeping for {sleep_time}s. Reason: {out['status']}")
|
| 162 |
-
# resubmit
|
| 163 |
-
time.sleep(sleep_time)
|
| 164 |
-
out = submit(seqs_unique, mode, N)
|
| 165 |
|
| 166 |
-
|
| 167 |
-
|
| 168 |
|
| 169 |
-
|
| 170 |
-
|
| 171 |
|
| 172 |
-
|
| 173 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 174 |
pbar.set_description(out["status"])
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
if out["status"] == "ERROR":
|
| 195 |
-
REDO = False
|
| 196 |
-
raise Exception(f'MMseqs2 API is giving errors. Please confirm your input is a valid protein sequence. If error persists, please try again an hour later.')
|
| 197 |
-
|
| 198 |
-
# Download results
|
| 199 |
-
download(ID, tar_gz_file)
|
| 200 |
|
| 201 |
|
| 202 |
a3m_files = [f"{path}/uniref.a3m"]
|
|
|
|
| 148 |
[seqs_unique.append(x) for x in seqs if x not in seqs_unique]
|
| 149 |
Ms = [N + seqs_unique.index(seq) for seq in seqs]
|
| 150 |
# lets do it!
|
| 151 |
+
TIME_ESTIMATE = 150 * len(seqs_unique)
|
| 152 |
+
with tqdm(total=TIME_ESTIMATE, bar_format=TQDM_BAR_FORMAT) as pbar:
|
| 153 |
+
while REDO:
|
| 154 |
+
pbar.set_description("SUBMIT")
|
| 155 |
+
|
| 156 |
+
# Resubmit job until it goes through
|
| 157 |
+
out = submit(seqs_unique, mode, N)
|
| 158 |
+
while out["status"] in ["UNKNOWN", "RATELIMIT"]:
|
| 159 |
+
sleep_time = 5 + random.randint(0, 5)
|
| 160 |
+
logger.error(f"Sleeping for {sleep_time}s. Reason: {out['status']}")
|
| 161 |
+
# resubmit
|
| 162 |
+
time.sleep(sleep_time)
|
| 163 |
out = submit(seqs_unique, mode, N)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 164 |
|
| 165 |
+
if out["status"] == "ERROR":
|
| 166 |
+
raise Exception(f'MMseqs2 API is giving errors. Please confirm your input is a valid protein sequence. If error persists, please try again an hour later.')
|
| 167 |
|
| 168 |
+
if out["status"] == "MAINTENANCE":
|
| 169 |
+
raise Exception(f'MMseqs2 API is undergoing maintenance. Please try again in a few minutes.')
|
| 170 |
|
| 171 |
+
# wait for job to finish
|
| 172 |
+
ID,TIME = out["id"],0
|
| 173 |
+
pbar.set_description(out["status"])
|
| 174 |
+
while out["status"] in ["UNKNOWN","RUNNING","PENDING"]:
|
| 175 |
+
t = 5 + random.randint(0,5)
|
| 176 |
+
logger.error(f"Sleeping for {t}s. Reason: {out['status']}")
|
| 177 |
+
time.sleep(t)
|
| 178 |
+
out = status(ID)
|
| 179 |
pbar.set_description(out["status"])
|
| 180 |
+
if out["status"] == "RUNNING":
|
| 181 |
+
TIME += t
|
| 182 |
+
pbar.update(n=t)
|
| 183 |
+
#if TIME > 900 and out["status"] != "COMPLETE":
|
| 184 |
+
# # something failed on the server side, need to resubmit
|
| 185 |
+
# N += 1
|
| 186 |
+
# break
|
| 187 |
+
|
| 188 |
+
if out["status"] == "COMPLETE":
|
| 189 |
+
if TIME < TIME_ESTIMATE:
|
| 190 |
+
pbar.update(n=(TIME_ESTIMATE-TIME))
|
| 191 |
+
REDO = False
|
| 192 |
+
|
| 193 |
+
if out["status"] == "ERROR":
|
| 194 |
+
REDO = False
|
| 195 |
+
raise Exception(f'MMseqs2 API is giving errors. Please confirm your input is a valid protein sequence. If error persists, please try again an hour later.')
|
| 196 |
+
|
| 197 |
+
# Download results
|
| 198 |
+
download(ID, tar_gz_file)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 199 |
|
| 200 |
|
| 201 |
a3m_files = [f"{path}/uniref.a3m"]
|