Spaces:
Sleeping
Sleeping
Erva Ulusoy
committed on
Commit
·
873150b
1
Parent(s):
4cc17e2
merge overlapping domain locations
Browse files- run_domain2go_app.py +14 -5
run_domain2go_app.py
CHANGED
|
@@ -4,6 +4,7 @@ from Bio import SeqIO
|
|
| 4 |
import os
|
| 5 |
import time
|
| 6 |
import pandas as pd
|
|
|
|
| 7 |
|
| 8 |
def find_domains(email, sequence, name):
|
| 9 |
|
|
@@ -72,10 +73,10 @@ def find_domains(email, sequence, name):
|
|
| 72 |
entries[entry['accession']]['locations'].extend(location_list)
|
| 73 |
|
| 74 |
entries[entry['accession']]['locations'] = list(set(entries[entry['accession']]['locations']))
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
if entries:
|
| 80 |
result_text = 'Domains found.'
|
| 81 |
|
|
@@ -92,6 +93,14 @@ def find_domains(email, sequence, name):
|
|
| 92 |
|
| 93 |
# generate protein function predictions based on domain2go mappings
|
| 94 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
def generate_function_predictions(domains_df, mapping_path):
|
| 96 |
|
| 97 |
# read domain2go mappings
|
|
@@ -115,4 +124,4 @@ def generate_function_predictions(domains_df, mapping_path):
|
|
| 115 |
# save protein function predictions
|
| 116 |
protein_name = domains_df['protein_name'].iloc[0]
|
| 117 |
result_text= 'Function predictions found.'
|
| 118 |
-
return [result_text, merged_df]
|
|
|
|
| 4 |
import os
|
| 5 |
import time
|
| 6 |
import pandas as pd
|
| 7 |
+
import intervaltree
|
| 8 |
|
| 9 |
def find_domains(email, sequence, name):
|
| 10 |
|
|
|
|
| 73 |
entries[entry['accession']]['locations'].extend(location_list)
|
| 74 |
|
| 75 |
entries[entry['accession']]['locations'] = list(set(entries[entry['accession']]['locations']))
|
| 76 |
+
if len(entries[entry['accession']]['locations']) > 1:
|
| 77 |
+
entries[entry['accession']]['locations'] = merge_locations(entries[entry['accession']]['locations'])
|
| 78 |
+
entries[entry['accession']]['locations'] = sorted([i.split('-') for i in entries[entry['accession']]['locations']], key=lambda x: (int(x[0]), int(x[1])))
|
| 79 |
+
entries[entry['accession']]['locations'] = ['-'.join(i) for i in entries[entry['accession']]['locations']]
|
| 80 |
if entries:
|
| 81 |
result_text = 'Domains found.'
|
| 82 |
|
|
|
|
| 93 |
|
| 94 |
# generate protein function predictions based on domain2go mappings
|
| 95 |
|
| 96 |
+
|
| 97 |
+
def merge_locations(locations):
    """Merge overlapping domain locations into non-overlapping ranges.

    Each location is a ``'start-end'`` string. Bounds are parsed as integers
    before comparison; comparing the raw strings would order them
    lexicographically (``'5' > '10'``) and mis-detect overlaps.

    Ranges are treated as closed (both ends included), so two ranges that
    share a boundary position (``'1-10'`` and ``'10-20'``) are merged, while
    ranges that are merely adjacent (``'1-10'`` and ``'11-20'``) are kept
    separate.

    Args:
        locations: iterable of ``'start-end'`` strings.

    Returns:
        List of merged ``'start-end'`` strings, sorted by start position.
        An empty input yields an empty list.

    Raises:
        ValueError: if a location is not exactly two integers joined by
            ``'-'``.
    """
    # Numeric sort by (start, end) lets a single left-to-right sweep find
    # every overlap.
    spans = sorted(
        tuple(int(bound) for bound in location.split('-'))
        for location in locations
    )

    merged = []
    for start, end in spans:
        if merged and start <= merged[-1][1]:
            # Overlaps the range being built: extend it if this one ends later.
            merged[-1][1] = max(merged[-1][1], end)
        else:
            merged.append([start, end])

    return ['-'.join((str(start), str(end))) for start, end in merged]
|
| 103 |
+
|
| 104 |
def generate_function_predictions(domains_df, mapping_path):
|
| 105 |
|
| 106 |
# read domain2go mappings
|
|
|
|
| 124 |
# save protein function predictions
|
| 125 |
protein_name = domains_df['protein_name'].iloc[0]
|
| 126 |
result_text= 'Function predictions found.'
|
| 127 |
+
return [result_text, merged_df]
|