Spaces:
Running
Running
| import streamlit as st | |
| import requests | |
| from io import StringIO | |
| from Bio import SeqIO | |
| import os | |
| import time | |
| import pandas as pd | |
| from run_domain2go_app import * | |
def convert_df(df: pd.DataFrame) -> bytes:
    """Serialize a DataFrame to UTF-8 encoded CSV bytes without the index column.

    Args:
        df: The table to export.

    Returns:
        The CSV text (header row plus data rows) encoded as UTF-8 bytes.
    """
    csv_text = df.to_csv(index=False)
    return csv_text.encode('utf-8')
# Banner shown at the top of the main page: explains that the app handles one
# protein per run and points to the precomputed dataset for bulk use.
DISCLAIMER_HTML = """
<div style="background-color:#f0f2f6;padding:10px">
<p style="color:#b22d2a;font-size:15px;">Disclaimer</p>
<p style="color:#000000;font-size:14px;">This program is designed to generate predictions for a single protein due to the extended runtime of InterProScan. If you need predictions for multiple UniProtKB/Swiss-Prot proteins, we recommend utilizing our comprehensive protein function prediction dataset available in our <a href="https://github.com/HUBioDataLab/Domain2GO">Github repository</a>.</p>
</div>
"""

# Raw HTML is required for the inline styling, hence unsafe_allow_html.
st.markdown(DISCLAIMER_HTML, unsafe_allow_html=True)
# Example FASTA record offered by the "Use example sequence" button.
EXAMPLE_FASTA = (
    '>sp|O18783|PLMN_NOTEU\n'
    'MEYGKVIFLFLLFLKSGQGESLENYIKTEGASLSNSQKKQFVASSTEECEALCEKETEFVCRSFEHYNKEQKCVIMSENSKTSSVERKRDVVLFEKRIYLSDCKSGNGRNYRGTLSKTKSGITCQKWSDLSPHVPNYAPSKYPDAGLEKNYCRNPDDDVKGPWCYTTNPDIRYEYCDVPECEDECMHCSGENYRGTISKTESGIECQPWDSQEPHSHEYIPSKFPSKDLKENYCRNPDGEPRPWCFTSNPEKRWEFCNIPRCSSPPPPPGPMLQCLKGRGENYRGKIAVTKSGHTCQRWNKQTPHKHNRTPENFPCRGLDENYCRNPDGELEPWCYTTNPDVRQEYCAIPSCGTSSPHTDRVEQSPVIQECYEGKGENYRGTTSTTISGKKCQAWSSMTPHQHKKTPDNFPNADLIRNYCRNPDGDKSPWCYTMDPTVRWEFCNLEKCSGTGSTVLNAQTTRVPSVDTTSHPESDCMYGSGKDYRGKRSTTVTGTLCQAWTAQEPHRHTIFTPDTYPRAGLEENYCRNPDGDPNGPWCYTTNPKKLFDYCDIPQCVSPSSFDCGKPRVEPQKCPGRIVGGCYAQPHSWPWQISLRTRFGEHFCGGTLIAPQWVLTAAHCLERSQWPGAYKVILGLHREVNPESYSQEIGVSRLFKGPLAADIALLKLNRPAAINDKVIPACLPSQDFMVPDRTLCHVTGWGDTQGTSPRGLLKQASLPVIDNRVCNRHEYLNGRVKSTELCAGHLVGRGDSCQGDSGGPLICFEDDKYVLQGVTSWGLGCARPNKPGVYVRVSRYISWIEDVMKNN'
)

# Sidebar: collects the protein (typed/pasted or uploaded) and the email
# address, and stores them in st.session_state under 'sequence', 'name'
# and 'email' for the later steps of the script.
with st.sidebar:
    st.title("Domain2GO: Mutual Annotation-Based Prediction of Protein Domain Functions")
    # The two links previously had empty link text and therefore rendered as
    # nothing visible; plain text labels make them usable.
    # NOTE(review): these were probably badge images originally - restore the
    # badge markup here if it is available.
    st.write(
        "[bioRxiv preprint](https://www.biorxiv.org/content/10.1101/2022.11.03.514980v1) "
        "[GitHub repository](https://github.com/HUBioDataLab/Domain2GO)"
    )

    # Toggle remembered across reruns: whether the example record is pre-filled.
    if 'example_seq_button' not in st.session_state:
        st.session_state.example_seq_button = False

    def click_button():
        """Flip the 'use example sequence' toggle (button on_click callback)."""
        st.session_state.example_seq_button = not st.session_state.example_seq_button

    input_type = st.radio('Select input type', ['Enter sequence', 'Upload FASTA file'])

    if input_type == 'Enter sequence':
        # A multi-line text area is used in both cases: a FASTA record needs a
        # header line and a sequence line, which a single-line text input
        # cannot hold.
        prefill = EXAMPLE_FASTA if st.session_state.example_seq_button else ''
        st.session_state['sequence'] = st.text_area(
            'Enter protein sequence in FASTA format.',
            value=prefill,
        )

        # The protein name is the first line of the input without the '>' marker.
        st.session_state['name'] = st.session_state['sequence'].split('\n')[0].strip('>')
        st.button('Use example sequence', on_click=click_button)
    else:
        protein_input = st.file_uploader('Choose file')
        if protein_input:
            protein_input_stringio = StringIO(protein_input.getvalue().decode("utf-8"))
            fasta_sequences = SeqIO.parse(protein_input_stringio, 'fasta')
            # Each record overwrites the previous one, so for a multi-record
            # file the last record is the one that is kept.
            for fasta in fasta_sequences:
                st.session_state['name'], st.session_state['sequence'] = fasta.id, str(fasta.seq)

    st.session_state['email'] = st.text_input('Enter your email for InterProScan query*: ')
    st.markdown("""
<p style="color:#000000;font-size:12px;">*InterProScan requests your email to notify you when your job is done. Your email will not be used for any other purpose.</p>
""", unsafe_allow_html=True)
# Gate for the prediction run: `submitted` becomes True only when the
# 'Predict functions' button was clicked in this run AND both inputs are
# usable (a non-empty sequence and an email address containing '@').
submitted = False
with st.sidebar:
    if st.button('Predict functions'):
        # Test the stored values rather than key presence: in 'Enter sequence'
        # mode the 'sequence' key is written on every run, so it exists even
        # when the field is still empty.
        entered_email = (st.session_state.get('email') or '').strip()
        entered_sequence = (st.session_state.get('sequence') or '').strip()

        if entered_sequence and '@' in entered_email:
            submitted = True
            st.session_state.disabled = True
        else:
            # Already inside the sidebar context, so the warning appears there.
            st.warning('Please enter your email and protein sequence first. If you have already entered your email and protein sequence, please check that your email is valid.')
# Prompt rendered on the main page for every run that is not a valid submission.
START_PROMPT_HTML = """
<div style="padding:30px">
<p style="color:#2a7b36;font-size:20px;">Submit your protein sequence to start.</p>
</div>
"""

if not submitted:
    # Inline styling needs raw HTML, hence unsafe_allow_html.
    st.markdown(START_PROMPT_HTML, unsafe_allow_html=True)
# Outcome flags of the InterProScan step for the current run.
no_domains = False
error_in_interproscan = False

if submitted:
    # Forget results of an earlier submission in this session. Without this, a
    # run that fails or finds no domains would fall through to the previous
    # protein's 'domain_df'/'pred_df' and present them under the new name.
    for stale_key in ('domain_df', 'pred_df'):
        if stale_key in st.session_state:
            del st.session_state[stale_key]

    # Step 1: domain search. find_domains returns a sequence whose first item
    # is a status message; the remaining items depend on that status.
    with st.spinner('Finding domains in sequence using InterProScan. This may take a while...'):
        result = find_domains(st.session_state.email, st.session_state.sequence, st.session_state.name)

    result_text = result[0]
    if result_text == 'Domains found.':
        st.session_state['domain_df'] = result[1]
    elif result_text == 'No domains found.':
        st.warning(result_text)
        no_domains = True
    else:
        # Any other status is treated as an InterProScan failure; the job id
        # and raw response are shown to help the user report or retry.
        st.error(result_text)
        st.write(f'InterProScan job id: {result[1]}')
        st.write(f'InterProScan job response: {result[2]}')
        error_in_interproscan = True

    # The domain table (st.session_state.domain_df) is not rendered; only the
    # function predictions derived from it are shown.

    # Step 2: function prediction, only when step 1 produced a domain table.
    if 'domain_df' in st.session_state:
        with st.spinner('Generating function predictions...'):
            mapping_path = './data'
            pred_results = generate_function_predictions(st.session_state.domain_df, mapping_path)

        pred_result_text = pred_results[0]
        if pred_result_text == 'Function predictions found.':
            st.success('Function predictions generated.')
            st.session_state['pred_df'] = pred_results[1]
        elif pred_result_text == 'No predictions made for domains found in sequence.':
            st.warning(pred_result_text)
    elif error_in_interproscan:
        st.error('Error in InterProScan. Please check InterProScan job id and response.')

# Step 3: results. Rendered outside the submission branch so that predictions
# kept in session state stay visible on reruns triggered by other widgets
# (for example a click on the download button).
if 'pred_df' in st.session_state:
    with st.expander('Show function predictions'):
        st.write(st.session_state.pred_df)

    pred_csv = convert_df(st.session_state.pred_df)
    st.download_button(
        label="Download function predictions as CSV",
        data=pred_csv,
        file_name=f"{st.session_state.name}_function_predictions.csv",
        mime="text/csv",
    )