Upload new model safetensors with trained LMHead
#3
by
VishrutThoutam
- opened
This view is limited to 50 files because it contains too many changes.
See the raw diff here.
- .gitattributes +43 -1
- .gitignore +0 -24
- README.md +33 -37
- __init__.py +0 -0
- benchmarks/.DS_Store +0 -0
- .DS_Store → benchmarks/Generation/.DS_Store +0 -0
- benchmarks/Generation/ProtGPT2/protgpt2_finetune.py +70 -0
- benchmarks/Generation/ProtGPT2/protgpt2_generate.py +55 -0
- benchmarks/Generation/ProtGPT2/protgpt2_generated_sequences.csv +101 -0
- benchmarks/Generation/ProtGPT2/protgpt2_test.txt +0 -0
- benchmarks/Generation/ProtGPT2/protgpt2_train.txt +0 -0
- benchmarks/Generation/ProtGPT2/run_clm.py +657 -0
- benchmarks/Generation/Visualize/analyze_mdlm_denovo_gen.py +7 -0
- benchmarks/Generation/Visualize/esm_umap.png +0 -0
- benchmarks/Generation/Visualize/esm_umap.py +111 -0
- benchmarks/Generation/Visualize/mdlm_de-novo_generation_results.csv +101 -0
- benchmarks/MLM/config.py +14 -0
- benchmarks/MLM/data_loader.py +48 -0
- benchmarks/MLM/esm_utils.py +16 -0
- benchmarks/MLM/mlm_generate_utils.py +108 -0
- benchmarks/MLM/mlm_lowercase_results.csv +0 -0
- benchmarks/MLM/mlm_motif_benchmarking.py +39 -0
- benchmarks/MLM/mlm_uppercase_results.csv +0 -0
- benchmarks/MLM/model.py +65 -0
- benchmarks/MLM/pretrained_models.py +12 -0
- benchmarks/MLM/screen_mlm_cosine_hamming.py +17 -0
- benchmarks/MLM/train_and_test.py +184 -0
- benchmarks/Supervised/.DS_Store +0 -0
- benchmarks/Supervised/Localization/cell_localization_predictor.py +224 -0
- benchmarks/Supervised/Localization/process_cell_local_data.py +12 -0
- benchmarks/Supervised/Localization/true_deeploc2.0_cell-local_test.csv +0 -0
- memdlm_schematic.png → benchmarks/Supervised/Localization/true_deeploc2.0_cell-local_train-val.csv +2 -2
- benchmarks/Supervised/Membrane Type/membrane_type_predictor.py +226 -0
- benchmarks/Supervised/Membrane Type/membrane_type_test.csv +0 -0
- benchmarks/Supervised/Membrane Type/membrane_type_train.csv +3 -0
- benchmarks/Supervised/Membrane Type/split_membrane_type_data.py +15 -0
- benchmarks/Supervised/Membrane Type/unsplit_membrane_type_all.csv +3 -0
- benchmarks/Supervised/Solubility/solubility_transformer.py +353 -0
- checkpoints/.DS_Store +0 -0
- config.json +30 -0
- config.yaml +127 -0
- data/.DS_Store +0 -0
- data/membrane/test.csv +0 -0
- data/membrane/train.csv +0 -0
- data/membrane/val.csv +0 -0
- data/uniref/100k_seqs/check_data.ipynb +168 -0
- data/uniref/100k_seqs/test.csv +0 -0
- data/uniref/100k_seqs/train.csv +3 -0
- data/uniref/100k_seqs/val.csv +0 -0
- data/uniref/200k_seqs/check_data.ipynb +168 -0
.gitattributes
CHANGED
|
@@ -1 +1,43 @@
|
|
| 1 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
benchmarks/DeepLoc/cell_localization_train_val.csv filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
benchmarks/DeepLoc/membrane_type_train.csv filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
benchmarks/DeepLoc/OG_membrane_type_all.csv filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
data/uniref/100k_seqs/train.csv filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
data/uniref/200k_seqs/train.csv filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
benchmarks/Supervised/Localization/true_deeploc2.0_cell-local_train-val.csv filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
benchmarks/Supervised/Membrane[[:space:]]Type/membrane_type_train.csv filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
benchmarks/Supervised/Membrane[[:space:]]Type/unsplit_membrane_type_all.csv filter=lfs diff=lfs merge=lfs -text
|
.gitignore
DELETED
|
@@ -1,24 +0,0 @@
|
|
| 1 |
-
# .gitignore
|
| 2 |
-
|
| 3 |
-
/checkpoints/
|
| 4 |
-
/data/
|
| 5 |
-
/results/
|
| 6 |
-
/build/
|
| 7 |
-
/src/scripts/
|
| 8 |
-
/src/benchmarks
|
| 9 |
-
|
| 10 |
-
/src/lm/dplm
|
| 11 |
-
/src/lm/evodiff
|
| 12 |
-
/src/lm/dplm_playground.ipynb
|
| 13 |
-
/src/lm/evoflow_playground.ipynb
|
| 14 |
-
/src/utils/ubuntu_font
|
| 15 |
-
|
| 16 |
-
/src/sampling/old_guidance.py
|
| 17 |
-
|
| 18 |
-
/MeMDLM_v2.egg-info/
|
| 19 |
-
*.pth
|
| 20 |
-
*.ckpt
|
| 21 |
-
*.err
|
| 22 |
-
*.out
|
| 23 |
-
*.csv
|
| 24 |
-
__pycache__/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
README.md
CHANGED
|
@@ -1,51 +1,47 @@
|
|
| 1 |
---
|
| 2 |
license: cc-by-nc-nd-4.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
---
|
| 4 |
|
| 5 |
-
|
| 6 |
|
| 7 |
-
|
| 8 |
-
<a href="https://shreygoel09.github.io/" target="_blank">Shrey Goel</a><sup>1</sup> <b>·</b> 
|
| 9 |
-
<a href="https://www.linkedin.com/in/perin-schray-96855a32b/" target="_blank">Perin Schray</a><sup>2</sup> <b>·</b> 
|
| 10 |
-
<a href="https://www.linkedin.com/in/yinuozhang98/" target="_blank">Yinuo Zhang</a><sup>3</sup> <b>·</b> 
|
| 11 |
-
<a href="https://www.linkedin.com/in/sophia-vincoff-185192146/" target="_blank">Sophia Vincoff</a><sup>4</sup> <b>·</b> 
|
| 12 |
-
<a href="https://www.linkedin.com/in/htkratochvil/" target="_blank">Huong T. Kratochvil</a><sup>2</sup> <b>·</b> 
|
| 13 |
-
<a href="https://www.chatterjeelab.com/" target="_blank">Pranam Chatterjee</a><sup>4<sup>
|
| 14 |
-
<br>
|
| 15 |
-
<p style="font-size: 16px;">
|
| 16 |
-
<sup>1</sup> Duke University  
|
| 17 |
-
<sup>2</sup> UNC—Chapel Hill  
|
| 18 |
-
<sup>3</sup> Duke-NUS Medical School  
|
| 19 |
-
<sup>4</sup> University of Pennsylvania  
|
| 20 |
-
</div>
|
| 21 |
-
|
| 22 |
-
<div align="center">
|
| 23 |
-
<a href="https://arxiv.org/abs/2410.16735"><img src="https://img.shields.io/badge/Arxiv-2506.09007-red?style=for-the-badge&logo=Arxiv" alt="arXiv"/></a>
|
| 24 |
|
| 25 |
-
|
| 26 |
|
|
|
|
| 27 |
|
|
|
|
| 28 |
|
|
|
|
|
|
|
| 29 |
|
| 30 |
-
|
|
|
|
| 31 |
|
|
|
|
| 32 |
|
| 33 |
-
|
|
|
|
| 34 |
|
| 35 |
-
|
|
|
|
| 36 |
|
| 37 |
-
-
|
| 38 |
-
- Higher BLOSUM-62 scores
|
| 39 |
-
- Improved pLDDT confidence
|
| 40 |
-
|
| 41 |
-
To enhance controllability, we develop ***Per-Token Guidance (PET)***, a novel classifier-guided sampling strategy that selectively solubilizes residues while preserving conserved TM domains. This yields sequences with reduced TM density but intact functional cores.
|
| 42 |
-
|
| 43 |
-
Importantly, MemDLM designs validated in TOXCAT β-lactamase growth assays demonstrate successful TM insertion, distinguishing high-quality generated sequences from poor ones.
|
| 44 |
-
|
| 45 |
-
Together, our framework establishes the first experimentally validated diffusion-based model for rational membrane protein generation, integrating *de novo* design, motif scaffolding, and targeted property optimization.
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
## **Repository Authors**
|
| 50 |
-
- <u>[Shrey Goel](https://shreygoel09.github.io/)</u> – undergraduate student at Duke University
|
| 51 |
-
- <u>[Pranam Chatterjee](mailto:pranam@seas.upenn.edu)</u> – Assistant Professor at University of Pennsylvania
|
|
|
|
| 1 |
---
|
| 2 |
license: cc-by-nc-nd-4.0
|
| 3 |
+
extra_gated_fields:
|
| 4 |
+
Name: text
|
| 5 |
+
Company: text
|
| 6 |
+
Country: country
|
| 7 |
+
Specific date: date_picker
|
| 8 |
+
I want to use this model for:
|
| 9 |
+
type: select
|
| 10 |
+
options:
|
| 11 |
+
- Research
|
| 12 |
+
- Education
|
| 13 |
+
- label: Other
|
| 14 |
+
value: other
|
| 15 |
+
I agree to share generated sequences and associated data with authors before publishing: checkbox
|
| 16 |
+
I agree not to file patents on any sequences generated by this model: checkbox
|
| 17 |
+
I agree to use this model for non-commercial use ONLY: checkbox
|
| 18 |
+
base_model:
|
| 19 |
+
- facebook/esm2_t30_150M_UR50D
|
| 20 |
+
pipeline_tag: fill-mask
|
| 21 |
---
|
| 22 |
|
| 23 |
+
# MeMDLM: De Novo Membrane Protein Design with Masked Diffusion Language Models
|
| 24 |
|
| 25 |
+

|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
|
| 27 |
+
Masked Diffusion Language Models (MDLMs), introduced by Sahoo et al. (https://arxiv.org/abs/2406.07524), provide strong generative capabilities to BERT-style models. In this work, we pre-train and fine-tune ESM-2-150M on the MDLM objective to scaffold functional motifs while unconditionally generating realistic, high-quality membrane protein sequences.
|
| 28 |
|
| 29 |
+
## Model Usage
|
| 30 |
|
| 31 |
+
The MDLM model is built on an internal backbone model, a fine-tuned version of ESM-2 (150M). This backbone model can be used directly through this repo:
|
| 32 |
|
| 33 |
+
```python
|
| 34 |
+
from transformers import AutoTokenizer, AutoModelForMaskedLM

tokenizer = AutoTokenizer.from_pretrained("ChatterjeeLab/MeMDLM")
model = AutoModelForMaskedLM.from_pretrained("ChatterjeeLab/MeMDLM")

input_sequence = "QMMALTFITYIGCGLSSIFLSVTLVILIQLCAALLLLNLIFLLDSWIALYnTRGFCIAVAVFLHYFLLVSFTWMGLEAFHMYLKFCIVGWGIPAVVVSIVLTISPDNYGidFCWINSNVVFYITVVGYFCVIFLLNVSMFIVVLVQLCRIKKKKQLGDL"

inputs = tokenizer(input_sequence, return_tensors="pt")
output = model(**inputs)

# The forward pass returns an output object, not a tensor: pick the most
# likely token id at every position from its logits before decoding.
predicted_token_ids = output.logits.argmax(dim=-1).squeeze()
filled_protein_seq = tokenizer.decode(predicted_token_ids) # contains the output protein sequence with filled mask tokens
|
| 45 |
+
```
|
| 46 |
|
| 47 |
+
This backbone model can be integrated with the [MDLM formulation](https://github.com/kuleshov-group/mdlm) by setting the model backbone type to "hf_dit" and setting the Hugging Face Model ID to "ChatterjeeLab/MeMDLM".
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
__init__.py
DELETED
|
File without changes
|
benchmarks/.DS_Store
ADDED
|
Binary file (6.15 kB). View file
|
|
|
.DS_Store → benchmarks/Generation/.DS_Store
RENAMED
|
Binary files a/.DS_Store and b/benchmarks/Generation/.DS_Store differ
|
|
|
benchmarks/Generation/ProtGPT2/protgpt2_finetune.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import os
|
| 3 |
+
import subprocess
|
| 4 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
# Format sequence inputs based on ProtGPT fine-tuning requirements
|
| 8 |
+
def modify_sequences(sequence, line_width=60, separator="<|endoftext|>"):
    """Format one sequence for the ProtGPT2 fine-tuning text files.

    The sequence is upper-cased, broken into lines of at most ``line_width``
    characters, and preceded by ``separator`` on a line of its own.

    Args:
        sequence: Sequence string to format.
        line_width: Maximum number of characters per output line. Defaults
            to 60, the value that was previously hard-coded.
        separator: Text written on the line before the sequence.

    Returns:
        ``separator``, a newline, then the wrapped upper-case sequence. An
        empty ``sequence`` yields ``separator`` followed by a single newline.

    Raises:
        ValueError: If ``line_width`` is smaller than 1.
    """
    if line_width < 1:
        raise ValueError("line_width must be a positive integer")

    upper_sequence = sequence.upper()
    wrapped_lines = [
        upper_sequence[start:start + line_width]
        for start in range(0, len(upper_sequence), line_width)
    ]
    return separator + "\n" + "\n".join(wrapped_lines)
|
| 16 |
+
|
| 17 |
+
# Function to save sequences to txt files
|
| 18 |
+
def to_txt_file(df, filename):
    """Write every entry of ``df['Sequence']`` to ``filename``.

    Each entry is written followed by a newline. An existing file at
    ``filename`` is overwritten.

    Args:
        df: Object whose ``'Sequence'`` item is an iterable of strings.
        filename: Path of the text file to create.
    """
    # Explicit encoding keeps the output identical across platforms whose
    # default locale encoding differs.
    with open(filename, 'w', encoding='utf-8') as f:
        f.writelines(sequence + '\n' for sequence in df['Sequence'])
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
# Modify the sequences
path = "/workspace/sg666/MDpLM"
# Directory that holds run_clm.py and receives the generated text files.
protgpt2_dir = path + "/benchmarks/Generation/ProtGPT2"

train = pd.read_csv(path + "/data/membrane/train.csv")
val = pd.read_csv(path + "/data/membrane/val.csv")
test = pd.read_csv(path + "/data/membrane/test.csv")

# The validation rows are merged into the training set; the test split is
# what gets passed to run_clm.py as --validation_file below.
train = pd.concat([train, val])

train['Sequence'] = train['Sequence'].apply(modify_sequences)
test['Sequence'] = test['Sequence'].apply(modify_sequences)


# Save the modified sequences as txt files
to_txt_file(train, protgpt2_dir + '/protgpt2_train.txt')
to_txt_file(test, protgpt2_dir + '/protgpt2_test.txt')


# NOTE(review): neither object is referenced again in this script; presumably
# this only pre-downloads the checkpoint before the subprocess starts -- confirm.
tokenizer = AutoTokenizer.from_pretrained("nferruz/ProtGPT2")
model = AutoModelForCausalLM.from_pretrained("nferruz/ProtGPT2")

finetune_protgpt2_command = [
    "python", "run_clm.py",
    "--model_name_or_path", "nferruz/ProtGPT2",
    "--train_file", "protgpt2_train.txt",
    "--validation_file", "protgpt2_test.txt",
    "--tokenizer_name", "nferruz/ProtGPT2",
    "--num_train_epochs", "10",
    "--logging_steps", "1",
    "--logging_dir", "test",
    "--do_train",
    "--do_eval",
    "--output_dir", "/workspace/sg666/MDpLM/benchmarks/Generation/ProtGPT2/finetuned_models",
    "--overwrite_output_dir",
    "--learning_rate", "3e-04",
    "--per_device_train_batch_size", "2",
    "--evaluation_strategy", "epoch",
]

try:
    # The script name and data files in the command are relative, so run the
    # child process from the directory the text files were just written to;
    # otherwise the command only works when launched from that directory.
    result = subprocess.run(
        finetune_protgpt2_command,
        check=True,
        text=True,
        capture_output=True,
        cwd=protgpt2_dir,
    )
except subprocess.CalledProcessError as e:
    print("Command failed with the following error:")
    print(e.stderr)  # Print standard error output
    print("Command output:")
    print(e.stdout)  # Print standard output if needed
else:
    # Output is captured, so surface it instead of silently discarding it.
    print(result.stdout)
|
| 70 |
+
|
benchmarks/Generation/ProtGPT2/protgpt2_generate.py
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from transformers import pipeline
|
| 2 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 3 |
+
import math
|
| 4 |
+
import torch
|
| 5 |
+
import sys
|
| 6 |
+
import pandas as pd
|
| 7 |
+
|
| 8 |
+
# Function to calculate perplexity of each generated sequence
|
| 9 |
+
def calculate_perplexity(sequence, model, tokenizer):
    """Return the perplexity of ``sequence`` under ``model``.

    The sequence is wrapped in ``<|endoftext|>`` markers, encoded with
    ``tokenizer.encode`` and passed to ``model`` as both input and labels
    with gradients disabled. The perplexity is ``exp`` of the first element
    of the model output, which is treated as the loss.

    Args:
        sequence: Generated text to score.
        model: Model called as ``model(input_ids, labels=input_ids)``.
        tokenizer: Object providing ``encode(text)`` returning token ids.

    Returns:
        The perplexity as a Python float.
    """
    sequence = "<|endoftext|>" + sequence + "<|endoftext|>"

    # Place the inputs on the model's own device rather than relying on a
    # module-level ``device`` variable that only exists when run as a script.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    input_ids = torch.tensor(tokenizer.encode(sequence)).unsqueeze(0)
    input_ids = input_ids.to(target_device)
    with torch.no_grad():
        outputs = model(input_ids, labels=input_ids)
    loss = outputs[0]
    return math.exp(loss.item())
|
| 17 |
+
|
| 18 |
+
if __name__ == "__main__":
    device = torch.device("cuda" if torch.cuda.is_available() else 'cpu')
    path = "/workspace/sg666/MDpLM/benchmarks/Generation/ProtGPT2"

    # Load fine-tuned model and tokenizer once; the model is moved to the
    # target device here instead of on every loop iteration.
    model_path = path + "/finetuned_models/checkpoint-4510"
    model = AutoModelForCausalLM.from_pretrained(model_path).to(device)
    tokenizer = AutoTokenizer.from_pretrained(model_path)

    # Generate sequences, reusing the already-loaded model and tokenizer so
    # the checkpoint is not loaded a second time by the pipeline.
    protgpt2 = pipeline('text-generation', model=model, tokenizer=tokenizer, device=device)
    sequences = protgpt2("", max_length=100, do_sample=True, top_k=950, repetition_penalty=1.5, num_return_sequences=100, eos_token_id=0)

    # Store generated sequences and their associated perplexities.
    # Perplexity is computed on the raw generated text, before cleaning.
    generated_sequences = [item['generated_text'] for item in sequences]
    perplexities = [
        calculate_perplexity(raw_sequence, model, tokenizer)
        for raw_sequence in generated_sequences
    ]

    # Clean the generated sequences
    cleaned_sequences = [seq.replace('\n', '').replace('<|endoftext|>', '') for seq in generated_sequences]

    # Create df with cleaned sequences and perplexities
    df = pd.DataFrame({"Sequence": cleaned_sequences, "Perplexity": perplexities})
    df.sort_values(by='Perplexity', inplace=True)

    # Save results
    df.to_csv(path + "/protgpt2_generated_sequences.csv", index=False)

    # View the average de novo generation perplexity
    avg_generation_ppl = df.loc[:, 'Perplexity'].mean()
    print(f'Average de novo generation perplexity: {avg_generation_ppl}')
|
benchmarks/Generation/ProtGPT2/protgpt2_generated_sequences.csv
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Sequence,Perplexity
|
| 2 |
+
LAPSVVTGVAQSSPLTIVTNPKEPRQPVPASDGADYLKTIPGFAVIRNGGSNGDPVLRGMFGSRLNILTNGGMMLGACPNRMDAPTSYISPETYDKLTVIKGPQTVLWGPGASAGTILFEREPERFGELGSRVNASLLAGSNGRFDKVLDAAAGNRLGYLRFTGNHAQSDDYEDGAGNTVPSRWKKWNGDVAVGWTPDEDTLIELTAGKGDGEARYAGRGMDGSQFKRESLGLRFVKSNVSDVLEKVEAQVYYNYADHIMDNFRLRTPDPSMPMT,2.6532732777535712
|
| 3 |
+
MPNFFIDRPIFAWVIAIIIMLAGGLAILKLPVAQYPTIAPPAVTISASYPGADAKTVQDTTVQIIEQNLNGLDNLLYMSSTSDDSGNATITITFAPGTNPDIAQVQVQNKLSLATPILPQAVQRQGVSVEKSSSSFLMVVGVINTDGTMTQEDISDYVAANMKDAISRTSGVGDVQLFGSQYAMRIWMNPNELNKFQLTPVDVITAIKAQNAQVAAGQLGGTPPVKGQQLNASIIAQTRLTSTEEFGKILLKVNQDGSRVLLRDVAKIELGGENYDIIAEFNGQPASGLGIKLATG,2.829348107084168
|
| 4 |
+
MAYRSTTLLALLALVLLYLVSGALVFRALEQPHEQQAQRELGEVREKFLRAHPCVSDQELGLLIKEVADALGGGADPETQSTSAWDLGSAFFFSGTIITTIGYGNVALRTDAGRLFCIFYAAXFGIPFTLLFLTAVGDRLGSSLRHGIGHIEAIFLKWHVPPELVRVLSEMLFLLVGCLLFVLTPTFVFCYMEDWSKLEAIYFVIVTLTTVGFGDYVAGADPRQDSPAYQPLVWFWILLGLAYFASVSAML,3.119025307842878
|
| 5 |
+
MPNFFIDRPIFAWVIAIIIMLAGGLAILKLPVAQYPTIAPPAVTISASYPGADAKTVQDTTVQIIEQQMNGLDGLRYISSNSAGNGQASIQLNFEQGVDPDIAQVQVQNKLQLAMPLLPQAVKEQGVSVEKSSSSFLMVVGVINTDGTMTQEDISDYVAANMKDAISRTSGVGDVQLFGSQYAMRIWMNPNELNKFQLTPVDVITAIKAQNAQVAAGQLGGTPPVKGQQLNASIIAQTRLTSTEEFGKILLKVNQDGSRVLLRDVAKIELGGENYDIIAEFNGQPASGLG,3.775355043694786
|
| 6 |
+
LFLTMAEAQLRYKTTEECLAYFGVSETTGLTPDQVKRHLEKYGHNELPAEEGKSLWELVIEQFEDLLVRILLLAACISFVLAWFEEGEETVTAFVEPFIILLILIANAIVGVWQERNAENAIEALKEYEPEMGKVYRADRKSVQRIKARDIVPGDIVEVAVGDKVPADIRILSIKSTTLRVDQSILTGESVSVAKSSDAVPDPRAVNQDKKNMLFSGTNIAAGKALGIVATTGVSTEIGKIRDQMAATEQDKTPLQQKLDEFGEQLSKVISLICIAVWLINIGHFNDPVHGGSWI,4.136701078251139
|
| 7 |
+
MPNFFIDRPIFAWVIAIIIMLAGGLAILKLPVAQYPTIAPPAVTISASYPGADAKTVQDTTVQIIEQNMNGIDNLMYMSSNSDSTGTAQITLTFESGTDADIAQVQVQNKLQLAMPLLPQAVQQQGVSVEKSSSSFLMVVGVINTDGTMTQEDISDYVAANMKDAISRTSGVGDVQLFGSQYAMRIWMNPNELNKFQLTPVDVITAIKAQNAQVAAGQLGGTPPVKGQQLNASIIAQTRLTSTEEFGKILLKVNQDGSRVLLRDVAKIELGGENYDIIAEFNGQPASGLG,4.210716900525416
|
| 8 |
+
MPNFFIDRPIFAWVIAIIIMLAGGLAILKLPVAQYPTIAPPAVTISASYPGADAKTVQDTTVQIIEQQMNGLDGLRYISSNSAGNGQASIQLTFESGTDADIAQVQVQNKLQLAMPLLPQEVAQQGVSVEKSSSSFLMVVGVINTDGTMTQEDISDYVAANMKDAISRTSGVGDVQLFGSQYAMRIWMNPNELNKFQLTPVDVITAIKAQNAQVAAGQLGGTPPVKGQQLNASIIAQTRLTSTEEFGKILLKVNQDGSRVLLRDVAKIELGGENYDIIAEFNGQPAS,4.526996795741569
|
| 9 |
+
MLKIIIPTTMLLPMTWMSKHNMIWINATVHSLLISLISLSLLNQLGENSLNFSLTFFSDSLSAPLLVLTTWLLPLMLMASQSHLSKETTTRKKLYITMLLILLQLFLIMTFTATELILFYIFESASLPTLLMITRWGNQTERLNAGLYFLMYTLAGSLPLLVALVYIQNTTGSLNFLIIHWSTHTSASFVSQTLLLMAWMAAMAVMAKMPLYGVHLWLPKAHVEAPIAGSMVLAAVLLKLGGYGMLRITTILNPLTNYMAYPFLMLCLWGMI,4.629232424547782
|
| 10 |
+
AKFINRWLFSTNHKDIGTLYLLFGAWAGMVGTALSLLIRAELGQPGTLLGDDQIYNVVVTGHAFVMIFFMVMPIMIGGFGNWLVPLMIGAPDMAFPRMNNMSFWLLPPSFLLLLASSMVEAGAGCGWTVYPPLAGNLAHAGASVDLTIFSLHLAGVSSILGAINFITTIINMKPPAMSQYQTPLFVWSVMITAVLLLLSLPVLAAGITMLLTDRNLNTTFFDPAGGGDPILYQHLFWFFGHPXVLILILPFFGIVTEASAIPRIFNWMVTFHGQLMYHHMWIIGVL,5.0608380016313275
|
| 11 |
+
LVEKDPIKTSFEKWAKPGHFSRTLAKGPNTTTWIWNLHADAHDFDSYTSDLEEISRKVFSAHFGHLAVVTIWLSGMIFHGAKFSNYEAWLSDPLNVRPSAQVVWPLVGQDILNGDMGDGTYNGFQVMTSGLFQLWRASGITNEYQLYCTAIGGLVMAALMLFAGWFHYHKAAPKLAWFQDVETALNHHLSGLLGLGCLSWAGHQIHVSLPVNKLLDAGVAAKDIPLPHEFILDPAKFASLLPGLTQGLTPFFTLNWSEYSDFLTFKGGLNPVTGGL,5.597917119515088
|
| 12 |
+
MVRKVYVTLQGKVQGVFFRAHTQATAKQLGVVGWVRNTSDGTVEGEAQGPADKVDEMINWLHRGPPQAQIESHEFNSEKKELEAFSSFHIRY,5.635017933300935
|
| 13 |
+
EFGFWEIKFPEYLKGRPTTGRPEWVQDVDLVNKWAVPGLNPPHHFSPPVNLTGVEDTLPVSWVMVSMVVGFVLIVATAGNILVIIAVFTSRALKAPQNLFLVSLASADILVATLVIPFAMANEVMGYWYFGKAWCEIYLALDVLFCTSSAWHLCAISLDRYWSITQAIEYNLKRTPRRTKAIIITVWVISAVISFPPRCEINDQKWYYVISSCIGSFFAPCLIMILVYVRIYQIAKRRTRDLSRKSGRPSLLSEVHAAKSLAIL,6.370992471309986
|
| 14 |
+
MVYVSRISVFAFLGALASVAYGQVTPPNFGTEQDRVNFTKQIVPVLKEKCVVCHGPDKTKGKLRLDLRIEAFKGGESGESIDVIPGDPENSELLERITSKDPEFRMPPKSEHKPLTEAEIALLKQWILEGAKYDPAWAFTPPKRTDLPKVKRDEWAKNDVDRFILAKLESEGLTPNPEADKATLIRRVTLDLTGLPPTPAEVDAFLADKSPNAYEKVVDRLLASPHFGERWGRHWLDVARWAESNGFERNTIRNIWSYRDWVIKALNDDVPYDQFTVEQL,7.0499259667086145
|
| 15 |
+
SSNAKTVLITGGTGFVGRALVKRLLSTTKHTIVVPYREEADLHDVKVLQVKGDLRDAASLDAAFEGVDCVFHLASYGMSGPEMFELNVEGTRNVVEACLRHGVRRLIHVSSIAVMGEPSDHPRREADESLPARQATAYAKSKVEAERIVLEANGSDGLETVVVRPPMVWGPGDTQFLPRLVRMARRGLRPVIGNGKSLVSMVYIDNLVDGLIAAMDHPEARGKTYFLSNDGHASQREFIETVARAIGRPAPKLTLPVPVLYWAARLLG,7.429969652690046
|
| 16 |
+
SPELIEQLLQNYLQLPDAEKRKVADQLQTSNIRYCYLLASEKGWLDRVESCLAAEGCDVLQPDHTGRNLLQVVASVSPDHTARLIRALLARGADVHAQDSLGNTVLHILILQPNKTFACQMYNEILILGAKLCPTVNLEAVLNHQGLTPFKLAGVEGNTVMFQHLMQKRKHVQWTCGPLTSTLYDLTEIDSSGDDQSLLELIVTTKKREARQILEQTPVKELVSLKWKRYGRPYFCVLGAIYILYIICFTMCCVYRPLKPRITNRTNPRDNMTSLEL,7.910941817905356
|
| 17 |
+
ADVNLNARDLHGMTPLHLAAKNGHDKVVQLLLKKGALVNIQDKLGSTPLLEAIRGRREDTVKLLVEHGADIRAQDSLGNTVLHILILQPENSTSLKFAEMLYDMILLRSGTWELETTQPNDGLTALQLAAKMGKAEILKYILSREIKEKPLRSLSRKFTDWAYGPVSSSLYDLTNVDSSGNTVLHAMIMVADNTPQNSRFVKQMYNLLLSKGARLCPNVPNHQGLTPFKLAGVEGNIVMQEILRGTTISIPFTCITCGKKDTRFRGMSCEN,8.179497248919981
|
| 18 |
+
DPFNNFFRRSKIAVCGLVFFVLFIIYMVLGSMIFSAIERDHEQQAQRELGEVREKFLISHPCVSDQELGVLIEEVADALGGGADPETQSTSAWDLGSAFFFSGTIITTIGYGNVALRTDTMGRLFCIFYALVGIPLFGILLAGVGDRLGSSLRHGIGHIEAIFLKWHVPPGLVRVLSAMLFLAIGCLLFVTLPAYVFSHMEDWSKLEAIYFVIVTLTTVGFGDYVAGADPRQDSPQYQPLVWFWILL,8.306921086116862
|
| 19 |
+
GPQSFVHFTKQSLALIEQRIAERKSKEPKPSSDLEAGKQLPFIYGDIPPGMVSEPLEDLDPYYADKKTFIVLNKGKTIFRFNATPALYMLSPFSPLRRISIKILVHSLFSMLIMCTILTNCIFMTMNNPPDWTKNVEYTFTGIYTFESLVKILARGFCVGEFTFLRDPWNWLDFVVIVFAYVTEFVVAEFVSFSALRAFRVLRALKTISVIPGLKTIVGALIQSVKKLSDVMILTVFCLSVFALIGLQLFMGNLRKKCFFPDG,8.471762198050271
|
| 20 |
+
MLKIIIPTTMLLPMTWMSKHNMIWINATVHSLLISLISLSLLNQLGENSLNFSLTFFSDSLSAPLLVLTTWLLPLMLMASQSHLSKETTTRKKLYITQLILLQLFLIMTFTATELILFYIFESATLLPTLLIILRWGYQPERLQAGLYFLFYTLIGGVLVLLSILMIYVNTNSLLIHTLPMFNSTMETSLYTKIMWFACMMAFPTKMGLFPIHMWLPVVHSESPLAGSCILAGILLKLGGYGMMRVVTILNPLTNYMAYPFLML,8.583127806228307
|
| 21 |
+
MVLRLVVLALLCWTPGLWAQQADTLTLDEVVVTATRSEQNLQDVPASVSVITAEDLQRQAPRTLGEALRYVPGVFLDGTGRTNGQDINMRGYDHRGVLVLVDGIRQGTDTGHLNGTFLDPALIKRVEIVRGPSAALYGNGAAGGVVNFITRQPSDQLTGSVRLNTSLPQHDGDNSQQFYSLMAGNRLGEEGKLGMLASFSRQEKGQARDGAGNDIASLDEDSLSGKLLWQLTPEQQLDFSLDHYRFKTNAPHNPVNTDFTRHTRQESDSTVRRFFNQVQ,10.282136779067205
|
| 22 |
+
RPLVAIDFGTTYSGYAFSFKNQPETITLHWNSEISKALRKPTVLLIDSNMKEVAFGYEAENKFATLALDAEEKHFFFEKFKMALYDKNDRSILPSMRSANGTEKKAIDVFAEAIRYFKDHALKTINSTYPIDKQDLLWSVTVPSDWDARSKEFMRQAAVKAGLGEASLASEPEAASMYCVEHEVNKFGDEIKSGTKFLVVDVGGGTVDITVHEVLENNHLKELYKASGGPYGSVGIDQEFMKLFQLIVGAEAIEQFKIK,11.589466291126676
|
| 23 |
+
MKVSVIIPTYNERENLEELFSRIDQALQGLNYEIVVVDDDSPDRTWEKAQELSSKYPIKVCRRTKEKGLSSAVIRGFKEASGDVFVVMDADLQHPPEVIPKLIEAIKNGSDIAIGSRYVKGARVENWPFYRKLISKGALVVTKIPLKDLKDMRDFACGFIAIKREVIEKIEFDENLTYGKILKILKYCWGGFSKVVEVPFTFGIRARGESKLKGKTIFEYLRHIWSLNYTFFRILKLIFALGFTFFGVSLAYLTLVLMEKYFLWYIPGWAN,12.090375297427133
|
| 24 |
+
PGMQLNEFSSSGLGRAYSGEGAIADDAGNVSRNPALITMFDRPTFSAGAVYIDPDVNISGNSPLGAPGGTPSDREMKLVPTSHIALPINDRLAFGFAAYSNFGLATDYGDTFVGSTTPTDLEMKLNSLSIGGNAEITDQLSFGASITYQRAKIERFAGDLGQLVAGQIMQSPAGQTQQALLQAQSQGNLGSALAYANGIDSNTKIAHLNGNQWGYGWNAGILYELDKNNRYALTYRSEVKMTFKGNYSNDMPGYYEMNVPAWHNVSLYHE,12.173339409793382
|
| 25 |
+
DASRVYYEDRSVVKEDGSVVKEGPFDLQSTLTLSGVVRDYASGTPLADAEITLTGPAFRAHTNSYGKFVFEGLAAGTYTLSVSRFGYEPVSETIAVSAGQTVESNVALFALASEVEILEVTADADPVFNTGDVATSVGTREMKEIPTVVGDVDVIKSLQLLPGVASAGEGTSGFYVRGGGIDQNLYLLDNIPVYNVGHLFGFFSTFNSDAIKDVTLYKGGVPARYGGRLSSVLDITMKEGNSDKLSGTASIGLLPASAKLQGPI,12.228122271950522
|
| 26 |
+
GAVIDLSTATFDFGGSYTGVAVGDTITAVVTAPTEDDYVFQWFKDNVLQSGATGNSYTLTAAEAGKAIKVVVSGSKSGYTSTAKTAAVTTAITASSLTLTADKTKLTVGDTVTLTASLSDKNGNAVTGRTVKWSSSNTAVATVSSSGLVTGVAAGSATITASAEGQNGNGTANITVVAASVSSISLSPASASVAVGATQQFTASGYDSSGNVVTSGRVVTWASSNTSVATVSASGLVTAVAAGTATITVTSGGKSGNATVTVTAATLSSLSVSSSNL,12.23423450162324
|
| 27 |
+
MQTYNNPEVTYDWWAGNARFANLSGLFIAAHVAQAALIMFWAGAFTLYEISWLTADQSMGEQGLILLPHLATLGLGVGDGGQVTDTFPFFVVGAVHLIASAVLGAGALFHTFRAPSDLAAASGAAKRFQNFNPDLSKLGFISRHTHAAKPELWSQLIGGKHKTTTGFAWVGVANPDGSITGMGTAGIQVKQAEGVTVGLAHYIWPLIGAAALAATICFFGYNSVITDIAYPEKKLEAVTFGYQTQAFDAFTQAGQVIGSTT,12.368396953842797
|
| 28 |
+
AEGIRFAIVDEVDSILIDEARTPLIISGQAEDRTKELYKTLTRVLKSLEGGDYSVDLKNKKVSLTEKGVERTEKLLREAGIISDGTDNLYVVGAIFHAQKVATGKDYLFRKIVEKGRVEYTIDEKLKQVVIVDEFTGRMMPGRRYSDGLHQAIEAKEGVKVQRESKTLATITYQNYFRMFKKIMKLAGMTGTAETEAEEFKKIYNLDVVVIPTNEPMKRQDHSDQVYKTKREKYNAVLKEIEELYKKGQPVLVGTTSVEASEFLSNLLKKRKIPHNVLNAKPHAREAEIIAQAGRKG,12.697313288610662
|
| 29 |
+
MPNFFIDRPIFAWVIAIIIMLAGGLAILKLPVAQYPTIAPPAVTISASYPGADAKTVQDTTVQVIEQAMNGVDNLMYMSSNSDSTGTATITLTFESGTDADIAQVQVQNKLQLAMPLLPQAVQQQQGVSVEKSSSSFLMVVGVINTDGTMTQEDISDYVAANMKDAISRTSGVGDVQLFGSQYAMLIRMKPDLLNKFGVTANDVISALQAQNSQVEAGSIGQLPTLPGTPLQLSITAQSQLSSEQEYGDIMLRVNQDGSRVLLRDVAKIELGGENYDIIAE,12.907199708267516
|
| 30 |
+
DPLYYTNNGGLGFVLSALFGYIWWGYKSGTPKEVRSEAKYRMLTVVVPCYNEEKTIGRTLCSLLESDYPEDKLQIICVNDGSKDKTLKELEDFELRDVPLVVIDQENGGKARALNAGIDAASYEYFACVDADSQVEKDSLKKMVHHFADPSVGCVAGRVKIGNRWSWISRLIDLIQYLIAFNIGRRGINSITVVPGAIGAYRVSAIKKAGGFSGKTMTEDLDLTIAILRAGYKVVYEPEAICWTDVPETLKGFTRQRFRWTYGTMQ,12.993370901156627
|
| 31 |
+
DISAEDRMWSDAEKRMEWQRIDRQVANRKSHGKRGLLSRIFGWIFRRNMDEKALKLLPHIKCYTPAEIANAIQSMTPEDLQRYELRASMFSLADKSNSGTISLTEFRNILECLGVQMSPTELQTLFQVCDRDQNDMINFNEFANRFHEPAKEIGFNVAVLLTNLSEHVPHDPRLRNFLELAESVLNYFQPFLGRIEIMGSAKRIERVYFEISESSRTQWEKPQVKESREFRTMQEIYNHIYYHTKQKENENVQRNAERWKMIEENKL,13.119829828981848
|
| 32 |
+
SDITRLIVLVGTTLGVVLFLALAVWIVKSFWSPYQEINDWALALTIVDVLVVGVPAALPSTVTVTMALGAAYLAKKQALVKKLPIVESLSGVEILCSDKTGTLTKNKLSLQGAWLPGSEKPEQISGLVPEGSRQNITKCIHIAVLCNRASYKDGKLVGTPTEKAILKGLECWGVGYGEMRKKYPLVHQIPFNSTNKFQLSIHDKDNRYLLVMKGAPERVLEKCSTVLLQGKEQPLDEQWHTAFQTAYLSLGGLGERVLGFCQLYLSE,13.625918655212923
|
| 33 |
+
MEVTLFALLALVVASAIIAWGPVTKPLHPHEALVDVGGHKMHYICQGKGSPTVILEAGGGGGSIEWGWVQPQVAAVSRVCTYDRAGYGWSDPAPHARDAGIVAEELHRLLRAAQVPGPYVLVGHSIGGFNTLHFAARYPQDVAGLVLVDATHEDQYRRWKGYEQEMAPFTSGQALDNLAANVRVMESLPPVDAGKVRDLPVLVLSAGREHPPFDMKLYREQWQREVVDLSNVSDRQKHIVADRSGHHIQFDEPDLVVAAIRE,14.117540370332351
|
| 34 |
+
MDYHEDDKRFRREELCREAEFLKLKMPTKKVYHISETRGLLKTINSVLQKITDPIQPKVAEHRPQTTKRLSYPFSREKQHLFDLTDRDSFFDSKTRSTIVYEILKRTTCGITSLLANGIYWLAISTPTINEYPSFLSPSLYAAVLPFTFGFVVSFITLPRKALEYIEQNGQGKAAVHHHTHTHDHDAGDVKIVVNDKDLESHVVAGALMFVAALFSLVFHQWWSDYCDVAYTVFIRVRDVIFGHVKWT,14.986517088631075
|
| 35 |
+
PSNISAWWNFGSLLGACLILQITTGLFLAMHYSPDTTTAFSSLSHICRDVNFGWFIRNLHANGASFFFICIFLHIGRGLYYGSYLYKETWNTGVILLLTVMATAFMGYVLPWGQMSFWGATVITSLAVYLPWWGQHVQKLLFQLIPALLVLLTAWTPFLIGYTLIRETTETESTNYGTPLRLHRIISHHLLLLRAVAXXXXXXXXXXXXXXXXXXEIKAAFWSVFHFILPFMATALAAPRSLLLDEANSTNTLVTTNLIFNFIFFLLPIFPATLSMFSPNLLGDPENFTPANPLVTPPHIKPEWYFLFAYAILRSIPNKLGGVLALAASVLILFLIPFLHKSKQRSTMTF,15.230674054330438
|
| 36 |
+
SRTSELAVGIFVIIFGIALFFLAMKVSGLVGTNLSDGYTMKAQFDNVNGLKPRAKVTMSGVTIGRVDSITLDPVTRLATVTFDLDGKLTSFNAEQLKEVQKNALDELRYSSDYTQATPAQQMKACSEQMMTLLAPQQKEKKTLEVGDIIATSKSSVIYNDMSTYLNDLIGDLGTIASGVNELWPTLQANFSTVKTMAQNLLTANQQLPQLLGNVQTTSQLLAQDNNNFNKLVTDFALTIDALNAVVSKSGANLDTAIATANDLNTVLTENRQ,15.441068484289472
|
| 37 |
+
DDVTVVYQNGLPVISVRLPSRRERCQFTLKPISDSVGVFLRQLQEEDRGIDRVAIYSPDGVRVAASTGIDLLLLDDQLIIREKYQIFINDMSPGAKVAQTAPAREIKWDHEALTEELTYEGQSEKLRDKDRTEVRRTMLNLERRLSDIRRQLAPLEKVRIEISRKMEDKTIQSYALWLMLAVVVCLMGLAWWQVLASLATFCVAVIIMVFVGRNWSAVLQRRRKRMGAEELRHRAYQTHQCHLCAICFTNQKMATLVPCGHVFCEECIKQHL,17.259236085949127
|
| 38 |
+
DNTTNIVHVPVHYVFIMALPIIMCILGLLLNVLALWVFYGHMKRTTSVVYVINLAIADLLFVLSLPMYIHYYFNKTHWVFGELLCRITGTLFYMNTYCSILFLTCISIHRFLGVCYPFRLNLVKRNYAVCVSVGVWAFVMLACMPTLVFNQTEDYEGNRTICYDHLEDAQRHWALYLQVKVNVFVIGFLIPFLIITFCYSQIVATLLKVEANLAKKKSKAIRLVLTVVTVFVLSQFPYNFILLAKTIKLQQINSSCEFEKIIE,17.688518287684857
|
| 39 |
+
MDYHEDDKRFRREELCREAEFLKLKMPTKKVYHISETRGLLKTINSVLQKITDPIQPKVAEHRPQTTKRLSYPFSREKQHLFDLTDRDSFFDSKTRSTIVYEILKRTTCGITSLLANGIYSAAYPLHDGDYEGDNVEFYGDYTIHAGDPENGGQCVVITLTDYGNYEPFYSASLEFSRKHFGFSALSVQCELSDVQSFTAVKQQFINLLSSRAPITVRKFVSPEFPRNSDSHDIFSLSCDVSNTGHVTAVTCQVSARFLTRYLTD,17.749448694031326
|
| 40 |
+
MDNKLTLALAAIMVVLIAFVGINVMNNVNTNPTVVKTATVERGEYVERVDATGKVVAAQSTDLSFPATGEVTWLKVKVGDRVSKGQLLAELDTTDLEAQKNLALSQLEQSRASLALTRQTLARQQALAQTQAVSQQDLDNATNALRVQEAQLNQLRSGSRPEDIAAAQSQLRMAQDDLNRLRNGSRSEELRSLQAQLDVDKAKLNWDQKIVRRNQVRAPFAGVIAERLAEPGALVSPSQPILSLVADDNLEIEANVSEADILHLKPGQKAWFT,18.826645316378666
|
| 41 |
+
MVSVIIPAYNEEKYLEKCLESVRNQTYKNLEIILVDDGSKDKTLEIAKEYAKKDERVKVVTQENGGVSSARNRGIEESKGEWIAFLDADDYWEENHLEELVKAIESNNCDMSICNAIWYYWWDENKRIIKRLPRESVIEAEDFFKELPIFMLTVVVWNKLFKKELFDSIRFPEGKTYEDTATIVDVLMKCKKVAYLNKALVNYRIREGSASTSFNPAKAKDHLKAIEVAFKEAHAEGLGDVALRAFQRRYVNSII,19.064942570982527
|
| 42 |
+
MRPNLFLLALPFIALAAPAHAESITVNGDARIRALGKQNYAEVRTHISDNGTKATVDATGHLRIDAPLGERAQVKAYGELEAIYAKPSGDKNKASNTERLAYAGLKFADHGSIDYGRNYGILYDTNAWTDVFPLWGADVLESNTAAYKRTYGNVLTYRNNNAFGYVDGLSFALQYQGKNPTTGEVVKGDRVNSDGRRLGAATVGYDFDGFGIGFAAASSKTEQNGIKKDTDGREYAVAGSAKVGAAQVAGTYAETRNATRFGQTGKGRVE,19.328842227230165
|
| 43 |
+
DDALPLSYYGTNKGLDRPATGPDRREHRFGFIADASAYPSQQLFIRGKVDVRDYQGSDTLRDDNAYVRLRNLTVGYDNLLPGSPLNVVAQFDLFNVLNATNVKDYQEVLSGGKAAAANFPIPRTYTLGLKLTF,21.699704462300236
|
| 44 |
+
MAGRKILRDPYIIKLLELTEHNPGKRVTARCTSEGILTVPPDLICCLLIQLPIDSIDHHSFILNLQCKDDYQLILKNGSVLHSSCKYTPGKPAEVKAEGGSISIAITKLQLSDSGLYSCQPPNHEPSHGQLNLTVYKQTGFISVSDTGVGIVRVRAYAERPDDLNVTLTCLVTGVFPHDVTVQWTKNNSPLSKDSSPAEEQQHEDGTFFLYSKLTVDKSRWERGDTYTCVVAHEALPNKITKTLDRSKCQGEGLAPL,21.725914279351123
|
| 45 |
+
MIMTMTLTMMMVMISNKTHWNSFQMNLMMTSLMILSLGGLPPLTGFLPKWIIITELMKNNNLITASMAMMALLNLFFYTRLIYSTSLMKLYPTNNQTKTKPKMMTHQMKLTALMTITMSSMTLPLAPQLITTELMAFAXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXTTLIMMATSLPIIMKPMTPWWC,22.24920166064802
|
| 46 |
+
GVVKAAVSFCLAFCLVLCIAVTAVWFLSPTSDLDKKAVLPREYEQFKANQSSDQLRAFAAQYGLDATPAEGATDAMLAKGREIYVVNCSICHGSDARGASGLGVTLNPRPPSFTDPGFAAAHTDGEIFWVIRHGIRNSAMPAWKDKISEQDRWDLVHFLRTFKPESQKELTEAEIAALSVGEKVTMGQALFQEKCIVCHGANGQGNQTVGPVLNPSPRNFTSGVFKLRSTDQGELYAIRNGIRQHGMPPWGSQLKD,23.148315124172598
|
| 47 |
+
MRTHTGEKPFACDVCDKRFNQKAHLNRHKTIHTGERPFACDICNKKFSEAGHMKIHTRTHTGERPFSCDVCSKKFSQKIHLNTHMRIHTGDKPFVCSVCNKSFSRKGDLNKHMITHTGLKPYSCDICSKSFSLKYNLITHKRIHTGEKPFVCDVCGKAFTQKCNLNIHMLIHAGVKPYSCDLCNMSFTQKSSLNTHLRTHAGVKPYACDLCGKSFALRQTLSRHHKTHTGVKAFACDFCDKTFFANQHLKRHRLTHS,24.00786399231911
|
| 48 |
+
MRITKGFTLIELMIVVAIIGILAAFAVPAYNDYIARSQAAEGLTLADGLKVRISDHLEAGTCTADNTAVNGKTIGTEGTVGALPEGVSGDCKLSVAFTAGAAGKEITVKYDHKAGALTYQSATGKTISLVLPASLITKAGSWQGSVSWDYLKNLVPTNLRYAYVRSYMGPDYNPNNWPASGSTMPSDICWKSGDPNYTGTPGCTKNNSVAWGYPINPATCTFTPVADPTPTLAPVASVSLNKCYSAGTATLTATAA,24.29027038113483
|
| 49 |
+
MVGRVGGWIVSVDPDGRFGPKPYKRHRAGIKDALSYLYQLKCRLRIDPDTWREWASPLRESITLEECRYTMPSFAVQASFMTLYWSVCEALFGCRFVYGPFNPILGETYEAHVADSDDEGQKTRYFAEQVSHHPPISACHVDSEKFYLDGHTCIRSKLTGKAISVHHVGQSCLTFKRAGETYLIHMPNQYCRSILTVPWAEQETVHCPTENHSAILEFTKGGFSAKFTGRWSSVLHVISAPHAATAEEKYPVTQVD,24.583101326751542
|
| 50 |
+
GFHYFDITLAYFIPALLALLSSAWLIRAIRMDRADERAALTARIDELEQHNAALQARVDELERHVAMRTSELLETEQALAAERAALLDKGNHLASNFDTLKQRVAQLESERDALAADRDNLRGERDTLSGQVATLEAQRDEFARQLDAARQQAATAEERARQAEAAAASLRQRLDEALARVTELAGQNSELQAALARERQNNDALNARVRELEEQVARAQAGANQAQAARDRAQADAERLRQLEQQLAGANEAARRRIADLEDQLNRANRTIAEL,25.434296722653517
|
| 51 |
+
QDTVADETGFFETELTVGTKEDRYSTVFNYRRINRDLKEPQDVNVYYARYEWQVAEDWKLRPGIRLDHDDFFGLTSSPKAYLMYEHDNGDTYKLGVARAYKAPNLYQSNPNYILYSKGQGCYGSSSCYLQGNGDLKAETSVNKELGVEYHHDRFAAALNLFHNDYKDKIVAGISTGVSGNSEMTTANYMEGWMTSVKWDWQIADNWKTDTSISWSRNKPKTSSSLDYQLRPENTLNSTLTWQARENLDFGWRVVHYG,26.433123728733182
|
| 52 |
+
PPSECPPSPCGEKEYFDVCGQCCKKCKPMEGKISTACRKISDAVCDSGEWVEHPASDKCYACQKTCATRRPTQKACAAMRDCKCLDYFYRQLCVSCIPKCPRACDNQFCTAICNPGCVCPEGLFQDEFTGLCVPESECRTGCSNGQVYRECTSPCPSTCGNPNPRPSCSKTCFDGCACPEGMVLDDQNICVLPEQCGCTLYGRHYKPGETFTSDCGNPCEPTCENAYRTVVCTR,26.468925245048567
|
| 53 |
+
ENKYSLLYKNQTLFDEWGIKYQVKSRMIEKSLYSVVFNVNDKKYNIIMRLYDKETKRIYSKREIINYIKNNSSINYKIDLIENGEYYAIAMPYIKGCTLRQYINKHISEKDFINILQPLIETLKVLHDKGIYHRDLKPENILIEQDENLFMIDLGLAIDLTNAIPTIDYGTDGFMAPEQALGNKPTFASDIYSLGVIAIELLTLKNPFDSNISLSESNWISTLHKKDKPLSSVLSKLILKMLEPSPNDRPNIKDVLNSLNSLEVLQRGVN,27.369412815985804
|
| 54 |
+
EKKRKRDAVTWPPEKRQDAILFYLKNHNAPGMEFTEVAKAAGIHKSTVSRELKDPTFPPDASSRAGPGRPKKLSAKADELLNAWIKDTYVEGDLRREVTANILREKALEHGIIELSASTVWRILHKQLGYSSKKMSNRAIAADRRQVQEYRLEVIKAMHDNPYIYLDEIWINQNEAMNHVWFHDSETGLRSTMGLNKGSRGKRIIGVIDAEGFLHYEFKSTTDSTAAKTIVDFLEHNEGDNYLIVVDNAKYHSRL,29.60231093300791
|
| 55 |
+
MVLFRATLVLTLFCVQLALAQVGINTSTPKATLDITAKTTDGSKPEGLLIPRVDRQQAETIPANPQLTIYTDGKTGKGFFYLGTTTPAGTANILDISKNGYYFYNGTAWVALNSGTYGSGTSGTPSATTDKEIYTNSTDKKVGFYSPTGTLVGYNSLTTTDYNSLITSGVTPSYAIGTSNTAALSSFYTGSVSGTLVTTGLTPVIGAAATNIYTVLDGGTSSTITIGSGGTVTSVTPIGGVTSVSLPLSGVSAVSITGSGSTITMGSGGVVTSVTAPSSVSTISITPTSGSIT,29.77388588380658
|
| 56 |
+
LATLRQLWAGTFRRLWRAGDRDPDPAKVPLRARLVLMAALPVLALVLSAALTWQAASEQVRSATDRTLLGEVAEIGRTVSTAYGDVDTRLRGQLDGLARIPGVRSAAVVPLGAEGGTTVLGERTVPAADRSRWFSSLPLRSGSPDTVVSAPVLRGDRVLGSVQVVLDTDRVNALVSGLGWVLLLDWLAVTLLLWAAAMVLLRRQLRPLARMTAVAGAVAGGDLSRRVPDPGPDEVAQLGRAFNTMLDRIEQLLAGQRALLDDVSGELNARTVEL,30.911742604776983
|
| 57 |
+
LSSSCFPWSLGVSVMTFISLSLLSYGPDRPLCPLTPTLSSLQFLVGTWKMVEGSGMFQEFCNHSASQWTFTADGHMTSKAFYVQPQQGQQLRCEEMRLIAQKHHPDTHRCRSLGQPPDTPYHYEYRRDCQDPLTMQHYVTEVMSRRLILSRQKPWDPAPDHIPPGTKIRYVSSPWGPEFCEPVPTQGEAVTLHGTVTHHTLGPLWGEGNHTALTDGFPEGVSPDVFLSAWGPKGLEKLNSLAR,32.019316129846914
|
| 58 |
+
SPLQIVRDHFIREGRLIDPPEREFVPNDMPEYVLPSGERLPPIDVAKSRHRAVMPPPPSDYMAEYMAYADIMAPVTYYTRKDLGLGTKTILVAGAIGGLCGFLWFFMYVKGMGVLDALGITPFQIVRGDFSDTMSMANGFHMFFMITCGICFGGWATNWSRKAGFSDSMEASLMSAVVAYVLMVPMMMGATHTEMLANGHLMDLTHWTVAHLNPFHMMGFFAINVVAGLASIMVFALHLWYALTVRKTFDPEVELKTLKN,33.613951886997285
|
| 59 |
+
MAYRSLFTSESVSEGHPDKIADQISDAVLDAVIAADQASCGTAKAAVTTGLVTIGGESAMCWVMSDMIRTTLVDIGYSVTAVGDEGGFAPNIQSFHDALKVIGDTIVNTRKAQSDTNVQIGIDVCATSAKVLPTEYMGYEDRGASLIFSHRSGETEDSTIADFCVGVLAADIKQTLPPIVAELGKPARLRAMGQLAPLAEDAAFVGYDWNHTTGFPRFSAGSMSTADALAAADNTADAAAMANTALAEAAMAGDHATAARWSAAVEDLTAQAKAGTITTGKIAEAIRAACL,34.430385572117466
|
| 60 |
+
SPDVQIHPPKRDPDPWGIKGLSAFLLGGATLWGLAALAIHLAGLVPFPTVELGTADFHMTLPFMAAAAGGFLIAKHQPRDMFGIGMPEDRPLIATGAAVSFALVVIALVLYAVAPGTYTPRAIGLVGSLAVSAGILGVFGAVLGRLRPVRGIGLVPAAILEGIARQPEARGPVLVSMVAGFALGAVGLLAPHHFGLAFGFGAIGGLGAVALAGWTGALVGAPDISGPTAIAAKMQRFYLWATVLPVAAMVVALVAIATPHLNLGIGEGLLLGGMLAGPLCVAA,35.89969391101693
|
| 61 |
+
SAWNTNLNMDARSAWATYQRQNGEVIGWMPIVNYADTIHDRDFAQAQLIFSTQVSKLWWAEDLGVNAFVVTLSNDLYQLWLNSPDEKADLMKQININAYNINWGVDDGTYADFQVWNIARMLRNDPSTNGKRYFAYGSDAPLIAAYRDQGWETNTVRGYGEYVVLPKAAGTVDNEVAQAAVDNWYSGAIANRLGTMANTGAVVQTGTTDNGIYGYAMTDGKTLYFPRYNTKYYNTDQGGVAHEFGHHVDYAV,39.68959897551875
|
| 62 |
+
GEKWIMKFDGALNPSNISAVLAGGLIGLAVGLQATFFNVSTTSHVTGVLGGATVVGMATYYKWASPWAISAGTFFSLVLGTYLGSQLVKRLHVYKLPEPIAFFGGSFVMVWLWSWMTTYIYPASHALTPYASHLSYLCAMLLGALGGILGSLITPPLKDTFIASALGIIGGTGFAVSHLTMLNPTIPSTLYAIAYAATGIWGAITATRIARVLNLFEGALVCGAATVFYSFVKVVAPELLPVALASIVCAAGVLYVANLTKVV,45.78566418659657
|
| 63 |
+
LAPSPKVFFIDDTPIQWGFVIILLLLSSGGLFFDSKLAGIFTSLGIAVGLIGAALTTFADTRKGKVTPEQLDRVNSTLKTFFGWSLISGVLGLAIYAASLNIDGKLAFVDSLFYFTGTGLVTVGFGDIVPTTTAAKILVVVLIVGGIGFAGSMISTVASWIRSQQEKSELDKHTIRAHARNIVICHDDPRVSALCEYLQGYFLVDDKQSTYHVLPMYLDGNSLERRALRKKLFSNRVAKHFAREGSVRDLDAVRRANVAGARAVIVLSKADENID,47.50012378184719
|
| 64 |
+
GSTDLSTWQTYVQSTAATITSYYQDTASQAQKNQVLANVTQIINQLDSSTKTKAEVDSALTAINKIKAQIAGDAGGGSSTQATIQGVLDNLITKANNLLRQGQTISEVNALISDLNNLVTQAKGQARSDQESVYTKADSALSNLQNQLNQEREVGSNDRYISQTEKDNLIQNVNNYINNEYLWTDGTSNEGQRLTAAKNLISDTLTNDQKRAAQDAINQLIKDANDLLNQARDRAANQGVTQTEKDNAISNVKTVY,51.616257412346954
|
| 65 |
+
PVPVPVPVRRPSNTQLDSPGHLRTLLDRNHLPPPDTQLSPDNRLLQDNVPGSGRPLPERTRLSPDRRTLQDFPVHGRDLPEVHRDHGLPAPDHIPPGYGGFLTEAQRHKEWFHVSDTHMAPPDGTSYPIARFHVSAGRPGMPAPDRYFAALGGAQGMASHMHGSGMHSSHGMHGSMGMHGMGHGMFGGGAMGPVFFIVAALAIIIAIGVAVAAKAGGGEGA,52.07059943074766
|
| 66 |
+
APFAICRRCRRRRGLPVCARRRWRRRRGNIWCAVGSGGIWRPCCRCITRITCRLRVSAAWRICCAGCRGRTCCGSFWWSTTCGSRACTARWTPSPWRSTGRCTRGAWTRWWRCARSTATPSPATASTWRAGWRAATPCGSATSTTSSCRCSTAW,52.5490203150644
|
| 67 |
+
LVLFAPTFNLSDPEGTVFATLVAIATAVGGYVAIPISGIDSIAGGVVSGYAVAKAGQFTNALKTTAMGAAVGEILGEQLYFGGFGPLGIVAGLITAGAIHKWLVMNKVSVNIYDAIGGRRFEVVLAVMIVTGLIMSFFVPAPVGGFIDNAVSKVGQSAAIGFITDSGSTLLANGINPVIAIGFLFAMAGVLIGGFKVASAQMGTLMGAVAFITGAFGFAVHFGANMVGVGALIAGRFTGRAFSDKVNETWPAVTDAVNNRYRTMVNVLAGSVVGAIFGL,52.5972401908542
|
| 68 |
+
MAAIHPPNLSFLPKPSAIHLFAFWTGSMGCLCPLLLGSQPILWASTALLLGTLQLGMGLKASLYPSPFPSHHLFQTTNYFLSFFLPFSLLSYASFFPSTLFPPGAIVTLTGLTLHGVSAYTLGGATGAWINYNTNHIFTAENGTVTGIKEMDTYSMVTANRFWSQVFQILFWCTNALALATHFSRIWTISRAEKHQLHVEEEHHHTAAEMVLAENIGIKTLTDYDDDDKMISYYRKDGVHHMHVEDAELALKLQEEEDLKNKKN,52.98995197391265
|
| 69 |
+
GLFAVIVEIPFSLRLPSVVQAKGSFSDSLFSHSAYPVVQPYFSPETLFGFDILLPITGEPVSRGLYTGHQPLLVVGVETSFLLTVETRLTGEVYSKGGRNSWDIQNCNFFGSDGKKYSLPAFERKKVKDVKCVDQDGVFSEVILERTHTSFTLKYTLPDSEWLIHSRSQLVKREDSNMGRPRKHLSSLVARNSSFEATYQRVSEKETEVSVQFGFSVGWKVIYLFLVKHFPFVFHWISNVLFYLLLNTLFAYIPDFSTFDCLAFLVTL,53.005620188296234
|
| 70 |
+
LTPRQRMWYGILSTAVFLLASEGSFFAISLTALVSYAYYQSILAQTQPAAAPAISAGFAFMLGVVIFGWVVLGVIQALINAISEWIRALVINIYSRTVFAPYVRALSHTPEGVRVINLQSSQLAGLFVNEFVKGFVDGLALIASLLVSLLISLWMGGVLGLIVFLYFCFRVMRQVGENMGRLREAQGQMYEQTLGLVEGLKDIRAARREEVYKGRIESLFGELAGMEVAGAKVQAVSTLMMRVVTQVAYLCMLWVGAYGVFHGDLS,56.40990415587325
|
| 71 |
+
MNINQLVLKAREENKQHENFQQGRLNLRYQEISKIEYLNRCRKLAINGNRIQRINDLQFFYHLTYLDLSNNLITSIENLHCLPLLRNLNLQKNLIGHITGLETLVRLEYLNLSHNQISKLENLECLVNLERLDVSHNHLTKIEGVCFLKSNILKELNLESNLLQELKFCEHLDYVTISNNNISSFSQVCYLLEHMPRLKYLSFTGNPYEQKLKQYRMVVFSKLQYLDGFVITEEELCRGSEVVDWIDSGSEFQRFRYCVINFLKDENNRT,58.18325968813114
|
| 72 |
+
GWVRQLPVYKRFAPFLSKFTLVTSLIAVGAGSGATYIQNLRKPRVRDKIVVHTVPLTPEMSGGKRFSVAPPSGIPHASHRMIPIERQREEDAXRERALRKKMLRRTAMLASGAFCLVLFVALGATIGTLRSEGVLKKDEFIPRPAIVGADGKAYDMDHPYAPPVKYQVQWEPKMGEKYYFHDYAKHHPNDNPENPYNKVAARA,60.55516244953947
|
| 73 |
+
MSASLFQTQGNYLVAAAISLSGLFMLVGLLAGSPRRPTYRWLLASVTLFCVAVSYFFMLSATTLEQGLVVKTNTGERALVDAVNGSVQYADGHYEIEATLRNLGSQPVRVEISRLQVVGEKMFGDIQSRTVEVGPNETRQVKFLLNRVLTSSANFRDRVLFVITDAQGNRQFIEVPVAYQYAQITGLLIALAWLAVIVIGFPVAWRSRMRIASGNRPVASGPQIAYLTALLFAATWTLVLMIAGTQIIGSQAGL,60.895889009456674
|
| 74 |
+
AQTTLNVADNSGARQLMCIRVIGASGNCSFVNQQKCTGICGCTRNATPIESEEIFDCIMKCGGQPGDCEVFQTHQCQQRMANNAHHYRRHWLSHTDFCVLPEHFHLDQDRHFHFQQHHHNWHHGHRHHHHHHDFHFGKFFETFAAPFASIFGGHIHGGFEKFSEMLANGFGGFDMFFGGFGGHGHFGGYEQEATSFKILASVVAAILLIAIAIPLGWLVKSQVSGIKVITTTTSGANQIILMKTVVAIATILAIAIAIPTG,62.10915337352313
|
| 75 |
+
GKRKAAVSRAAKLAATRAVPFARAAAIGPYAAIAIAGTKMAIDDHYKKDREKNREFVFNQWMSRKQLYDYKRKFWMFGPEKMKQLYEESGAKGAEAFFKENAETFKKIRDEYLVDLKNGTANPLTGEKVPLNPALPEDIRFPKYTPPPGLVPEGENPYYIPPPGYVREAERAGMPPPKKREMRMRPAGSEPGTTFGGAGYNPFAADPEYPHTAYAXXXXXXXXXXXXXXXXXXXXXXXXXXXXKRKAALSRTARLVATRAIPFGRSAAIGPYAMTAVAVAKMAYKDD,62.61105811999597
|
| 76 |
+
GWVRQLPAEERPVVLDRDEIELDPPVIGMGRNLAIMAVSVFLFMILTAWFALGEIQESEIARGTLRADRTLLDRTFIPITERGVFTTLDSRWALADVEPGELVWIAVDKHPATLQPGQSVQVYVRAVNDKPDNSVITPYRAVFAEIEREGFRWIVSVDQERFDQFRAHVTESLRLVNRGEALVGADGAPIPTIDLEATPGLAPDIPVTLRFEAEDIDWRILDQSQVQVARANVASADVSQPGWQEVELTAVAPWQAGKT,66.68297956107664
|
| 77 |
+
GWTLHPVSLYFSNHLGYVRLYQLWLTSVDKKSTNAFYHEVSDSQRKLVKRITRMELCFLGVMTLISLASLAIYAKFDQTSLPMLNKVFPRQNDIVTPVKFSLSASFFVFFLLLACFLSHAVNQVAKLASFCSALEDIQEFYVRIREELDSLRSYVENLEKRSAVSEEKLRLQASQTEMLLKRLPSFSSFCLLTLDRPILLSSHCPSLLPTVKGILNRGYKLSVYDPPPFQLGLCKDTHISDTQIYYNNGSRLEGATFHL,71.44388492712908
|
| 78 |
+
ALVPSDVSDQAEATLAFARQNLAKIEPEKIEIKQEPASGVNPADQPSQLDIYLTCTLKNEIRAPPGTTMPQLNFLRNQLEKNLLVPASQRDAYIQANPQQTLILDQPSPLTPEQKEDLAQLTITYGKNNLEVNTQRWPLPSLQVAMQTLESGEAHLEYRIHALPKAAGQPPVPVLKLVSKTTLPATAPVPNTASPTLSVRLPPRRTPPPPPIADEDLDDSPIVRDSRTLLKILLPTVLALVIALIAWRLWSSFTSHRIEAIATVPLPSATATPTP,72.99576740691371
|
| 79 |
+
MATPSFVSEPFAGLTPRQRQAIAAAMRSSLGYVQESVALNRMYSSALQGLVKPAGNAATIVASTGNVGTALSTLSGIQTAFSQYLKGKGSLVGSATNTLIAAQGKLISDLGSLITQEKTFMDSISKKLISDMDIAVSRTQTINSEVTKLTQERNALVAQLEAARKDADSAQKATITTELSNIIGTVAGAFLTAGFTAGIVLSIWELWAWGATLAAIAVGVGILLIIYATSRSSASNRKAELDAANSNLQNAQSTLKSDQQ,76.45022543295501
|
| 80 |
+
TPGLIDKLLGGGVQLPPGLLMALAVLAIQLGFIALIGKRVQFGAVARRYKIDAPETSLITAVLLGLAGYLAIFFAMRGMPWSATGELRWISGPHLNPLTFTAKFAMCALIVVPAAMRGWWAFSGPGADERSRHNARYAFWGSIVAVTALVVEGFLIMAPSLTEARFSPFYYARLLTYFVVTTALLVWTTVRESETPGRTLMGFALFSAAMVGLEMLSFTRFAVQFPTWWNVEVANLMYFGTMMIVLGLFFAMGGNIRWMVAA,77.96717901193621
|
| 81 |
+
MRYFKIRSTTLLIYLAAISVCALSICAPGFITPDEPAHFNYIRYLADHGQLPRIDPYAYASWGSTLSSLSYEFFAALFSWIPLETARSTVIFFAILNAVIIFATARRIAARYGSTGAFAAAAVFLLSPRVLAQSSFNNYDSLGIALMLAAWIFYEKVLTEKRLLPAVLSAVAVSIALLTNYQGYFIFAAVLLFSLPFPKLFFSRKNILFSAGVLSAAVIAAGLFAVFYKDLFLYSVFDVRLMSVFKMMTHQYPFSDAMTIYGGYFTVLF,81.23947975232642
|
| 82 |
+
LSPDLVAQLKAKTGVSYKEAKEALEATNGDIVAATIWLHEQARTSTFFFFFFFFNLVVGMGLFGPDRPLWLPGHALRLQPRHGLPGHRAAGLHRPRALPRLRLRPLPRLPRLRQGRPHLRHAPARPLRQPRLLPGDHHGRLLPRPRRLPLRLRLRHPRLLLRHLRLRPVLLPRRPLRALRLPRQPRRPLLPLLRRLLQGHPHRGARLPLRPVRRHLLHLRVRPLLRRLLGLLRARVRPQLLRPAPQRLHRLPRAQARPQGPRDGLLLPGQGPAQG,81.88453571822619
|
| 83 |
+
VGRINTAVTKVEGLKGVFDTASVFIIMRLILGALPGHDYFWHVATHKVLSTTWYELFSNVFLQVPSFITTFFMGAMLVQTMAQKSPEMQEFLKKGGIIFMTLAWFFFAPSGDYVVMRVISACTALVFIVTSMLEMNHVTPPPDTGLPRPIALCLRAFFYIGFLEWCVQQNFYAMCVLFFFMLGGVFTHYTALFVARYMKFFETFVPPIVHSGFSIAWMMWATQEGFITPMGQEPLLLTVLSVMVFFSVMSMC,86.07420147138896
|
| 84 |
+
MVYRITTIILVISTLTSFLIMFIPLTFRTFHYVMAFMVLLETSMFMWWYFDMSTSSYWNQERVHYEENGVPEFSLSFWSGLMFQMASVCYTYGKVYLSALRFGDMDHVQGQFIDLSNHFAMKTGLNPNDFKMRWPIQLMHNIINTMVEETEKLNAKQQREGITAEVEGEGRPQTFYEIQMLWHCITIILDELKRCTTVSNAIMTKETVDRMVHLCEKGIIPPDLEDFVFKLVFFTPPFEMILNFAI,92.19736060379442
|
| 85 |
+
MFSKLSLDAVPFARAPQWQRHLLRVACLISLFSLAYLAIVIAADTTNSIFTVGIGILLAAGVWFYWRDAVREELSHNPLGTRAAGIILGSGLVMLGLQLSAHLTGTWGYVTPTTFRWLAIMALAWPAAFLALRLTRDEEPVSEAMDNFDRAMAIMLVVSLVLWTFSPLLRGAVQHLHWLLFADYCFVVVDVVAVVMIYHMVRFLLAPLRETHPDAAQAIARKADAMVLWLFLWALYPIAYLVPAFVWGFHFPEGSIW,94.53532369154783
|
| 86 |
+
MVIVAIDRAIKATTILISPLIVIDLISAFIIGFRYQLVHDVIARICFAMVFVYYFALFFEIYYSRHFQGYQSALIKRCFLTLVPWLIYGPLLLLYRPVGDWYFPMTLLAFTIFALLAKRFVIEEETRDVMLEKERRMHFFAMVLFVGAMAIAFALSHFGVLEAFMPYRAFWMRGVTLIYFTSFYLVLLHHYGLREEIAFHKRGEVKPYPAYLAYTVINLTAWAVFFFFTHYAPTSAFARWWAWANFICIPFYAIG,95.44574656037318
|
| 87 |
+
MLLVFFAVMAGLLYGWWLRGSPKHARVTLGFYFITFFLLALLVWTHLGPSQAWSGFSVTLNRFYFWYLIITANAGAVLSAFGLVHRKTYVPEAERKRISLQFDAVFLILWLASALVCTFVMCEYLRWGWTGTDTLFGNHYLTPVLGPLLFWEWVTGLGLVVFAVLCWIYVRKFHYHDNLTARFAYSLLFVAPLIYLWMWVAGHPYQLAWTQDTAWLQSMGYWNGYPFMNPAHMIAFLGAGALFSLAMVAHGFRSERDGY,99.45883530953378
|
| 88 |
+
FGKDVVPVAATMVPFFGAIGFVLALRQPHFYPPAILIHGFIAAHFIGLYGENDFGEDFVPYFVAGLFVFWGFVAFILNVYFPPTPQNKRTLREEKYHEQVSVLTQAAIDGQEPQEIEVALGQVQANFDTAKSALEADRLIANQKLRAAVSTAATLIVMTAVIVGVHSQYDLLGLVLAMAAAISTLAGLYVFVGVSRAVLTFFTLRRGKTDEFLADADNFLKRNPVPVAALSALAKGHRDQAVAAAQSAIDNINPNPTTSSSSSTSASSSSSWAFDPLG,100.05271078832499
|
| 89 |
+
MRCGGTMPSTRSTTTGRCTRGWRRRICGTGTATSWTRPTGCTSRSRTSTTSWAGSPTCRTSRTGSSTPWTCPCTCTWRTRSSRSSCRTSPSWTSRRSTRSSRCTKTCWWTRSSRTTWCISGRCSWRSGSTCTRALSWSTWGRCATRRPTWSARSTSPTRRWSSACTARSGRRWCSTTPTSCTWGSCTPARTTGAWKPCSGSTSSPTPTSRPQARCSSATWSRACPTWRSTSSTRRRTSSAATPSSGPW,101.66820225573242
|
| 90 |
+
RIHDTILPFLMLGVGAFLSGIATLIEKSPNIMKCLPLLLTIGCCIPFLGWVSPIVLPFFSMKTQTTLSDGAIYGNSSISRVYENGIVEETQYVCGLNIFTSRIEVDGDFLFPKYYAPTNDTELQYVTEIPTSAHGTNPAELNATRKNLLNTLGPRYTLVLTDTDGVVRDYVVGNIPQGSPSPNLRYKGLRLELAVDQLPAYTISPPDGTSAFTFINKHWLIDIPTTLISETMVRKLVKAAGPLGPAYIVITEQSPNPIVATAGQAR,103.31987729217545
|
| 91 |
+
CRFGTCTVQKLAHQIYQFTDKDKDNVAPRSKISPQGYVNPNNEPTSYISPGHLRTKKSNMIPAKEVTRIDPNIVPNPNVQYPNLPAPYMYSGRAKRRRNLGLLMGRPNENPDNQHEMQDGYESAAYSNSYKGTYGKLTRWTSRWINNHYIDIERKVHFKDGRIFKTRAESSRINPKIGDFKTTKYITRGEKEALGFKIGGRLLLRPSSKLEKNFTVTETRTIRNGYTTTISRTIRWDDLEKCPLGNCAVGDLVTIDVTD,103.52201921223038
|
| 92 |
+
MWWWRLLVVALLRIGLALEDPARNPCSRVFFEGLTGCQQKVLRAVYPDPSRCLKACSEMKEAANSWGTRYAVATSVLGLEWLAYSWIQDKVACRCRGLSIPPTQKPSLFEKLLHSPLLLQGLQRAAEPVLGFFTQATQALKEAVWSALQWLGGQAGHILAFSRHFAFCLMAFSTLTLLSVCCNWWAIRRRFHQLESVTEQLLRCQQYVLQFRAVSRRHYISWALQLYFAHAFILRACAQLVSVLTTVSNMVSDSFSL,113.06700569292313
|
| 93 |
+
DPLSIILGILAGLFLIILIVLYFCGPYCTCIKRTGCCGNRWCYRWRCCCRRRRWRRRCCRWRTCYRYRWYSTRVRKCVKVPVVKTYKYRSKCGTCYGYVTRTRKVCCSYSSSSKKVCYTACRKKVYKTRYTYKVKVRNCKPCTKYRTKTVCSKCSYKTKIRTRTYKVRVAKCPRKSYKVVTMCKKKPSYRTCSRTSRRSRKVCLTCGSKAYRTKRTIKVPVKKTCSRKVSYKVCARTSHRTH,118.16791808043538
|
| 94 |
+
MQTYNNPEVTYFDRSQTDVEYGWWSGNSAWKNEQWLVMTKEEAKEFFRRSWIKLLDAFLPTTYIIVRWYQMYNYGCPLWCEKNDGKAHCKDWDYHPTCGKGPWWANNPLPTVKGQCEVYTTHRAGSSKECRSYYDLTSAQKAALQSSDCKATTGVYPFYSQAGTCRLNADYPLEKIPEGICNVHLNHKTRASHWGDPDIPTSQIWYFAAYDQAEKEWRTLSGTLEHTWVALSHEDYQRLVDIESKVPWSVSP,119.47290840438525
|
| 95 |
+
VQHFTGYVEDGRGIFYSLPMTNKGLDRIMLCIAVIVAFGMLLCPLASLYFSSEPVLVREDIFSALRTLSIFAAVWQIADVLRRTYVVVSKNPLLLLGLALELTFYTVYFGLDKLYPYPLAVFLPLQFGGILLRHITSIYLQAVSSRNESIIAQLRREREREARRTRERNIAQKRRIDAALWRQMSAVVIFLLLWLIAFTSSALALYNNLLASQQLSIAGLTPSQAASLNTGELLLRVIYGLVISCSAVLFTMTVEARDKIMHD,126.04829832003558
|
| 96 |
+
MLARFFRRQRTASFSLATVVALSALALHTSGMHRPALYASATAVHAITLITLGVMYARSMAPRAEGADHDLRHFMTAYLVTALAWPLAMVLTFALTHFLPGTDPLVPDRTLRLVTLINLAFMASATAHFAFALHTGWNVPRAIAATIVVFALVALTAWLIEIFTGGSTHWSFWAVLIASAAIWLGLALYFRRHAAAIAAFERRHNAQILARFIAAQDETHEQAGGGARSLAHNLDSPLTAAALFADDLSGKVDAPVREHLRLIRRSAND,146.856677830673
|
| 97 |
+
GWFDAMLASVSEFAPIFVVLIIFIVRVYKPFGSEWIVHVLHIADKRPGLNALIHRLLPRTAVHVPQAVKDKYVFLNSEHCIQFGCRHDPVPQYLELLSKGTYSLKVDVWFKHDRAREFYNMLLNEAQTASENHHASKIRHWTDEKMSELFAMAKKAYIPLNETREHSHDKAHSHNHAHSHNHSHDHGHSHEHTHGHDGVHAHDHSNTSDAHVHSHKHLHLHVHVHDKKNIIRRMNSALRKMKAAGVNTHEVAHVHDETTP,166.7331315912073
|
| 98 |
+
GAPPITGEALEKDISRREKGVGGFLSRLFSLVNSTNPFAVGVEGEKLLEEIENIRDSMGHQTAAQLYFAQQQSLLQAEYARWQESHNATLQATKDHIFNAQLGHILMLAGAVVCYTAGLRAWAXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXFLAITNTAINTYFGGSITQSLEQVSAAVEHAKKVGLLSQETVGQIEAAYNSATGKALSYNNIADTEAAFQEFSRNHVLRSQLDKENADAAWTRVQSEGASLRAYLDHASRAITSALNGAIFAFGGP,169.6224292611664
|
| 99 |
+
MSAMSVQIDRLQDQLNHLGELVAQNSKVIAALTQRIQVLERIVTERLRIPYIPLEKRTALMFPLHDEEKQSEITLFINAELHLGTAPGKHKVYYTTVEEMIQHFKEGKCLPQNWPQSDNPFWPCYRELADEMKSNTAAYNNFIKMQDEMRKLCIMLSSGVYHISRNPGGAKDLYTDPKLFIQIYTNECLRNAIPAEILDQMIIDLYANYTEADIHNMAEVRASRNFNHLEKQYMHKLLKLKKTLPFAIQASMDVVL,187.47480920419864
|
| 100 |
+
AAGVAAWLPFARAAAIGWMPVASTAPRAMTATASWPIWMIWAMAMPAMTGRRWRRSRWATAPMARSSGAAPMMARPVMTAMPVAAATIRPSALRSISATASATAGSASAMTRSAAMSPIRTIWRRCSRARRAVASGAWSAISATSSARRRTTSTARALSCAAMASGLPSASMKAAAGGGSSNTMPRCSGSSASRTRACGTASPSCCRAATAASASARAARARSCRASSRARSAIWRAMSVRSRTWSRSARLRRSTSRPSMRSATAA,216.8557544805602
|
| 101 |
+
EKKEVCSVFLTNRVPLDDKRFRRERVYLPGESPFIDPDLFLSREHPLRAQVRGTIIEWLRASIYGIYPYPEQRDPNLWCTERFKQEVMPDGHCEPTLGFVPLTFSTCLTRDMIAASSYNWRKTMEVPGAKMLLHVGPLGTGGHYDYAFTFLQPDNTFAYVKGNKLVRQTKIWNDAGFQLVTEEATLLDAQEYFGAANKLGVCIFCGNCVEYCPTNCLSMCEEVLPRGNALQESWTILERVFMPEDPEHENFKYRRLRTSDGAKFINYTS,520.388790480398
|
benchmarks/Generation/ProtGPT2/protgpt2_test.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
benchmarks/Generation/ProtGPT2/protgpt2_train.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
benchmarks/Generation/ProtGPT2/run_clm.py
ADDED
|
@@ -0,0 +1,657 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
|
| 2 |
+
# coding=utf-8
|
| 3 |
+
# Copyright 2020 The HuggingFace Inc. team. All rights reserved.
|
| 4 |
+
#
|
| 5 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 6 |
+
# you may not use this file except in compliance with the License.
|
| 7 |
+
# You may obtain a copy of the License at
|
| 8 |
+
#
|
| 9 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 10 |
+
#
|
| 11 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 12 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 13 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 14 |
+
# See the License for the specific language governing permissions and
|
| 15 |
+
# limitations under the License.
|
| 16 |
+
"""
|
| 17 |
+
Fine-tuning the library models for causal language modeling (GPT, GPT-2, CTRL, ...) on a text file or a dataset.
|
| 18 |
+
|
| 19 |
+
Here is the full list of checkpoints on the hub that can be fine-tuned by this script:
|
| 20 |
+
https://huggingface.co/models?filter=text-generation
|
| 21 |
+
"""
|
| 22 |
+
# You can also adapt this script on your own causal language modeling task. Pointers for this are left as comments.
|
| 23 |
+
|
| 24 |
+
import logging
|
| 25 |
+
import math
|
| 26 |
+
import os
|
| 27 |
+
import sys
|
| 28 |
+
from dataclasses import dataclass, field
|
| 29 |
+
from itertools import chain
|
| 30 |
+
from typing import Optional
|
| 31 |
+
|
| 32 |
+
import datasets
|
| 33 |
+
import evaluate
|
| 34 |
+
import torch
|
| 35 |
+
from datasets import load_dataset
|
| 36 |
+
|
| 37 |
+
import transformers
|
| 38 |
+
from transformers import (
|
| 39 |
+
CONFIG_MAPPING,
|
| 40 |
+
MODEL_FOR_CAUSAL_LM_MAPPING,
|
| 41 |
+
AutoConfig,
|
| 42 |
+
AutoModelForCausalLM,
|
| 43 |
+
AutoTokenizer,
|
| 44 |
+
HfArgumentParser,
|
| 45 |
+
Trainer,
|
| 46 |
+
TrainingArguments,
|
| 47 |
+
default_data_collator,
|
| 48 |
+
is_torch_xla_available,
|
| 49 |
+
set_seed,
|
| 50 |
+
)
|
| 51 |
+
from transformers.testing_utils import CaptureLogger
|
| 52 |
+
from transformers.trainer_utils import get_last_checkpoint
|
| 53 |
+
from transformers.utils import check_min_version, send_example_telemetry
|
| 54 |
+
from transformers.utils.versions import require_version
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
# NOTE(review): "4.45.0.dev0" is a development version string, so a source/dev install of
# Transformers may be needed to satisfy this check -- confirm against the benchmark environment.
check_min_version("4.45.0.dev0")

require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")

# Module-level logger; its level is set inside `main()` once the training arguments are parsed.
logger = logging.getLogger(__name__)


# Config classes registered for causal-LM models, and their `model_type` strings.
# `MODEL_TYPES` is interpolated into the `--model_type` help text of `ModelArguments`.
MODEL_CONFIG_CLASSES = list(MODEL_FOR_CAUSAL_LM_MAPPING.keys())
MODEL_TYPES = tuple(conf.model_type for conf in MODEL_CONFIG_CLASSES)
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
@dataclass
class ModelArguments:
    """
    Arguments pertaining to which model/config/tokenizer we are going to fine-tune, or train from scratch.

    Each field is exposed as a command-line option through `HfArgumentParser`; the per-field
    `metadata["help"]` string is the user-facing description of that option.

    Raises:
        ValueError: from `__post_init__` when `config_overrides` is combined with `config_name`
            or `model_name_or_path`.
    """

    model_name_or_path: Optional[str] = field(
        default=None,
        metadata={
            "help": (
                "The model checkpoint for weights initialization. Don't set if you want to train a model from scratch."
            )
        },
    )
    model_type: Optional[str] = field(
        default=None,
        metadata={"help": "If training from scratch, pass a model type from the list: " + ", ".join(MODEL_TYPES)},
    )
    config_overrides: Optional[str] = field(
        default=None,
        metadata={
            "help": (
                "Override some existing default config settings when a model is trained from scratch. Example: "
                "n_embd=10,resid_pdrop=0.2,scale_attn_weights=false,summary_type=cls_index"
            )
        },
    )
    config_name: Optional[str] = field(
        default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"}
    )
    tokenizer_name: Optional[str] = field(
        default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"}
    )
    cache_dir: Optional[str] = field(
        default=None,
        metadata={"help": "Where do you want to store the pretrained models downloaded from huggingface.co"},
    )
    use_fast_tokenizer: bool = field(
        default=True,
        metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."},
    )
    model_revision: str = field(
        default="main",
        metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
    )
    # Annotated Optional because the default is None (no explicit token supplied).
    token: Optional[str] = field(
        default=None,
        metadata={
            "help": (
                "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
                "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
            )
        },
    )
    trust_remote_code: bool = field(
        default=False,
        metadata={
            "help": (
                "Whether to trust the execution of code from datasets/models defined on the Hub."
                " This option should only be set to `True` for repositories you trust and in which you have read the"
                " code, as it will execute code present on the Hub on your local machine."
            )
        },
    )
    torch_dtype: Optional[str] = field(
        default=None,
        metadata={
            "help": (
                "Override the default `torch.dtype` and load the model under this dtype. If `auto` is passed, the "
                "dtype will be automatically derived from the model's weights."
            ),
            "choices": ["auto", "bfloat16", "float16", "float32"],
        },
    )
    low_cpu_mem_usage: bool = field(
        default=False,
        metadata={
            "help": (
                "It is an option to create the model as an empty shell, then only materialize its parameters when the pretrained weights are loaded. "
                "set True will benefit LLM loading time and RAM consumption."
            )
        },
    )

    def __post_init__(self):
        """Reject `config_overrides` when a config name or a model checkpoint is also given."""
        if self.config_overrides is not None and (self.config_name is not None or self.model_name_or_path is not None):
            raise ValueError(
                "--config_overrides can't be used in combination with --config_name or --model_name_or_path"
            )
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
@dataclass
class DataTrainingArguments:
    """
    Arguments pertaining to what data we are going to input our model for training and eval.

    Each field is exposed as a command-line option through `HfArgumentParser`; the per-field
    `metadata["help"]` string is the user-facing description of that option.

    Raises:
        ValueError: from `__post_init__` when no data source is given at all, or when
            `train_file` / `validation_file` does not end in a csv, json or txt extension.
    """

    dataset_name: Optional[str] = field(
        default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."}
    )
    dataset_config_name: Optional[str] = field(
        default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."}
    )
    train_file: Optional[str] = field(default=None, metadata={"help": "The input training data file (a text file)."})
    validation_file: Optional[str] = field(
        default=None,
        metadata={"help": "An optional input evaluation data file to evaluate the perplexity on (a text file)."},
    )
    max_train_samples: Optional[int] = field(
        default=None,
        metadata={
            "help": (
                "For debugging purposes or quicker training, truncate the number of training examples to this "
                "value if set."
            )
        },
    )
    max_eval_samples: Optional[int] = field(
        default=None,
        metadata={
            "help": (
                "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
                "value if set."
            )
        },
    )
    streaming: bool = field(default=False, metadata={"help": "Enable streaming mode"})
    block_size: Optional[int] = field(
        default=None,
        metadata={
            "help": (
                "Optional input sequence length after tokenization. "
                "The training dataset will be truncated in block of this size for training. "
                "Default to the model max input length for single sentence inputs (take into account special tokens)."
            )
        },
    )
    overwrite_cache: bool = field(
        default=False, metadata={"help": "Overwrite the cached training and evaluation sets"}
    )
    validation_split_percentage: Optional[int] = field(
        default=5,
        metadata={
            "help": "The percentage of the train set used as validation set in case there's no validation split"
        },
    )
    preprocessing_num_workers: Optional[int] = field(
        default=None,
        metadata={"help": "The number of processes to use for the preprocessing."},
    )
    keep_linebreaks: bool = field(
        default=True, metadata={"help": "Whether to keep line breaks when using TXT files or not."}
    )

    def __post_init__(self):
        """Validate the data-source options once the dataclass has been populated.

        Raises:
            ValueError: if neither a dataset name nor any data file is provided, or if a
                provided data file has an extension other than csv, json or txt.
        """
        if self.streaming:
            require_version("datasets>=2.0.0", "The streaming feature requires `datasets>=2.0.0`")

        if self.dataset_name is None and self.train_file is None and self.validation_file is None:
            raise ValueError("Need either a dataset name or a training/validation file.")

        # Raise explicitly instead of using `assert`: assertions are stripped under `python -O`,
        # which would silently disable this validation.
        for arg_name in ("train_file", "validation_file"):
            path = getattr(self, arg_name)
            if path is None:
                continue
            # The extension is whatever follows the last dot in the path.
            extension = path.split(".")[-1]
            if extension not in ("csv", "json", "txt"):
                raise ValueError(f"`{arg_name}` should be a csv, a json or a txt file.")
|
| 236 |
+
|
| 237 |
+
|
| 238 |
+
def main():
    """Run the causal language modeling pipeline end to end.

    Steps, in order: parse the model/data/training arguments (from a single
    JSON file path or from the command line), configure logging, look for a
    resumable checkpoint in the output directory, load the raw datasets (from
    the hub or from local files), load the config, tokenizer and model,
    tokenize the text and regroup it into fixed-size blocks, build a
    ``Trainer``, optionally train and evaluate, and finally either push to the
    hub or write a model card.

    Raises:
        ValueError: If the output directory is non-empty with no checkpoint
            and overwriting was not requested, if neither a tokenizer name nor
            a model path is given, or if ``--do_train`` / ``--do_eval`` is set
            without the matching dataset split.
    """
    # See all possible arguments in src/transformers/training_args.py
    # or by passing the --help flag to this script.
    # We now keep distinct sets of args, for a cleaner separation of concerns.

    parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments))
    if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
        # If we pass only one argument to the script and it's the path to a json file,
        # let's parse it to get our arguments.
        model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
    else:
        model_args, data_args, training_args = parser.parse_args_into_dataclasses()

    # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
    # information sent is the one passed as arguments along with your Python/PyTorch versions.
    send_example_telemetry("run_clm", model_args, data_args)

    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        handlers=[logging.StreamHandler(sys.stdout)],
    )

    if training_args.should_log:
        # The default of training_args.log_level is passive, so we set log level at info here to have that default.
        transformers.utils.logging.set_verbosity_info()

    # Apply the per-process log level to this script's logger and to both libraries.
    log_level = training_args.get_process_log_level()
    logger.setLevel(log_level)
    datasets.utils.logging.set_verbosity(log_level)
    transformers.utils.logging.set_verbosity(log_level)
    transformers.utils.logging.enable_default_handler()
    transformers.utils.logging.enable_explicit_format()

    # Log on each process the small summary:
    logger.warning(
        f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}, "
        + f"distributed training: {training_args.parallel_mode.value == 'distributed'}, 16-bits training: {training_args.fp16}"
    )
    logger.info(f"Training/evaluation parameters {training_args}")

    # Detecting last checkpoint.
    last_checkpoint = None
    if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir:
        last_checkpoint = get_last_checkpoint(training_args.output_dir)
        if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0:
            # Refuse to mix new outputs into a populated directory that holds no checkpoint.
            raise ValueError(
                f"Output directory ({training_args.output_dir}) already exists and is not empty. "
                "Use --overwrite_output_dir to overcome."
            )
        elif last_checkpoint is not None and training_args.resume_from_checkpoint is None:
            logger.info(
                f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change "
                "the `--output_dir` or add `--overwrite_output_dir` to train from scratch."
            )

    # Set seed before initializing model.
    set_seed(training_args.seed)

    # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
    # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
    # (the dataset will be downloaded automatically from the datasets Hub).
    #
    # For CSV/JSON files, this script will use the column called 'text' or the first column if no column called
    # 'text' is found. You can easily tweak this behavior (see below).
    #
    # In distributed training, the load_dataset function guarantees that only one local process can concurrently
    # download the dataset.
    if data_args.dataset_name is not None:
        # Downloading and loading a dataset from the hub.
        raw_datasets = load_dataset(
            data_args.dataset_name,
            data_args.dataset_config_name,
            cache_dir=model_args.cache_dir,
            token=model_args.token,
            streaming=data_args.streaming,
            trust_remote_code=model_args.trust_remote_code,
        )
        if "validation" not in raw_datasets.keys():
            # No validation split available: carve the first N% of "train" out as validation
            # and keep the remainder as the training split.
            raw_datasets["validation"] = load_dataset(
                data_args.dataset_name,
                data_args.dataset_config_name,
                split=f"train[:{data_args.validation_split_percentage}%]",
                cache_dir=model_args.cache_dir,
                token=model_args.token,
                streaming=data_args.streaming,
                trust_remote_code=model_args.trust_remote_code,
            )
            raw_datasets["train"] = load_dataset(
                data_args.dataset_name,
                data_args.dataset_config_name,
                split=f"train[{data_args.validation_split_percentage}%:]",
                cache_dir=model_args.cache_dir,
                token=model_args.token,
                streaming=data_args.streaming,
                trust_remote_code=model_args.trust_remote_code,
            )
    else:
        data_files = {}
        dataset_args = {}
        if data_args.train_file is not None:
            data_files["train"] = data_args.train_file
        if data_args.validation_file is not None:
            data_files["validation"] = data_args.validation_file
        # The loader name is taken from the file extension; the training file takes precedence.
        extension = (
            data_args.train_file.split(".")[-1]
            if data_args.train_file is not None
            else data_args.validation_file.split(".")[-1]
        )
        if extension == "txt":
            # Plain-text files are read with the "text" loader.
            extension = "text"
            dataset_args["keep_linebreaks"] = data_args.keep_linebreaks
        raw_datasets = load_dataset(
            extension,
            data_files=data_files,
            cache_dir=model_args.cache_dir,
            token=model_args.token,
            **dataset_args,
        )
        # If no validation data is there, validation_split_percentage will be used to divide the dataset.
        if "validation" not in raw_datasets.keys():
            raw_datasets["validation"] = load_dataset(
                extension,
                data_files=data_files,
                split=f"train[:{data_args.validation_split_percentage}%]",
                cache_dir=model_args.cache_dir,
                token=model_args.token,
                **dataset_args,
            )
            raw_datasets["train"] = load_dataset(
                extension,
                data_files=data_files,
                split=f"train[{data_args.validation_split_percentage}%:]",
                cache_dir=model_args.cache_dir,
                token=model_args.token,
                **dataset_args,
            )

    # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
    # https://huggingface.co/docs/datasets/loading_datasets.

    # Load pretrained model and tokenizer
    #
    # Distributed training:
    # The .from_pretrained methods guarantee that only one local process can concurrently
    # download model & vocab.

    config_kwargs = {
        "cache_dir": model_args.cache_dir,
        "revision": model_args.model_revision,
        "token": model_args.token,
        "trust_remote_code": model_args.trust_remote_code,
    }
    # Config resolution order: explicit config name, then the model path, then a fresh config.
    if model_args.config_name:
        config = AutoConfig.from_pretrained(model_args.config_name, **config_kwargs)
    elif model_args.model_name_or_path:
        config = AutoConfig.from_pretrained(model_args.model_name_or_path, **config_kwargs)
    else:
        config = CONFIG_MAPPING[model_args.model_type]()
        logger.warning("You are instantiating a new config instance from scratch.")
        if model_args.config_overrides is not None:
            logger.info(f"Overriding config: {model_args.config_overrides}")
            config.update_from_string(model_args.config_overrides)
            logger.info(f"New config: {config}")

    tokenizer_kwargs = {
        "cache_dir": model_args.cache_dir,
        "use_fast": model_args.use_fast_tokenizer,
        "revision": model_args.model_revision,
        "token": model_args.token,
        "trust_remote_code": model_args.trust_remote_code,
    }
    if model_args.tokenizer_name:
        tokenizer = AutoTokenizer.from_pretrained(model_args.tokenizer_name, **tokenizer_kwargs)
    elif model_args.model_name_or_path:
        tokenizer = AutoTokenizer.from_pretrained(model_args.model_name_or_path, **tokenizer_kwargs)
    else:
        raise ValueError(
            "You are instantiating a new tokenizer from scratch. This is not supported by this script. "
            "You can do it from another script, save it, and load it from here, using --tokenizer_name."
        )

    if model_args.model_name_or_path:
        # "auto" and None are passed through unchanged; any other value is looked up as an attribute of torch.
        torch_dtype = (
            model_args.torch_dtype
            if model_args.torch_dtype in ["auto", None]
            else getattr(torch, model_args.torch_dtype)
        )
        model = AutoModelForCausalLM.from_pretrained(
            model_args.model_name_or_path,
            from_tf=bool(".ckpt" in model_args.model_name_or_path),
            config=config,
            cache_dir=model_args.cache_dir,
            revision=model_args.model_revision,
            token=model_args.token,
            trust_remote_code=model_args.trust_remote_code,
            torch_dtype=torch_dtype,
            low_cpu_mem_usage=model_args.low_cpu_mem_usage,
        )
    else:
        model = AutoModelForCausalLM.from_config(config, trust_remote_code=model_args.trust_remote_code)
        # Keying by data_ptr() counts parameters that share the same storage only once.
        n_params = sum({p.data_ptr(): p.numel() for p in model.parameters()}.values())
        logger.info(f"Training new model from scratch - Total size={n_params/2**20:.2f}M params")

    # We resize the embeddings only when necessary to avoid index errors. If you are creating a model from scratch
    # on a small vocab and want a smaller embedding size, remove this test.
    embedding_size = model.get_input_embeddings().weight.shape[0]
    if len(tokenizer) > embedding_size:
        model.resize_token_embeddings(len(tokenizer))

    # Preprocessing the datasets.
    # First we tokenize all the texts.
    if training_args.do_train:
        column_names = list(raw_datasets["train"].features)
    else:
        column_names = list(raw_datasets["validation"].features)
    text_column_name = "text" if "text" in column_names else column_names[0]

    # since this will be pickled to avoid _LazyModule error in Hasher force logger loading before tokenize_function
    tok_logger = transformers.utils.logging.get_logger("transformers.tokenization_utils_base")

    def tokenize_function(examples):
        """Tokenize the text column of a batch, capturing the tokenizer's log output."""
        with CaptureLogger(tok_logger) as cl:
            output = tokenizer(examples[text_column_name])
        # clm input could be much much longer than block_size
        if "Token indices sequence length is longer than the" in cl.out:
            tok_logger.warning(
                "^^^^^^^^^^^^^^^^ Please ignore the warning above - this long input will be chunked into smaller bits"
                " before being passed to the model."
            )
        return output

    with training_args.main_process_first(desc="dataset map tokenization"):
        if not data_args.streaming:
            tokenized_datasets = raw_datasets.map(
                tokenize_function,
                batched=True,
                num_proc=data_args.preprocessing_num_workers,
                remove_columns=column_names,
                load_from_cache_file=not data_args.overwrite_cache,
                desc="Running tokenizer on dataset",
            )
        else:
            # The streaming branch omits the num_proc / cache / desc arguments used above.
            tokenized_datasets = raw_datasets.map(
                tokenize_function,
                batched=True,
                remove_columns=column_names,
            )
    if hasattr(config, "max_position_embeddings"):
        max_pos_embeddings = config.max_position_embeddings
    else:
        # Define a default value if the attribute is missing in the config.
        max_pos_embeddings = 1024

    if data_args.block_size is None:
        block_size = tokenizer.model_max_length
        if block_size > max_pos_embeddings:
            logger.warning(
                f"The tokenizer picked seems to have a very large `model_max_length` ({tokenizer.model_max_length}). "
                f"Using block_size={min(1024, max_pos_embeddings)} instead. You can change that default value by passing --block_size xxx."
            )
            if max_pos_embeddings > 0:
                block_size = min(1024, max_pos_embeddings)
            else:
                block_size = 1024
    else:
        if data_args.block_size > tokenizer.model_max_length:
            logger.warning(
                f"The block_size passed ({data_args.block_size}) is larger than the maximum length for the model "
                f"({tokenizer.model_max_length}). Using block_size={tokenizer.model_max_length}."
            )
        block_size = min(data_args.block_size, tokenizer.model_max_length)

    # Main data processing function that will concatenate all texts from our dataset and generate chunks of block_size.
    def group_texts(examples):
        """Concatenate every column of a batch and split it into ``block_size`` chunks.

        The trailing remainder shorter than ``block_size`` is dropped, and
        ``labels`` is added as a copy of ``input_ids``.
        """
        # Concatenate all texts.
        concatenated_examples = {k: list(chain(*examples[k])) for k in examples.keys()}
        total_length = len(concatenated_examples[list(examples.keys())[0]])
        # We drop the small remainder, and if the total_length < block_size we exclude this batch and return an empty dict.
        # We could add padding if the model supported it instead of this drop, you can customize this part to your needs.
        total_length = (total_length // block_size) * block_size
        # Split by chunks of max_len.
        result = {
            k: [t[i : i + block_size] for i in range(0, total_length, block_size)]
            for k, t in concatenated_examples.items()
        }
        result["labels"] = result["input_ids"].copy()
        return result

    # Note that with `batched=True`, this map processes 1,000 texts together, so group_texts throws away a remainder
    # for each of those groups of 1,000 texts. You can adjust that batch_size here but a higher value might be slower
    # to preprocess.
    #
    # To speed up this part, we use multiprocessing. See the documentation of the map method for more information:
    # https://huggingface.co/docs/datasets/process#map

    with training_args.main_process_first(desc="grouping texts together"):
        if not data_args.streaming:
            lm_datasets = tokenized_datasets.map(
                group_texts,
                batched=True,
                num_proc=data_args.preprocessing_num_workers,
                load_from_cache_file=not data_args.overwrite_cache,
                desc=f"Grouping texts in chunks of {block_size}",
            )
        else:
            lm_datasets = tokenized_datasets.map(
                group_texts,
                batched=True,
            )

    if training_args.do_train:
        if "train" not in tokenized_datasets:
            raise ValueError("--do_train requires a train dataset")
        train_dataset = lm_datasets["train"]
        if data_args.max_train_samples is not None:
            # Keep only the first max_train_samples examples (or all of them if there are fewer).
            max_train_samples = min(len(train_dataset), data_args.max_train_samples)
            train_dataset = train_dataset.select(range(max_train_samples))

    if training_args.do_eval:
        if "validation" not in tokenized_datasets:
            raise ValueError("--do_eval requires a validation dataset")
        eval_dataset = lm_datasets["validation"]
        if data_args.max_eval_samples is not None:
            max_eval_samples = min(len(eval_dataset), data_args.max_eval_samples)
            eval_dataset = eval_dataset.select(range(max_eval_samples))

        def preprocess_logits_for_metrics(logits, labels):
            """Reduce logits to predicted token ids (argmax over the last dim); ``labels`` is unused."""
            if isinstance(logits, tuple):
                # Depending on the model and config, logits may contain extra tensors,
                # like past_key_values, but logits always come first
                logits = logits[0]
            return logits.argmax(dim=-1)

        metric = evaluate.load("accuracy", cache_dir=model_args.cache_dir)

        def compute_metrics(eval_preds):
            """Compute accuracy of predictions against the labels shifted by one position."""
            preds, labels = eval_preds
            # preds have the same shape as the labels, after the argmax(-1) has been calculated
            # by preprocess_logits_for_metrics but we need to shift the labels
            labels = labels[:, 1:].reshape(-1)
            preds = preds[:, :-1].reshape(-1)
            return metric.compute(predictions=preds, references=labels)

    # Initialize our Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset if training_args.do_train else None,
        eval_dataset=eval_dataset if training_args.do_eval else None,
        tokenizer=tokenizer,
        # Data collator will default to DataCollatorWithPadding, so we change it.
        data_collator=default_data_collator,
        compute_metrics=compute_metrics if training_args.do_eval and not is_torch_xla_available() else None,
        preprocess_logits_for_metrics=preprocess_logits_for_metrics
        if training_args.do_eval and not is_torch_xla_available()
        else None,
    )

    # Training
    if training_args.do_train:
        # An explicitly requested checkpoint wins over one auto-detected in the output directory.
        checkpoint = None
        if training_args.resume_from_checkpoint is not None:
            checkpoint = training_args.resume_from_checkpoint
        elif last_checkpoint is not None:
            checkpoint = last_checkpoint
        train_result = trainer.train(resume_from_checkpoint=checkpoint)
        trainer.save_model()  # Saves the tokenizer too for easy upload

        metrics = train_result.metrics

        max_train_samples = (
            data_args.max_train_samples if data_args.max_train_samples is not None else len(train_dataset)
        )
        metrics["train_samples"] = min(max_train_samples, len(train_dataset))

        trainer.log_metrics("train", metrics)
        trainer.save_metrics("train", metrics)
        trainer.save_state()

    # Evaluation
    if training_args.do_eval:
        logger.info("*** Evaluate ***")

        metrics = trainer.evaluate()

        max_eval_samples = data_args.max_eval_samples if data_args.max_eval_samples is not None else len(eval_dataset)
        metrics["eval_samples"] = min(max_eval_samples, len(eval_dataset))
        # Perplexity is exp(eval_loss); an overflowing exponent is reported as infinity.
        try:
            perplexity = math.exp(metrics["eval_loss"])
        except OverflowError:
            perplexity = float("inf")
        metrics["perplexity"] = perplexity

        trainer.log_metrics("eval", metrics)
        trainer.save_metrics("eval", metrics)

    # Metadata handed to push_to_hub / create_model_card below.
    kwargs = {"finetuned_from": model_args.model_name_or_path, "tasks": "text-generation"}
    if data_args.dataset_name is not None:
        kwargs["dataset_tags"] = data_args.dataset_name
        if data_args.dataset_config_name is not None:
            kwargs["dataset_args"] = data_args.dataset_config_name
            kwargs["dataset"] = f"{data_args.dataset_name} {data_args.dataset_config_name}"
        else:
            kwargs["dataset"] = data_args.dataset_name

    if training_args.push_to_hub:
        trainer.push_to_hub(**kwargs)
    else:
        trainer.create_model_card(**kwargs)
|
| 649 |
+
|
| 650 |
+
|
| 651 |
+
def _mp_fn(index):
    """Entry point used by xla_spawn; ``index`` is accepted but not used here."""
    # For xla_spawn (TPUs)
    main()


# Run the pipeline when the file is executed as a script.
if __name__ == "__main__":
    main()
|
benchmarks/Generation/Visualize/analyze_mdlm_denovo_gen.py
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd

# Directory the generation results CSV is read from.
path = "/home/sg666/MDpLM/benchmarks/Generation"

# Load the per-sequence results and report the mean of the 'Perplexity' column.
res = pd.read_csv(f"{path}/mdlm_de-novo_generation_results.csv")
average_ppl = res["Perplexity"].mean()
print(average_ppl)
|
benchmarks/Generation/Visualize/esm_umap.png
ADDED
|
benchmarks/Generation/Visualize/esm_umap.py
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import seaborn as sns
|
| 4 |
+
import matplotlib.pyplot as plt
|
| 5 |
+
from umap import UMAP
|
| 6 |
+
from sklearn.manifold import TSNE
|
| 7 |
+
from sklearn.decomposition import PCA
|
| 8 |
+
from transformers import AutoModel, AutoTokenizer
|
| 9 |
+
|
| 10 |
+
# Root directory the generated-sequence CSV files are read from further down.
path = "/workspace/sg666/MDpLM/benchmarks/Generation"
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Model identifier passed to the `from_pretrained` loaders for the ESM-2 embedder.
esm_model_path = "facebook/esm2_t33_650M_UR50D"
|
| 13 |
+
|
| 14 |
+
# Loads the ESM model and tokenizer used to embed the sequences
def load_esm2_model(model_name):
    """Return ``(tokenizer, model)`` for ``model_name``, with the model moved to ``device``."""
    esm_tokenizer = AutoTokenizer.from_pretrained(model_name)
    esm_model = AutoModel.from_pretrained(model_name)
    return esm_tokenizer, esm_model.to(device)
|
| 19 |
+
|
| 20 |
+
def get_latents(model, tokenizer, sequence):
    """Embed one sequence as the mean of the model's final hidden states.

    Args:
        model: Module with parameters whose output exposes
            ``last_hidden_state``; the mean is taken over dim 1
            (presumably the token axis -- confirm against the model used).
        tokenizer: Callable invoked as ``tokenizer(sequence, return_tensors="pt")``
            whose result supports ``.to(device)`` and ``**`` unpacking.
        sequence: The sequence to embed.

    Returns:
        The pooled embedding as a plain Python list.
    """
    # Send the inputs to wherever the model's weights live rather than relying
    # on a module-level global, so the two can never end up on different devices.
    model_device = next(model.parameters()).device
    inputs = tokenizer(sequence, return_tensors="pt").to(model_device)
    with torch.no_grad():
        outputs = model(**inputs)
    # Average over dim 1 (every position the tokenizer produced, including any
    # special tokens it adds), then drop the leading batch axis.
    pooled = outputs.last_hidden_state.mean(dim=1).squeeze(0)
    return pooled.cpu().numpy().tolist()
|
| 26 |
+
|
| 27 |
+
# Load a random sample (100 by default) of sequences from a FASTA file
def parse_fasta_file(file_path, n_samples=100):
    """Parse a FASTA file and return a random sample of its sequences.

    Args:
        file_path: Path to the FASTA file. Lines starting with ``>`` delimit
            records; all other non-blank lines are joined into the sequence.
        n_samples: Maximum number of sequences to sample. When the file holds
            fewer records, all of them are returned (in shuffled order)
            instead of raising.

    Returns:
        A DataFrame with columns ``"Sequence"`` and ``"Sequence Source"``
        (always ``"UniProt"``) and a fresh 0..n-1 index.
    """
    sequences = []
    current_seq = []
    current_type = "UniProt"

    # Stream the file line by line instead of loading it all into memory.
    with open(file_path, 'r') as file:
        for line in file:
            line = line.strip()
            if line.startswith('>'):
                # A new header closes the record collected so far.
                if current_seq:
                    sequences.append(("".join(current_seq), current_type))
                    current_seq = []
            elif line:
                current_seq.append(line)
    # Flush the final record, which has no following header to close it.
    if current_seq:
        sequences.append(("".join(current_seq), current_type))

    frame = pd.DataFrame(sequences, columns=["Sequence", "Sequence Source"])
    # Cap the sample size: DataFrame.sample raises if asked for more rows than exist.
    return frame.sample(min(n_samples, len(frame))).reset_index(drop=True)
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
# Obtain/clean sequences generated from ProtGPT2 fine-tuned on membrane sequences
protgpt2_sequences = pd.read_csv(path + "/ProtGPT2/protgpt2_generated_sequences.csv")
# Strip generation artifacts (end-of-text marker, doubled quotes, newlines) and
# map the 'X' residue to 'G'; each replacement is a literal, not a regex.
for old, new in (('<|ENDOFTEXT|>', ''), ('""', ''), ('\n', ''), ('X', 'G')):
    protgpt2_sequences['Sequence'] = protgpt2_sequences['Sequence'].str.replace(old, new, regex=False)
protgpt2_sequences.drop(columns=['Perplexity'], inplace=True)
protgpt2_sequences['Sequence Source'] = "ProtGPT2"
# Drop every sequence containing a B, U, Z or O residue. Each offending
# sequence is recorded once, however many such residues it contains.
bad_residues = set('BUZO')
bad_sequences = [seq for seq in protgpt2_sequences['Sequence'] if bad_residues.intersection(seq)]
protgpt2_sequences = protgpt2_sequences[~protgpt2_sequences['Sequence'].isin(bad_sequences)]


# Load MDpLM generated sequences
memdlm_sequences = pd.read_csv(path + "/mdlm_de-novo_generation_results.csv")
memdlm_sequences.rename(columns={"Generated Sequence": "Sequence"}, inplace=True)
memdlm_sequences.drop(columns=['Perplexity'], inplace=True)
memdlm_sequences['Sequence Source'] = "MeMDLM"
memdlm_sequences.reset_index(drop=True, inplace=True)

# Load UniProt sequences
# fasta_file_path = path + "/uniprot_human_and_reviewed.fasta"
# other_sequences = parse_fasta_file(fasta_file_path)

# Load test set sequences
# NOTE(review): assumes test.csv has a 'Sequence' column, since it is embedded below -- confirm.
other_sequences = pd.read_csv("/workspace/sg666/MDpLM/data/membrane/test.csv")
other_sequences['Sequence Source'] = "Test Set"
# Sample at most 100 rows; asking for more rows than exist would raise.
other_sequences = other_sequences.sample(min(100, len(other_sequences)))

# Combine all sequences
data = pd.concat([memdlm_sequences, protgpt2_sequences, other_sequences])


# Load ESM model and tokenizer for embeddings
# (load_esm2_model already moves the model to `device`, so no second .to() is needed)
tokenizer, model = load_esm2_model(esm_model_path)


# Embed the sequences
data['Embeddings'] = data['Sequence'].apply(lambda sequence: get_latents(model, tokenizer, sequence))
data = data.reset_index(drop=True)
# One row per sequence, one column per embedding dimension, indexed by source label.
umap_df = pd.DataFrame(data['Embeddings'].tolist())
umap_df.index = data['Sequence Source']


# Project the embeddings down to two dimensions with UMAP
umap = UMAP(n_components=2)
umap_features = umap.fit_transform(umap_df)
umap_df['UMAP1'] = umap_features[:, 0]
umap_df['UMAP2'] = umap_features[:, 1]

# Visualize the UMAP projection, colored by sequence source
plt.figure(figsize=(8, 5), dpi=300)
sns.scatterplot(x='UMAP1', y='UMAP2', hue='Sequence Source', data=umap_df, palette=['#297272', '#ff7477', "#9A77D0"], s=100)
plt.xlabel('UMAP1')
plt.ylabel('UMAP2')
plt.title('ESM-650M Embeddings of Membrane Protein Sequences')
plt.savefig('esm_umap.png')
plt.show()
|
benchmarks/Generation/Visualize/mdlm_de-novo_generation_results.csv
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Generated Sequence,Perplexity
|
| 2 |
+
GEGQPTLDAEGMPKADEGKMMTFKSENFTDDSVENLVLTSYGVYNPVIFTDLVIRTPKEGAVVPPTVVLMNGEWTEVMPNLTGAETFDTQSKYLVNGLKRYGVSKKKHVQVYQMARRTKDLLTMIPDGMASADFSFEAPGRANTMPAVGLSMDSAVGQPNLSRLRGVDVFFRYIVYTADPFGSETQNLEVQASERTNILFLNQQKKKVKSGIVVQMQKGILFERFGEVMDGQRPSNQRVGSQDMLIGVGALVKLNQKKIRTRIIQLFNLGYDDSEAIDWLPTTVAYLDSTYYVAMTTIQSIWVTDYYGLQGLFPFNQNKIGKHGVEVKHVQYFLEFVEAYVDQLEDLFTEYNERNSKLSNSNAIQAITIAEYQQLKDQLQLLTTENPIVDSSMIALRIKKLDNSATRELVSQFNRDVERATPNITAAQISVLKDNMTILLQDELMHMSDLNGEAADATYTLQAARESLEQLTTAAEFAPEYLTIEEQDISDFKARMELLKEIVGSLSNRIESAVKNKQDKEGIQYAMYKRPNRIDILIKNINLKFKGIQFQIDSIVAKVRNMEAFIKALVYRLDNVRISLVQRVGNRRHLAKKEKEPETVLIVNLRDYRSTLILFDIMTNLRITDEGQPENILRMKPVLDNADIPTENERIPSLSMPLMVRYTTVVINLPELDEHKAPLGINIVVAKDAVVSRLEWEWEGDVFKNKPYRIKRAGYGPDYVRAGALAQVFIARSDTATQSIAVRKTANEKFLLRLPRLPGSLMGEVVLKSFATFHQAFGTGRNNVYQRDEDSDKKYNQTLIDYWFDLNRFFGLSQREEGVQMMLLVEEPFTAGILSKAIVFDDDKKSAFLMMARAFLVYLPLHHSPDAPLEVANNSPKNIRLNLQATIAARG,18.2131
|
| 3 |
+
WTTGWGVSQDLIDSASMSPGMIWILLVDSYKERWFGTYWWGTSTCKEGAFPFEDVMQRIELRILKKYFYYLAIISSVLTLLMIIAKLVTNCLSFANIHKSHRYFFCVNCFWFISQLCNDLSAFPVLKKLESATRFVIYPSPVKAVQLDTMPDKIVLYLIFLNIFSTHTVLVFQSMSLGLITGIIDIPTAKRIIVPNLGILVIKTFSSKNCKLSLLAPEMWPKCMYDYVAFKNIEAQIVITSTSVGAVLCLLLILKGSVFVSSSYMFVGGKPANPGTGTRMLLPKDDHFEHKFCHNFSNVEKISASSYAASPEESILLLVNKEEHNKLRVLAVVPKGARNVLVIEIMKLKPFQTTYNDLYLPRDENNQLQKNKKVVSVGKIVLKDPASWVYLPQGRLKMNFKKAYIKSGAAPILLSFGQRLISVDNAVPLAKMRTTGITVLEMAPRGSRVQAIVVLPGQLKCGKSETVYWFTVSSIDNNRRGIAKYMGGVTYRGRAFIDMDKNLAGPPLVSDAYQMLFNDWLEMLCGAMKATESEKVKSRKGASELRVIHRSHHGCIVAILDDLYRLRFDLVDIERIGMINEEGRINGKIRSFEFQNFMLTSKNDMKTGFVNMPESFKPRTILTGDLIDNDWAPSFDLAAIRHGNIQVLVDGNDLEGSEEATNCHHGNAFSLGPQGRKVVVGAVVAPKTATPKCNSISINLQELPANYVVLGAKALTAQHVANFSVNGTKHDKKTCKQHEEMWKMYIQKSGQISKQASIKQCAILLSLGKLRATVKWFLGKYLERIFHVLLKGCKTVETIIDKGRMHKIKLVKFRFGIWIFLSVVCLAELFCIMIFVAPEIVRGLHTLILFLLFMMLLLLNYADTGHEVTGVPYY,18.1991
|
| 4 |
+
GIISVIDLGAKLIVPGDLFFVLCGKNRPPTSGALQYTILHTKKKLFCCGPTHAKHICLINGECIRDGREKLQNLCKTSGKWTEVRRPKSTYSCSLVYRRVQRRFCGPQKARAPVWILYIVLLSAIGVIIAVTINWVLQVCIILGAVVANGFLIRVLSIVDTRNQIITRGLRRYGIYRNSVKVACTSGSVVIVRIKFMEDQISGGWRPASFIRTSFKEFLPASATAFSRLADCNEKLIEALV,17.785
|
| 5 |
+
RARILNRSLESARKYLDFLKIDKVEFYENEMTFRVFIEEAPEFKRMEITEDKIRLRLKPDKIRKFVELGNLFKYTDALQLAVQLEKQNKEELVIAEPEVIHALHKHHNHMPIFHLLEAFNDETVAEIILANIGKMPAFLFWVWNRMSDPTEDRGKGFNEKVKRKNPTIVSILDKQVYTIKGGFGAALTKSILGPLLATQNIKIKADYDESLADVFVGLHFVDGSIILRPWPITGKEVALSEEVTATDKVISASEVGSEEDKFQLTSIENNFTKTLLEIKKRRENAFEGAYTETGSKVSDKPVRELKANLKLIPEYQAERIDQTYWRKILRLSRSLISEPRGARAYLLRIGERVDPHRYRPAIGAEMLMGIPNSITTGFKISKTLGLQAAGLDLIQTFRSLSIRRMITKNFMAILIDKPGLKAVFWFIPLVPFIAVQLLIYGVLVGRAKPGNLVEIIDSMVDGKFETRPNGSPAASHKMVIGVKYSLIYPGNQAKKISLVRWNTALSKDVRGQAKEIDLWQLISYYLEKMRIGPSAVSNVFQSVHDGLKRNELAVLLIMDPKTRDDSMILDIMNLRIERGFVSLIKSYIHDYDEEVYMTYREILNQNVFLMKYEEYATMSADLEAYWLKSIEETNLRALRLPGAMRKQLFLANLCRISEHLDTPTEQDAFSNPEGITIDEGFTDEARAQGIGFVAGFVDEKEFEQRYALLAKVYIASLKALAAALADSGVKTGIKIGLGTVERIEVHKDGMMIDHVKMEGPGRFPIIVGGEVSPIVNGATIIPSFIKILADGKVDEGKSPNKTPTEKGEITPQSLYRGMGKTVVLNDNGGLQAHALTWAINDEYSYFVAMGTSNKSDEKQLAALSNSLDMTTFEDSAGRLFSSIVKAKTLSENGVITENGAEFL,17.7595
|
| 6 |
+
DIMLPKSPLFEEMATLGFIGHTLLAPIKPWTKATATMVGITGIGVTIYWGVPDMFPFSPTNHWWVKGMKAIVPSIIALQIIDLFYVVLTGLTSRFVYPIVATFYDHYFVNVQILVTGIACTLVYPETHGDVVSVDLLQCRTDGKLTQIPMEEALELINFIDQIMEKTKCKFNISEGYYEVLMTKKFIVKGGKGVGPDTDPEPCEKYATWGRLRDPTGPIRPNRAEKSQIAIYAVCGGAVLQKLGVPLLDNEAPIPSQLLIFAIGTVGIAAITIVALIFGGIDIAMSNPVYRPILAYSPHNKLLYPLSTWDVGYYNVPNVTSTYVVVVVPPALTIYSVANEAIKVETTPIPVKFAEILPTGETALLSSYPLTIAQTDLNARYERREADIFTKHEGMQQTFKGEVVPLVSNNRLKSPSGVQIGCAASLMTVPEEDGMTPPRIATTWFEGPYSAPASDPLMRMPGDFYGYGKGTMDGTVSNEMNGISHRPVTLATGGNVKFSPVMLPWYIGARYGLDIQHSGNRRMAHWLTSRAVMGFFKKNVARLADRVNAITLQVPSKDPDLNNRPNHAVMSRQWITAVEAIKELAICSLLNQFREGLGHKNDTIEADLNPFSGVIAQSSMAILKTAMGATRSSVAQLSMGMALEAFRHQMTGTAGIHYLMSVTGNGPGRISTALNKLDSPLPPAIAED,18.2088
|
| 7 |
+
MASLAAANKRRSITSIAVFLGIAAAVVVLGISGKVTDKNTVLDFSYGKNRGFKPQHLSEYVRSRYPAVVIAKGENLLRGGRFLPWVRPPGLRYVAYYDAIESWDIEDSLKELVDLRAQGLYADVETAAALKKASVSRSTITEMVGMHTNQSEPQFRTEQTKVAKSNGTMFVFATMSFTRGRYEIGSLMLSAIGKRWVEEPTPDTKMRYMKLRPSIQLLCRTGGIYRPIFQLGGPEGIFYHDGYGENSYQVDDFIWKHLERRA,17.9845
|
| 8 |
+
VYGLLTTGSASSAMATTTLMIGVLFGLFSSFALPLWSWLQYLTTTGITTATGARYFKNIMIEAFSSYSAAIIGTVSIVPLGSSIPAAASASAVGAFGAITGFALGIYSMLFKKMDSFTHWLFSAGAGLSAGLAGLISGIGAIQIGNAATATSGTAIPLFGLVLRIIVSSVQGLLGTIAAILLISN,14.5579
|
| 9 |
+
IGHVFHLLHMALPIWRPPLPVTPGEPHPRPIADLVTPAFEYKTLLRCPHPHVSPIFLSVVLWMIVALVLAGVMVAQALPAPTGARLLACYP,17.2759
|
| 10 |
+
SIIFIFMYLTNCLQWRQNGHLQMSGLLFATLGVSTVTHTMMLASRGLCQAQKQRIRRECTLFGLAVHFCLAVGLFIASVSFAVWSSLEGLDDDANTVAVMRWWGWTFSFERYATVKVLFDQGIQSTYIMSWLLLMTKREDFRLLLFFMLTMFASILVPFSRGAHFYSLSVAFSNFATVILPGVNGVGNEIVQQIIFVLLFTFPMFLLIVDIAFFVNLIFKAAHP,17.039
|
| 11 |
+
LPDVLYHYEERKFVITRSEVVLEPNELFIGKIAVDVANYNIKVKVDLRISSKYVVFNNSQLTWNDHFLQPLISDRLRWVIFRVCVGTSSPQLLIHIDMIENFLQQLLSFLKGIVVQQFVGKVTLIQEDMKIEEETALLIEIETYPEAELLKLVRNLIIKVEDRTIGSIRHEAQLAKINDWSAKRIISDLNIGDIDNGEHVLVSAQEELESSIMERLAGHLRKFVNVGTWTESAAIDIVARIYGSLSVELHEEWLVMLEYLFTYPNDYFPGMYTVQYYQNADPGELLKNHALIIDELQRLELYEG,17.8596
|
| 12 |
+
SQGLDDLIMTTVADSEKDTDLTTTADLNMVPIYVGSNETATSQLGMIVKRKRPEKPIYVPVHSCSKDDRACAFVNFFNLARDLGYIEQDEQRVTPDWRAIILTMAEDHIHLWSHPNVAILKLRVIGEKLADQRETMKDPLNTRVEQVALVQAPKIDLIRASYGSLLEYQGEQKKYRINNTLSRCESLCADAGVGYMVATDAKVCQVEGETVDNNTGKDGDKTEILRAHFKQPSAFNKGSGVLRGHIMMTLGIGLLILLLYVIIVFFLHKIQNATFKFRIPRVAIATSLVADACMSVLAAGIAYALANFPVFTAKIYAETAVVLVLLVKGRLFIGKNKEIPMTARITIIRLAGAVLGFAATIIGIVILDPVLSIDGVSLAPGKVPQLLCLAQTAVREGGHQTWDLQLFELSKLSGMKIQPGKNIVRDPDKSNTAEEPTVALWDMVDVPGDIDSALQKDPVVKAMFKPHTGETMLMRQDIWAVQRWVMNSLGKLRLGKEVAILRKYVDTTHPAGCCDTGDAAIRQAQTEGKTVHSDGVRVLEDSVRMVGLDGDGKTCVGQAEEQLIQKFPCEKKMADDVFTSARALALNASTLIEQTDNGGDEWGENDTIKQVIKTGRKVEGDAAEIATPDASWNDGVYRKSAVFSSVTSDCITDIAGRTNTIVTIKELKGPRSLPMITNLRKRTALILAEMKTLIGPGGLYIDKTGIDCKVKDEGKIDSQIKYEIRGIELYGNMTPAPGIKPVAFTGKGGSGKSTIIRVKGSVVPSFVPINKFGKGRGERRTEKNADALLIPFSNAKKLEGETKVLIPDFLFKITTRNVVTVGRIVVGGVLDNSDVFEGFDSIKLVQAAMVEGQKQVTIVGINRKEGPVYGDNLLLSADIETEEYMTYGADQALAKAAILRRSGAVLFALVFGGNPNPRIFKGTEIDDVWLKIKPRAQMASVKFDEYIKQGTIAVHGGGINNGKYLVEGEDDPCDPNDQPLP,17.9744
|
| 13 |
+
MDILKKLIGLSSLLALFLLTPDLLAEIVRDIVGVSIGEMPEIYIYLLAFYLLGLMLASMTTSPPGFSFLTTRIIYCIFYAWYYILLTLIVTILLIIGKTEGNYKISKQFGVTENGVIMNMIQKAWNFLSDISTGNYLITMWPLNHFGVVPWFNKAAGIAWFIGPYFTYRLSQRPVNFIFSALSFVIKKWLSKIWGKFVRMACAFTSWVFLMGVATTLVLVIFNEMKWIKCSILNSKQWFGKLLGYMRNSLTVLCQKTSIGINMMLVSVLILILIGTIGNGDTAIYWHILFLIYSAIGIFAFVFVVQIVVCNKDRGKKTDLSPAVYAGLAELLPSLSTSYDNVNLAPQDYLTALNVIDSLLIKLVLEIIVAGFLSPLLYDFRLSSDTKLIFCFISLILFGYVFLGFEKDKAESEIGHSRVPSIPRNIHNHTAATVVRLREVLYELFTSQDEAHLGAHEKQNVSKVLLFALFFLFVSTYLTISTPVSNVNCTSYRLDTPFSKRKRQLSLALFSIGCCLDGFSTMQHMVCGEEFQLDSFFKFFVRFGKVIGKRVAMFFFWTLAKALASYSDAIIAPGYSEKMAHFPPDQFNGRAVDFIDVDEDLFANGFSEGKTRVAGPGEIVIFYQIGGNKFEAMFTVSEPIKLLYYDKGIIQAEYKSEFGCELITVGFTTPTVYDYLSPVPAYYFSLVKDPTGTFFDLISVPQGSIGYVNAKFQAYGEFWFGRIVHTGQNRNYITSLPLLIHLKAGNILFWLICVVDLTTTSILGKGNKRAVEVYGLSLLSQCDSFHTEVKIIEEVKRFFSLKQRKYLSSIFYTSYMNIFLALQYKAFAMPINAGVFVTDLDEQAGILIQAKKTRRRIPPRLIFVRDRVSDPNIKIENSSPLFNVYLLSCGTDYTSKKIISIDNRIWALLDGIHKEELSYEFNYE,17.5258
|
| 14 |
+
FRFCYTWVVLILVPSIFIRSFLRWESRFYFLKELERKMSGGDDLVQRSEQVETSCPVSSRCNQISEKILNWIKCEHKRVLVGGDVEQIIFPYTSSPTQSAEFQKMHQFQFLDDSGTQEANYVYQRIDETGYFRFADAAEEFTGALDVEGMCENWNVFLCMNVSTQISLILNQAKAYMFTQVLLQDGTPLVQFLDPDDQRLLVNCEDNEASNEMQDANRYQQILDVDYLLLEIQNQYYPAYFLVNLNHADCFKGTPLFTPKILEGVQDVVTCRRLVWLKFALNRYDPYDSVGSLCNTPRYMRLTRRLMENWDLSGVFWTSLTFLLGRCW,19.2309
|
| 15 |
+
TPGGFIDQNREISHATRNADVNYSLLLLGHYTALAGMHAMYLGPDNVVILTEGGDFATLPYTAAPTFTAQFWQMKILAAGFSVVIAFGHFVFGVSRFWHGMLDVTMGHSSALSTLFAGHIGHLICHAGGSLIFFNFDSEPVVGTTVTIVLPLWFTHHNLVLIPWHEWTLNHSQLVQVLFKPNMSFIFAFGGHFRGMHWGIGGVNEFADGHNTGLQYHHSFFILLSLLALAVHALQISRGIIWPARNWNRAKDFWNSDTVPWISYVVYTDAFWALLAYTLGAYWAFASSGNLWTSIHQRYLQSEQVTVTTHATAMFGNMAWGGFITPVHSWIFNQGKLWSLSQISHVSRGWRFVLNSFHHGLWFIALVGIATWFYWRLHFRWGDTSLAVEAGFWNIHWVTSNAAPLMFFILAYILVATETSNKY,18.3108
|
| 16 |
+
RVFFVQNLAMMLFLLLILIPLFGDKYDVLVSCTDEAFVELNYLIVLAKQWEIGCSERVVPMAAILAFLINFGLICITLVVGGNVIQWYSPKLHEKNHFIWSCLETIMVITVVLIFLQVLVCLGFTNLITALCWLPGWKLVAPWQSCALRQRITLALLLLKVPAFLGILLHVFSKQGWLIISGVQQLSYILSSLTMIAVDLWGGSIAIQDCRGKHSLIVLKVRVLMLYAPLLSSYVYWFEYAVGTCSRYFLEIVDFLVLAFMIVVLLILYGREYVERLDNIYSLVDGANVAESLTHTYILILIAYPRTNPNRIAVHIKLISFYVWMIVIMVFARKALRRLIPCSHPFGPKPTVSDKINAVQSGSTKAEWESIEYFVLFVFVILVLLLVGLGCIKLYDTFWNPQEDLDTDIKTDPFNNSLMTIIGVPYLVTVVIRKTMSLLPLTTMFKIVGVLIAGRLNLTIGWAMAYVTAFWLVTIECIRFFHPQPGVSAKPECLRFALTTATVLVASATIVFSADINRNHKGEISKAAGFAWLFAYVLLMFLAVAFVIWKIAERGVLGQSATVSRSESEEAVVYQTAKDARADIFPLVDDIRVTEPKDSAGRVINLGMPLTPSGQSKNLLPAPGANPVPTRPCVFNACWTVPDTVLVFCTVAAQMTLTFRELVLERVKDAIHTGRCVHFWPDLAICADVFDKTGLDNISGVRMNIAHGESELSAIEFNIKFQ,17.5525
|
| 17 |
+
RWSLATLFMVISLLPVAFVNFFSFKEGFHFFGFVFAIITLGLSGIAWITKLRDPVDKMFFFRVRMLGWLRPTLVYFIMYGILGIISRLTALVRFKAMPIFMQLGHFFPVVNGILIFVANRPMKAVRLQARILNRIARGRWTGASYPEDKPGETMTDEEFICLTQSGMNLGDSFQIIENGTLIPNWLTSSPLKVEVLLYTYFLLGLFGLCVSLRLAGCGCLPEVIRRMWCWIWFALFFFSSFWQVFRQLSALRIALGRARWKKFDFGPVSFGLVVLFLVAVFLSQVLLVAILDIDEMRQKFAEVQTALTIPRNLPELKDSIKSTLLPFQGELQWYSQWTSLIVYLTHLIMTGMGKAMELSWQLFNLIWAFLCSNNGFLCFFQEYFLKLFLWGSAASILLFLPSILNLVQRLVPFTILIFFCVPPLIAVHSLYNRGLGVFENDVGTLKAKAVQTSAAPDWQITETNGPDEDYESHIMAIVTFVNLLCLHIIALLMTGTNSAQPLLFELKFDAKVFMAVFNGLIRTMSVLYSRGCETYALLNLLASILLKLALWFWAEIEEDEFASNISLGTLLREATRHIPVLITPVAMVNGAAGAGLTLLWPTRGHVYLRGAGNKRPAGRTSLGYMAGPSGEQFYIRGAFWMASIEISAGTGDVEINGSHIAFFALKGKMIKLTLDGKPASKIKDLVTRYADVAVDDKIDFEWDALAERSIWDKKQLKKGMQLNGSWPKTNVMPLITGQDMSEKYLYARVDLQNFNYNVKGASNKEGIHIIDPFGSLLPLVVFSSFGIIGLGFLYKYNVWQDTSTLQEHFQKRKTTISSSKPVKFTMDEPKLLGPMVFLTFQIVVMFLLGFHKFPWLYFAYIYKN,17.5249
|
| 18 |
+
VYPAGAALAKAAQKALENAIQEHYEVAMREELEANPERRLIAILKGLARVRSAYAEIDIMRDQAQNAIEESIEYANDMYKEGSYAIVTPTVRIHHSIAPVEMQQAIELMADLAALGYGDAGPVVAQVIQLPGLKFRGQTHPGASPYKIDVDVAIAAINLAVERLLDALQEVYQQPPKTVKVHRVSASHDVPLVFQVQVFVQFKINGAREGDFIYPGRDVSPQNRKVEKFDERRKSTRIIPIYRLRLQNPGAAFALKDHEAITLGFAQHYLLGNWPIEVGETPRTTQDGLPSMAEKAADSTNYLLAAANFMHGKPDLEVQMILKANLCSTEKKVTKLDRLALAMNYAVYLVMLAQDLSLFLKVPQNVKVHDGMGGDYQIMMSTLPSSVTEAEIEKGGDMHVQLKALLPVSEPFDAEDVIFGQTARVDEYLVDAKMAKLLRPPTNCGTNYKENSENAQYFPLGRNINLVPCTMEDGSLGLTGLEGFSLSQAGNQRMWAVNLIDRQGKVALLAEFVLNELISLSDNAEQIADSHTLKVVGVRGNVGNLWTTGTMSDKIEYTPVLMSGETKLHASLGHLIASTPDLTATMQEKKLTLLLSPPAYDETPPKIEKLVWPYEKNAEVTGPWRNITKAIGTSISDLLSLSNKMAKVEQERDAKMETSELQKYDNPEIRISRKLVSALVIIISLLIFALRHRFALATWRGCVVAIGTPSTPLKRLSGIVRQSADAGITTAGSKCSRIRIIIKRGIFMTTLGISSTIITLVFAYQ,17.9413
|
| 19 |
+
YMKRGMVHRTLTLLLIFMVLALGFAIDIRGWAMFLPEATLMVLSMLGFFRQGASDPNYDAVMPGVVCEIHMRTSMLFFSWSIALAYLAMLVNSAGQITQSPKIVDSFTKIVSAGKGLLALIINGITVAKPTDGDELFSQFSLTLTLTNIGSMSIVPQIRLQIYRWLMKPPHGFLGIFPVLSAGTSLAIALFNFWRNSLTEQYFKFLSDMTNSINAQVASMVAHRSMAFNWIGTIWQYCMITVFLLGFVYFYFLSETNGIQLRLDIDHSCGFSNVPIVFAHEFLTIACKAAAEILKSDSDNKVKVHVMSKAENIAGSGQLWLEVFESGSLPGANASIIQVIPNRKREADIVGPGTVDGLDGATLLLSPNNIFVPPGDLVAARGNKAAAGATLGANGTLTVDARKYGASKINVGYDTEAVGLAVCTLVIGTGDSVLPTAKKQMDNVVAFLIKEEDAWALQQPLKRSAERGYFALTMAMNPDTVAFATETGLRVCDLMNTLGDLFSIGPAGLDAEAVGAQGLSTTIDMNKHVFFGLEAKFSSSNINQPSSWLGALEAGLGNWLSLRNALRGDGPQPQGP,17.4011
|
| 20 |
+
AAYAQKNAKIKRKLEETVLCGGCDEGEYRRESSFGAISASDGFTPDWEHNLDGQPGLYVTKLIYKYIQHPQYLYEILAVALLGVIGAKTSLFEGLSHPKRRTESLAITFNSAHVSACLTVLTDYTRQLTYTLSACLVTLVSTLYAVNLIVRDKKIAADIQFFVEASDYLKMGLEVTRNENVTPVNDDDFFSHILWLIDHTKPTMIEGHFREYKLVNKFFILEEHGLVGKRGSMMDPINTFIKCEKLLQLIDTKYGGSVKKLKSSKVAFYNAVSEECAPVKITLPKTSDILAHRYVSVRDIPARGVPYTHSFSSNVVSAITDGRVMDKAGDFDEDLAIKIMGLKLDGFTVMVYRLDGFRMGETSVSKIATLEALIKDDIVTHELITKSSFTRDYRSMERHDFVLGSNFPYCSPAHEDTIEFKQKRQSYVGRAVADAKVEELELPATGDRGEVKDQVAKNMKVLTNQAMKVHVGTMLAPDGDIYSITKENVLPACYVDVKGYLTRRNILGKLKKFMDVFEDFAKVINILDDTGSGNHRFNRYWTRRDSRLGKPLLLTHEDDLETNVADNRRIRTNKQRERCLVRVLNLECEKCHLPEMVVLGIFIGSSAILFTLFTLMSINGVNVLLDQVPPSGFGASIEGAMREAKVLVRLGEFVANKANMFSQERGGDVPAIVPMTEEQRSDLNKPCKEERKISKCFTRMHGSWGGVKRMDPPFTRGGYLMMRQTRMGIWISFDKRKFGKTQKFKYLDCGMKDPNVWKRNINVGCHLVNTYADTNFNCCTQTIQAVVESHWTEPLFVARTFQPVSICLIGMLQFSYGPVMAGLKTPKPHPGTLRVVNVSTVNLMLFVLFNYLRPAAYNGFYGKYTKPFTLGVSQKPRAWSHKIITPPGPKQDLFISFFSHLVVLIFMVVMWIYFAGTVTPFDFQYYRQVSLDVV,18.5249
|
| 21 |
+
MKYNNYALLSTVTILGLVFTIFNWANDWNLHLNFGLTTYLFVGGTFLILTTFGVGQDDPSYLKGFTINLAGKMIIGTHLPPLVPTPFSPFIDKISKHLAGAHVTISAVTVDNIIGTLLKLLESGDNREGNWRAHRLAFHAIGATIFLWEIVISIYIGFNDGFNVDNGEKVSKTAELITTPSGTLDHTGSAESWSFDDDSSPLKWYNGFLISKEIKYKHRPLFSLTTQLSILMYNRIFIVLQLVIVHSERMKLSLFDLFFQNFFFFSTIPLDLEGLSGEYRSIGGKSDIRTLIVSCFGSLLHG,17.1279
|
| 22 |
+
KNEKTNSSSKKVQVADEIYAPGKPVAYVNTGTTQETIASDAILWLASEFSAIIEIKVVLFGPVTNDVYSCNIANYSPIPQGLEVVHKKYTNKNNLWLFTTGYDLNITFLNTDMLNLESSFLIIEGAISTSRMTSDKEITNFEVPGNAVVLCTYNAPSITSKGAKAHEASGGLAANLPREEQLQAILRSHEQYVSRKMKADCFPTTKAVNDGRILLFYLSAKNLVDSLPMERGDFNLIYQKMEVKIYLDDLLKTREEIQAARAFMTEFIVRQNGDIKLLGLSEISDTSDGRAEVLDLPLESGNSLSSEVDAVLVVGQLRAMIHGTGTFTGVFIPHDLISSGIDPESDREGIGNFSRFDRESLVLFGIGVYIDGVNEIGWFKKTPIAIGIGNASYRRSNCLQISFYCDVDANTHEDTGTSKGKTLIMATNEYSIAGAICEACGYDVEGDDKTDRQIVVNQPSAVTVAGMPGLVAKTHNGFRNFEKNFEYLNFPVSKVLAEEGGLDYFWTIPPGNYQNNVPWNPVRAQMTSWGVSTTATLVFGVTYSRTLLVSLRVNATLTTNSLFAFFASKLSHINTFRTGGIISGGLCSVLILNFVVAIYGVSLRAFGGALLTYAMVMIVVLFCREVWKVMYYADIYGKQDLIIFELLNFFVNFGFILTIPLLSTASPPGSIDIKLPGILRTLSLYNDNQQRRTFIGKLLWDPESKVYNLKSGEAKLLGANASGLMAGGSEGAVHEVETDTSNLVFRSDVSSP,17.7156
|
| 23 |
+
EGEVNRIVLDLSGTSDGSVIIEANKVTRDNVSDALLKGKNFNAPAKTSSYPAYVAASLERQDPKKTFISFFKHAHNNAHGGQGRIINLAFAHATQTKRFNVFFEAYKKHGLKFDQNTFKFHVPEDMSRKGTIAFKGNDGEITLVDVFTSSFRQQISQITIRQGLWDWKSTRKNELGYFNNTIQFQGSKTTGSADLIFALSLLGAIRTIREYYPFKEQYVLLHRTWTNLQKKNKASWEWASARDKGQLNTGTKQTFATSLITELPIKSFELMTSARSLPEMEVNQQYEHYRIREYYKCRGAGEDTLMKGITGSGADATKVLMISYMLNEGLVLILDYSQQNIKTGNTIAILKEQGLAIKTSPSYSIQRLTKLHIYAMENLEIFPLHREQVNVMNAVLLGELGVAADEVSKANFNNMPLPSRQATVLSINLQDKDKNRKVLLRALGDQNSFIPPFDKSDVQNTVNLMESITKNQAITFDLRQGNGRSNQLIDVDI,18.2889
|
| 24 |
+
NTTRNPTENMPTPRSWLTEGRPYIAYACAKCKSETDKANKGLLFVTKDKIIIKSVPGIADQIAREVKEFFNVQTPAEGWDLVVGDVTADASAGVRGVLGGIVFTQKGSVLQALAVAVTSIGTMILFLNLFSWGGGWVTMFGAAENIITSLAMIAKLVLENKVLLLNIGMSGAGICLMMTTDVSPSVLTANFLAYAMIDTIAFAGDAITYPFTIDIGDAFFKFYGGAEIESVYNKQSKPWPSWVAEQLSFASASNTGAGVTWTFSLTSINKSYTLQFLQAVGLLLSAQSRPPDLLGSEAALTFNVTYVPLGFEVNAARIKTMLSPTKNVNQIGNLKPFLKHLTESLGNLKTLLKQRTQITEDDVDVRKLATSIEVPEKNLLNNELNKIRYANFVSKRLAVALNDEIPDLYKVNDLKSSHIFLKSNANLGNGIERLNTGIDIVSNQEPQMLTMLFLKGRLIKNNGTTAPLTLWLKYISNLTTLTNIGISVEESANRFRNLQNSFFKNNNILEINIQVIANPTDAEKELNLVGY,17.3426
|
| 25 |
+
GAQNTDVILGALSNFILGAFGLYYWFQWGNVILHQAVIMSFIHLVLSPDWTIWFYPYFVSEGCLYRVVLAIVQRTAMTLHISPEVSKYGIRAALSSPQEMYSLSRGDLRWFFKDLAIQKVRWRRMPPAVMILVLFIAYQLLQTKTITPTQLLLIQGLLFRVYGNLMITITILGTVMGVSPFTVIYNGWGKPKGITYCEFPSAFLFLLEDYGSGEEMTSIALPASLFVEYTEKASVIRAGYILSQVDEFSIKNMITRERNLPKSELLYVAADSGVNHTLNICQFPVSDTYLIKYSFIPYKLYIEDGKKVEMPPNKVWDAIVIGHYSQDDYWQLAAFCNQEWDFANFEKMLARPQRLVDTCGMALAATYWALLVQVLGAPILDNCLWINTFAILFAAGILWQIPPLRQDMRIDLSARFKHSVIVVAAYPYVLRLTWSGQSQQKFDLFIYFFLAIFTLSFNSVHYTADPAREQFEWRDSTGKDIPCVFLFGLTVTYWYGALHTGHDPESNTGLSTAKTSFDWKSQFQPFDNQYTRQATELLGIIPCATLHRKCRETWTRQRVFNVMVDMQQGSARFIFLIQDTAFNRNFKGGLIQDRQDLRKMLAISPGEALRAVIHRREHAAIEKQLNDVRADELVVAAQTAPGERVQELLRGSGVSYSLTNFVTFKKNISDDERRVPAPELVFQIVIVCCWDSRIVKALLAIITITSLAVGDLSGVFILFRS,18.2048
|
| 26 |
+
GELPALAGNRCGEAKLFDILARPDLPRRWYIHLGSVFTLMLVLTFLGAFIGTGCWVDGGGFGKFIDRGLSQAPTFGPQVLTHLYPEAWAHFFGIADPAGGYWLYHIILFSGAHGVFIFAGGALARTLRLGRLLGMARALGMRPKHCAVGAVGVILFLTAFYYLPDGNPTFTPDQGYESGSTGTIMVIDNGAVGLLFHPLFGAGLTGTFHTLTLAHEGTASGEGLSNLSEGGTESETYAAARLNALFRLVANQGRAWRALHIYTLPFLSLGVCAALGLTVAHAWTAFDYNNFVAAARADSFKFGANNWVLAANDIRAGAGKFVHAGDEVLPGELIR,16.0959
|
| 27 |
+
ADFFVRRQSTKKLYGLPLDGSVNDSVACIWGFAVFWNGLVFPWVFAFVGLIGWRLQIRFVPGSVIGLFKFELILSLIPDALAHFGVEDIYANPEYVFNFPRGVLTFASTHGIRTLRALRFAYPFVALFGRKAAGLFRRMGVVCLMAMVIGVGFAVAAFFFGELMPTMRWTFGEGGIIQTPVFAAGFRSSDVPATALEAAHFLVFFLLGLIFMAIHTGAAIFYAGESAARKNEDSQTFSWSSASSARLTRQRDREILVRRNGTSGESPGLA,16.437
|
| 28 |
+
MSYLYLVFFMILILFHLNLLTYNIVKKKPPFNGKYKKWEFKRAFDRYPVGYIYYGHGQWKDERNKTEKHPRDQ,29.4763
|
| 29 |
+
KVAAIGVPFFGLLIALLLNITMVFLSQTTLSKYWFAWHIFAIILILLGLLVNVLVNQGSSGSTTSNFDSGMLAMISVGKALGWNIMARYTPWQTGTLNSISWFNIGGAVTVAVMGKMAGIELIERENSRTPEGFSSPWPVGQTPAWMGAGPIGGVIAIVGISVSAVAVSALANISVVDVSNISLLLEIPVNSIIMGEGVGFYYLIMVLIMGMITLAYSGGFFSAKFGGYSERLGADLAGARTPLNVYGENIPKVMRATASVPALFRRPVANLALSLWILASLGVMVTYFESVAIFNRTIENIGKVAITNGQSVDVMGFTDVYPLDVDESNFIAWRTAIPPGVLVGVTPPIFGRIELVAVNAGLLKLERKGVAQVIDTGPESFELEAKMLAPSMTGSFSTQAAIGGSAFAAMFQSSTGANSVFVSFSKGSVAFSIMAGVFIGLVVALMLAGLNWNPGTVMKKLMMSMTVVSAGVSSIFAMVKPLALTTSSFLLVESVVIFSNSIGASEFIGFAGGAAFMVNKQFVRALASGTGALVIGGPVFAIGYIAAGLGSVTAAADVGRAAFIMAGIAGVLTGVSMLTGSLVGSAKFPDRSEGKMKVLRNWWPGYSIARLAGRFETSNLLMFFTYVADQLGLLSKDLVRNAHNFAN,16.5548
|
| 30 |
+
NWYNIRAHNYVAGTTMVDAATKPALATSIATQLLGTSDYDTISKLEHNAKEGGKINLIMTNQFPASGKMVIQQGYFGRGSAVPYTNRLPLIQLLSLVDSAATADKEQVLSVGWAIDAIVERRASKMVLYNASKSFLLGKISNIMGSMLVNIQISAAGQYTILTSYDSILTSKFLSYNRPVVDQGAGMINMATGTTVGANGQLLLRKVKEYITKVQGIDASLLAFAQRGLGSVTQASISARRPTRNRMEENAQKGAPGEFSKVTDAGGGHLPGSKMVFKRILIPVFMRYAIMDVRVKMAKTTYCPQTQTPFDKWYYTLNFTLRGTGYTTVVANPDKTGKDVMRTTMHRADCTGFEVAGSVDLGLQDIQVLEMGQFKNFDVYLFLGQGEGSDKYAVAKLTNAPPIAILNGFSSTMTLKAIWYTWRWPTMTRFSLAVLYFAAGHIMTRKFQNTAFMRDGQARQV,18.0029
|
| 31 |
+
DFDMPDGGVVTPLKAGETVGNLSAKGTLFNPPDDLHMRGDHNETLKYHSVTAVVIAGLQHEEIIGTAQDESCGYSAEQNTHCVAIHAAHKGDHDSSIALETEKVAVLCGDTEEGGYIWKERRHLSDSLLARIKAMFDVRFYDSHYGDKPGMSWPALRPWMKRGDLRGAWPVFLGAGGFAFNLGSMLGDGYTWNIYAILPALNGLQRLLFALGRPIRAVKYVKDTFDGTATTSFLLFYPAPSVFFLIAFFFGAISALAAGYMFLLEGRASLPQAITASIVAVSVCWQYNALFVGLMLVGEFCPRFAGTPAGVMAILGQMHDVLPHLLMVNEAVLAFIKTILYLLSGSGEPPLEASQMEYSAIVGGLVRITPAKDLDDPADYAVTGYAMITLVGFAIVLAMQVHLDGMCGDFSGVRFANPLHVGVKVIFNVDPDILCGPDTVTVGTLLFWAGGRFVFFRAASRILLPVFLSPVYKRWGSRVSVVATAFLTCTIIVGVRIRYQNDEVYANGAIYSRSDCAPGMFEEDKRFRNLLPTLEYLNINCYFYKLKGHNQINVHTFNWASMVFALYKKKEFIKQALLGWLNGDKIDLERQKEKSPNSENHDSDDWRGDVTVSGFTRPNCGHQRTTTLLQKVRFRTRCMMSRLLHVPFRRVAVHFFSFVFIMRLFSK,17.9999
|
| 32 |
+
GRFRTYVKFYLRFGACHLPVTVFVFVNVAALVPFILIARLKFTSDPVHVTVEMFVEGMTFLTGSASIMLFGILMAFTDRRSELMSWWFESEGATSAGLYNEIGFWLFITIEFGTGLIGFGLRTVEIARALGFKPVINFMYFAPLMGLVSVLASIRLGMALSLALDMSPVVIVLTGLSGRDDGTNFAWLYGGIGGSGTYGTGLGDSPGGSSFLAVMFARGVAKLGSKVPEIAWAIIYALLPAVLGLGVNALPKYYLGELRVTGIRGIPFGDPAIVTRSLTKLLRQEAPVDLLVEPLLIRHAILVRSVRTMKIGELVQIRVDVPLESFEDSKIRSVDDPLLDGDDVISTTGQ,16.3933
|
| 33 |
+
GVSKWFDPSKVNEAYSLSLRGDKYETTKANKTELFGEISLRVKEYANLSSIYYSSTSGYKDGFKWSDNSSKNKKVKLFNHFNAGDYQAMWEASRYIHLNQAKDCTLSYSAWNGTDAVSVTQAAGDSSLTLYRTINSTNDTTYFLLGSMNGGFSHQEQTDCSTSIPNCSAQFPAANVPTQRATYCVVCSLHNDHCKSTDVSEGCAGKNLLKESCQASFTNYKN,20.0769
|
| 34 |
+
YLLWMHDKSAYMQKSRTPSVQWGYGVAAVEKLAQWWASAKGRGGWFVDPPSPKVQAIPNGCLRNIASGFWKPPVNYSHETSKWKFIYVTLAFENLYSAFWRFFPGFMGFLSPEWNRKANKWNVVGKYDYLAAFVLKFGASYTDQTHIITWARGVRDRISNISLTVYVGANKLGNVLLSLGGGLSFLRGEFQPYNHYRAKFQAVALYDWRMSMTYSAKYLQVLSGQSGLKETVMTSGFHFFRLTAPASVFRTSQRTEVYTLFLGGLGEAQKDKEVYYITLPTLGITYYSATLTGSFDFSFHVGLKEDWRSIRRGHITLHFGAGSHDGKLTLRNVVDITRGIPLKYVDFRGLEFKWRDKAFYIHAKPDPQAFWVGIAPSDGVKSKIGPLPTITRLTPQLLVAIDINYPMFPKDGVDGYGAVEGESRSYYVHVFTAFDMQSLFNGQVHANYQKNKPKKDVIVTAATTPSSEELIKQLTQKCGKRATFMSIDMQDK,18.4373
|
| 35 |
+
MNPLPYKVRFLEWTNDSAPDTCSEAATAEPALRCSNIVGVKNPREFDTLWEKRKTRLESGTLTTKLESPSRMAILKRSIFRIFINFVVALGALVLLVISVSLNVRNNLLDPAYRIGVSQNKIARIGIDLFNGPKLQVAEFKICLGQTVFHLNVLHTILGLLVFYFTLGGADEDSARYDHDQYLPFSFVTNYTFHFEVAHYAMEQFGVGALANLLFLILVAHTIFVVSEEIRRGMANRVNLKKTSKLNPSGPARIIEEFQYCAYFVNQVLKIGKWAEPAAAQFIGRHDMIARELGQKLFDDNPSQSEVNEGVTAARVKVINGCSKEPCGKPPVMAQDLASKILDQFGTYSDTPIIGRINTIMLNGNTENGQTVIDGWLHHLQQRLEVHHIPLAESYDNFIFGLDNTETTLFHPFWTDMEEGEYGNPNYITSGERLINYRRALHNTWGSVFLPLYVFFWNWSILRPPPDAETLLKYQISMPSSIRATAVIHYHIHWLTDEEKHYVQGKITQCQGATIICESTATEDLIEFVTLDPAWSHLTGGRN,18.4536
|
| 36 |
+
MRRANLTRADSIADGEVDSLVRASPSLPRTEDDAVYLDGFERRAPDFEAIAQLSKMRYAGMSGLMDELKKLHDATDLNELISMGEMALVESENRTNAIVRQGLSEVLAAEDLSICDIQIAGESGSVGFGRGLRNLTNYVIDVEVRPNGHLIIQAQCFHTEDKSYEKADSKPLDSVQYDDRKVGYQGDSVNAGIPEVAAAGAGRKVLYAEIAVGGDRGDTGWKLAPIGSVLGGGDGAGIRGWATAAAQIYNWTRLAEGIASIDRGLAINGGARLDGTQYALGVGDANQASPVLFTGGLTGAGPAHVRQFERLVPDHPLSKTLVVLSSINGTVLADNSAVGHVVARGNTGLEILTADTAKVANGYTLPVRGEFDVSSAGNITAVTAIAGPGEDISRQAP,17.1274
|
| 37 |
+
GSTKDQKQTFTSFVGWIIFCSVATLSSFVYQQVLLKGLSQVLDYLAVTGSFGGIGSILCFFISTIGSGSGTVRTNNLYQHAASIFWTIIGFFGIAEAAGLVASLVFYFFQ,15.303
|
| 38 |
+
SGLPAFLAGIYPVIGGSLAVSIAKIGPTVPILQAGQAACHSKLLPSNEKPVTIPVILSLAYGVLGWTLGGLGEDLLGELGQVIGIGGPKL,14.4576
|
| 39 |
+
RTSQIFEAFLLRTKALKWCWIVLHLVTLLLLTSLACAYYQVESAHSQQPVLDCAYHYKRLGDGWWVGYSQGVIGFGVTAFILLISHQEASGVQDETGKFARYWKLNCTIFLTFLVTWIGLHFMIEGIDTFIGYILMVAVASALLGQVLISINEVAKTTLLGNNLDGITLSYGASPEPVSKNLEGDPAVYAQIANSGISIRLWWIIWALFAALGILLFVMLTDRHPTPQPFVEAGYLEKGIMTVLLLALSNYPILPAVFLIVLTSADIRTHRNKVVYSCNDSKFISKLSAYFEQTNKEVTVMMETAEPIVHVGNYSSPVGAIITISAIIVSTLGSLGKRKSAFPVTLTFVVVLITVIAIANNVISPSDQPVGDNSFFLFEITIALGVDFSSFILAICSFLKLELNTIFGSFPKCCYFLLSFVIMLFSSETFIAEPLFSQILLALISVITLPETTSYFGQKAVSFIKFPCIKDGFSILPTLLAVLELFGIVRNLRLLRLLRSFRAFRIVSEAKVFCITKTVLAHFYGPLRHRLLMHTVKGRKEKLMQALMCLGILAFLVSAIVEAIVLLFASYYLSTCYLLPAFSFSTVTLSLLHVYLSYIHVNTILVALVVSIFVIGILMSLILRIHKNMKAQANN,16.9812
|
| 40 |
+
VGSETIGAPIENLPDPLQAPAITAKIPTGATVQYLAQEPGIVGVWLQPRMVAFKVNRAIGSISFLIFFFLTTFAWLYITPGQINVVGTCVGVSVGGVLIGWGILIPGDPAKASFKADKYRWVESLALKFGETAARACYGYLFLSVAAGLEYLNLFIF,16.1419
|
| 41 |
+
MNAMHLVRLNSAGRGSSVAILNDNLATGAAGVSSHMSEDDRIDVIVDFSRGGGGMQQEALAQYLTARLSSDGFLLADINKPNVNIQSVATSSQFEVQPRIQSNMDVLVINWLIQGKDSDFSTLIIQRGKTPYINSAHREKILLSLNSINVADKDIELDFDGYQTGPTQQLPPNVFAASIGTSLAIFFAKGEIPLRYMINSETNGIKLLQYISQSSPADMEREVVLVNHEKEIQQSLNTEKLADSELFLEGWSEKIDNSVYVANLFEDCFHRAVVGCVATARLDDMMGTVEFAAWLNVDSQGKLLISEIYTSFTPELVAGQAVVGGKFSTVDISTGEYEIFEKRAAFGINTQTASALIYLPMPRALAPRVEFWQLIEKLMKASNQSVMISAGVAGTFSGGRGLLYVNGLNAQLVGMLDALLKLQKIFAANLGANPNLSNVLIIGDTDSVLALSQGIKLPNGMSLELKEVNKLNNTFLDELSEIIGDFSGSSEVRSKIWTSTQEVKLGDLTEPLFVGVSSDIVALVANGNIELIIANAGVSPRANLDTAQVFQRGKQVIKSRTGPSLNAKGLYLVLSDQESIRSCQLTGAQNLLAMNIQINLKVVVRDVLSAAAMAFLAKECAIVDIGGCEVSAPAYPEVVTLRYDTQTSRSFGQRIIQKQTLGNAAVNCSVSDAGQSAPGSSGHAKGNNTAYISVIAARVGGGIGDLAIVLAGLIAGATAATAPNLAYKWKGNIAPQAKDVLSSVKNGDRSLNTRDLSVEPVKNELAGTTTLNWHTTFAMNSDSGWRNVHPYPSNGNFP,17.5854
|
| 42 |
+
AKAPGLVGLGIGSVSGLVVGLALSFLLGCVCTDHRWAKYDGAGLAILEGMALNDALLWVYPLQWTLIGGVSLDSSSVSLVLVIVACTAALAGVGRVLRAILRFFAPRTRSQRLLLALVLSEVAVQLVVFFAQPLATALPLITAFTDHTLQVCYGGYTTLSPMDTLGQWVTYVKANSTGGTSLRDPYRALSILLVSFGLVTVAVGVTLKRFTASAGDCQ,15.0093
|
| 43 |
+
ALAQCVLLALASGVSAVLAIIPRKETYIRAKIVSIKKAKYGLSMYERGGRLKGLGIPPWSKAPRSNHHLGVYADEIGILGTIFGYTVPMGALVIAILITFAHLMPSYIKKYVYLTQVEIENYSPVPHQVPAE,16.8979
|
| 44 |
+
KECARRIKGCLNFTGSASWLSFVNLFVKQIYTGYVFAHASLMTLLVWQAAMHHIVNMNLCDEYHWTFTTATSGPLGYRNFTTLAWIDSMANFVALHRHFLVYGGLYHVASASLFTAFVAHFIRRRSPPTSFWAYLNFEQKKFLSAYSHGHHLILGSFLAFLTHLDYFFDKFSVHTNAFSQSWVFRGELTPELAVNLGLMFHVKHFSLFHFSNSVLILALHFSHSGAFIDEMRSLTALESAYGTMRWVREGMGWHRGVVERWYHGSFQVKHTEEGSMQFAKNFLLYLPELPRAECYAAFYLRTDFKGNLALRRHAEYRRKFYMMEGKTLFWRATQKGLECQKTWGAGFARTAALTSSTHAVAHVANVTTGFVLGFFFVWRQVHK,18.4019
|
| 45 |
+
FIATDRKWIPLWILNMMYTLSGMGGVVAFSAFLAITRMGYDIKWTGALFVAGSVIEYVEKLFPQAGPAGTLVVLLIPAAATGHGMLPMICVVCCMRIGFIGWGAKILILPLLGNGKLLAIYGIRSPWGVAFTVPAVLILVAAGLVFEHTWKLVVHAYDIGFLLTALAVALLALSKLLWYKEPILYALLAFSVTALVGFIASAAGSFFVGRGCTTCQHPPSTIFSDNGRGKSVWTFFLAIGVTIQLLALFAFLPKVGTHQTVKDLFNIIGTGDITIMLEQAAKAKRRGVYVLNLFNDKCPKSPAVLDRTVSYLPPNVSCAVKATKDTNMPLVTLTDDMHFHLEDYGHRALQEFPTLPFNHTKCYLSQADLYLTGTDMSGIILVSLNNWMGEIGHLAAHTLECPDERAVFSLPIGDDTFKYLLYHEQTLKGIRLFSHLLSQSSRQVTEGAGGRDILMTHQTALITLRSLIAAEVFVMTNGTLKLIPIGRTDVLWEYRATAYHDCTMVGIPSTNHLTWDGQVVESEPLWPLSGYMSLKTGSVILVMIDTVGRTTYMLQNCIIYLGLLTVRLPASVATMEDRDCVLMGYLAFLVKTLLTEKGYCFLRRACELIAIGVFAAWFMSIKYIAVGAFTGGILPWVLSYAVLGMMFIGILYCLIMFRMGQMVERGIVVYGRSDDVSMQNRLPDVADPMPATSLVLSHEMFSGCLPNDVHFEIREPVGVPMIRFFDWYGERVLPCQQPFKEVSKLIALVLQQLAHMHEKNLDPPIWNVLRIHVTPARPFRGLGAMGVNVIISYMILILVKFLGITISEKWL,17.7597
|
| 46 |
+
LFKSSKINSRNPISMLNIKMNLGSRYQVLAQIQLSPNKINSDDDTISFCINTENLLSWFLPGDHFNFADLRVMWALLIVTIICGGILFATLSMLYGIAPTRSTTKMIKINDQPAFKLGLIRTHVTFSSAGILLVGVSYNLSEELVKVPYIRGGNLYFQFSTPFALEFTSICFHNSYEPLYNWLAGYDPYTGTEVFFNFGPFLAAWGAGVAGTIALVAHACVAVELFKQLKFKIKISKICSTRIILPVALTGALIAWIVPLISSPDILKTTGKNIHDGDTLIMIPVLLKRIFAQMGKPESHSIEHALAHNHAAPSEAQFRLAIDDSYYNQAISTCTSRELKPLLNRNVVRLLCADGKKTIRDPKRILESYCEAINRVFGGTFKDFLFGVVENSKLTKFFKYFLGVLDIADLSNYSNGALTTEADQFLVEFLDIYPEYHKFSQNKTYIRK,17.6359
|
| 47 |
+
FWNWRRRFLGFLIGVVVTLFFVEATGTFVDNWSTIRAMHKMTGMTFGDWLGTIEALLTFGFLIAHLTGGGTPFGWVDDVFVVVTIALFARQRIFRLALVGLRGFRLERAGSTLKAVGALRPLSSTRKLAAWLMGWLSMLAFFGLVTGVLVYVDVRGNWFETAPYTFETVTVVYNFYQEHGYGDDALRYGLALSVLAVSPFIIGILGISFNWLVVPLSGWDYD,15.5532
|
| 48 |
+
TLLICYGASASNYSDSTRANAYLNMPITLSDVVVGLIYAISLGSVFQVDAILLAVILGNIVLGAVAFVVASAVATALERLVGRVSLIPAFETAVSGSISGDLSSYPDLYKRTAQSVIAMAIVGEVEEQVRGAENAGEGILDVLDWQEGGGEARTTLNQIGDGVLQGVNIGEEELRSLKPLEVGNLDVASDVTDYDKAVKIDIQFALRRARAGGDVVLLDARNKSSIDFGTDIIVGTAGFGPVGTAPFIELAAKAGFNVMVRGGIEDGIALTDIEVVKHARIKGVAISGGTTASIGSAARRIGRARISVSVGKARFQSLKKVCDVAALDIQETFAVEQILLLATGGKQIRSVSSAIGKPYIQGPDGSLGDLLASIENTVTVVSVKQNKAAIINDLGPSDLASIEDRTPEEFLETTEDDVAEQNDCVLMNALGLNIVFEDNVVLIMDIELGEFIPAGREVQLLNSRLEKKQSKLRIAEVLLTLSSRALPGITRGNYDIEYHDLAAFALGFQPVFIGSAAREGTTREALIAAIILLLESLILAGMAILAVGVRKLVGFQVQPFSGLFRAIFSVVIGTAIVGLGLAWAYGPLHRLGEEEEVAQTKVGWGGSFALIISIVNAVLYLTAAIFLIILLVALALFFVVETIDHIFTFEIENTVSAVDTFLAGFGCMTPQLQKFNRQLHKIPNFFHTLDEFKGLMAHQDIIADFNRSIASLFLDYINAVMIFLDGEIATKILRALDAILGGVVFSAIIIGAQASVADSIITGRDITLELAVALLLGAGLAASVLALGVGVTIAGVGGAEKTASNEGQARNCRILFYFCVGCTSVIVTGVAVAKSIEIL,16.4406
|
| 49 |
+
TLTFMMEGTQAWIPWYIIMMVYHLLTQTYNMAGLLLFGLLFAGIIGLLASRPRLFDLEQRERINWTMQPLPRALTLVIYMLLPFSSLVVIFAIAEATTYSPPKQDEHPHRLTTAINVVVAPPYNFDAGVSWIPLALLGLAVALLQKVQLTPTNRYNRLFKLVQFSQININLYSGKAPITIAMDSKDTYPIDETMRTFAVLRETSKKDTVYIPVEVENCLKGTESLYPAADTSVNLYLVHGGQNHFATKATMHSIFIVPVGIVGPFLAVVHLIILGIAEAGKREEYYLLYLFGYLSVLTLKLNGTAIIDALIRDGIHCARLPGRYNVLNYVVPKVSAEMKIIQDTLIYWEPAATQWETKLFDKSRILRNSPGYKFAKLLSVHLITMAAYCTLILVLPTVLSEYGQRNSGPEKRVLFSCRLLKSRVKGKSRICFHQVPGRKMTDTAKKLTSGVKNIFRNPGYKYMESNEILLIYTINLDYKNNALYENGPAIQTAAVINNNHAGTLFLQDIDVIPNLMALSPFVFLVCGYAPEDTELVFCWVLNKCGGKEVYIAFSINRNQIEDPLSKLEVIANNIIRVIKDDYNHRKAAAKEYAEWIAVAEASIGTLPLSIAKGFFASEETPKELRMSFILRAMKQWLVVRRKHKHDCKNMDFKQRCKSVATIKRKPMEQSLCVPIEKHQPAMRKYLIITLLEQNLDRWAHEAERVTSHFLPFFNNNSETNCHICECLNEYYQDAELLLQNAKISSGCNEYGAIYYSGIPISGAVAQKMTNIFISGSSVVLLITSYGGDE,18.2811
|
| 50 |
+
VAQMEFPEGTTSSCIWKQGYHNPAVVIQQLTLHRCSASSDTICTLMTSQSNSTQLMDDLLASVLKILVGLISSDYTLIDVGGFTVDVDSLSLVYRNFHTNISPCNIDTVTKTPDDTLTFEQYRDDMRAQVEQTYKAYVSADPPETRSVKASSYTHVYRPIGMPHNIIQPIMMIVEDTPQTESGTGIKMCSNQRQDVVTGNPVEAFQTLAQGDHYKLMDSSTNKSILAVSNGWNLCLGSFDSLENNPTITDKEGEKHYKFMKNADDTLNSYLYLNAVYINDPTLPVTILSSNAGCKGLLEAIYKNNIRILYDSYPAPNAEASGNNVKSIGTITVASCMGKGSMCPCGDDYQTLAVAVSLVNYEYWDLNGSKSNNNVIRTSGTFSIAILTDRGNYTANRSALLKAYINLLENYAEERKKQIKATIWLYQRDGRSSGKKEMSCNDDPSDTGYVAAEYPGAAQVLDTDDLETMPGSILPSFQNFAQVKLFKQQYKGKVPVKWMHGYVRHNLKANYFANGYYFAPSEGSIINPVLGGATE,18.9275
|
| 51 |
+
TGPYKKLADWNERVPTPSITQYIASHYNYPDLVAVRRVLRVPVAVDATGKEVTVDKQDCFFKSDGVVYTTNYKSYPKTIISESYFAYAIPGDVQQKMHTIPLTSNVYKDDREFFQYKVSFQFTPPTPPDIQYPARADNDSEGVLDWTKEVPWYAKDCNGPLAKCYARVNTDEFYESSAARLHPWDFPWASRLHIPAGIYYR,19.3561
|
| 52 |
+
SRKILPIVGLIIGIFSVIAMIFYVLLKDKNHATNIETTPADVETIWNMTGLLSQSIEKAYTNPTREYITHADVLEQLKKTFNFDSEILNKAMNTVTQYMSENQGDAAVKLTEDFFQTCAIETQTRNPGQFASSYGPDHKLAKDQATDETIGEDNKSPFNDPTVFGIMKALLASMTNIIKIAMETLNLFTIESNVIQLLPLVHAMNPRSIEELRLTLSYFHKNLNVTLEEDRQKLASILEILRHLLQLFFLYLFSVQDTLQNWLLNIHFNPPLETIVPTIPPNDNEIAQMLIQLNTDSSSHLITILDKASPKMHMIVLGEQILNQSLKDFTDGLHSVKDWAEPTDVLTRLGISPIDNPMSELSKLWQNVLLYIKHQFTSISNSSTLIGQLKTLAHVRYQLLEIKPALQSLASYYLNIDTTMIMSYNLYAFDELAIKENLEEEEIHSKIPEEQDYLDIIAQDDLDLYLKNLIEYNGSIDQQARNRIGFSVISFVHNKLFEILPWLFGKDQRTIKIGLVIKNIKGYIPGLLAGKMEQIRNLSTDENLQLNDKLVVFSGMKQTNGFAKLTLLNMSPLISYYFSSKAAGLTWTSDFIPVLKISQLIALLQVYFLVMKSKTITGKYMLRYTDTAVKKNHVFSFHEVAGHFEGQYSSPLNAFFNHISRNTIPGNRKIIQTYPLLFGSLLAVIILLLILKLSLYPVKLNLATLFALNIVLVTAFLVKTGKDRLKATALLLLGLAYAADLLLGFKSFGGQGESSSREHKLANLIIFPLLMIKTIFVIVSIFALYYIG,17.5915
|
| 53 |
+
TPDSEYMSQTQNRYSENTCNHQYPTEWSEVIDHTSVGILVVSINRFWHQDKCHQKASFLREGAFFRAGILLGALSVLLCFSKWSVPPIPLTLLINVYVSEQWIFLGLFVGDNNEIANHYQIEVLLDFARPYKRYAYEILILFSTHVIIAVVFRNLVVYSPDSLLISISQTDRLQHGFCNLSAVLETVGILDIIVSLFLYSLAETSVALIIGLVVIGSAFAVHQAWAGMWIPGRNTSRVLREVKWFVIIFVAGRCTLWFYLFSCSQNNLIRQTSMHVFVTGLQICHLFQASAPHLVSYLVHLVVRFTQISSVLRRNNVYYFLGAPFTSSSSLIGGIAICVFPDYSGFEKLIFLSENAILMIASNLLLRDGPRRAFLVAREHQEVTLNWLSRSLIWRKEVDIVLMGLLMLLLIVATGNIFTIGEIARVVSSSITLNSVLSLIYWFIFNGEHVKPFTWVSLREMVNLQLNLSVMRSKRGATNRLQKMNAIQEIMAVDLSGGQRRAVLIARELAIAPRLVILDEHTATIDTVETSVLALLSPLLRKGTTAIVILAMHGRDLLHQLIGLIYLNMRVLDYLRHKKWNDMKLFKAMAELLKKYMTEPGFLRWMDRLLLYQLRNQTVEDLKFTFVAQQPA,17.687
|
| 54 |
+
PRSRLRLFMLRLTGMSAKGASPTMLLGLGLLLPPVTLFYGGGVAEHGLPDPYALGNVTIVFATPSVLQHGVHWPIPELGIMALLSFIPIFAPEWRAPTMMAYGLLTGFLLGAIYGPPVVVLPLLWGKIKMWWKLAQALLGASQLYFTIQTAIPLLVTTESETYNPDSRFVMQLLWSHIHTFIPILFIIKAFTIGLQPLQMQHQPGIWALFALTMFLVSWTLARDPYITPDGYFADQKAMGDLLTFNLLQRIPVGNHPALSPPGPYSLLGHISTQIIVAPFIWYWRSA,17.041
|
| 55 |
+
NGYVEQISYHETITSDKLRIDCLLDLNLRFLAMVLKLDIKPLKGELFAAFDCAKMWNCPVERSKDGEPVNQDFVAEAQLRGKVFSCVIIEESQSEYIPCSAPSTVSLEICDNLGKMMPVLRATFQLQLNLGTIMKSTVKESDCRLPAYHLKLECPNENELVGVPQPGPVRKAIDPISLFEELAAHIKFDKNGERKFVQILSYSKKPIKYSVKFDFCNSAREERLEVASYKLEIVSLQEMRKDSTKERSLRTMILLQSSTISFQRLDMYLYKILYLCFLDTKYADVMFRFGVLADISRLCSMMPELKGTWCTGTFVWFIKWAFKVPTLNLGGQDEQMSQFLRYMFKAKKMVIHDPPDWKMACKDSFMPKRNVRLTLCNQKETSTRQALIETKLLEEICTDVDVVMRGEENTVEKSNLFWLVGDSKTVPRNDRLLVGLGQTRNLNASKFEVSHVNIPPGATAVETHPTRIVKLPQIQGALLFYLLYNCWALTPWFRLSKLETVTAITFFSRALYAKLYVTNAHCAQDMLKMCTAVRQLFSGSNFGYLMFHKLITTKQTMKKHFNVQQILSIAVTNVALVVTVGQTECVRPSIFSYVQTVNQAKAIQGVLVSILSPDQAGLAILFIEEGLTRFVLVDYLKMLGKSRQSKPFTLIVGTKETLEEWALYLTGEYVPT,18.533
|
| 56 |
+
SGSFNLTNAIWVRKYKWYELAPLRYLGRCMVMDKSGNKHVILSQVGLLDSPGDELIQGANLPLRITIVDGDDNDFFDQFGEVFELMNLGERAEFNNFVQPADIATTISVQDFIRYSRYLGKGGTFVNEFRDRYLEDGRITEASVGGGFIANLLDVEDLSLVPEREMVFGEKERGFSEAFGSLNRLVENNFSQENGRAEYLAGSNGSSYTTGKIGYVTNWQSQFVITRDLVSMGDFTQKLFSYHQGNIGSYRPGFEKGARVKFGDPIQDWTNGSDPVTSDWSDNYGFKYFVDTPTSTLWRGVVVSNPAIFNMDEIGKNLKVTSLTSYYIKADGNIGRGTKVLAGNNYQVNFELIYFGTTWTLFDANLYYDNGDEWGLSDWTDVVYNSMVAAYQAVDDGHMTISVLGTIYVLMMLVSISFGTIYVYDLYTAMALAASGYLFTRRGLS,17.983
|
| 57 |
+
NDIRSTTEDVLMPVPKDLAGKFFIIEESVVIATETLQKDSMISFHEFGMGSADVYYTVASRPQYISDSTLSLNDTAISDDVTVIKSIGLTVILDLTAYDVSRVTMADRQSYEDREKVSYREIDLFTILVAEAFSCGILTPTYLAERLEQLGRIDIHDGGWNKTINAYELVISASTFADGKNFSTAVTIVPNLPAVGSEIGRIKANDGDIRDALGWVFGETTEQSISPVEYGMILITHGSPGGLLTAKPPLDNSVQEKLFQVLASGWKRGLYLQGGTLVSRAAYLGLDHWLKLPRGLSIIDVSMIDNSSLGIPLYIRYQVSVDQTDKIYEGGKPIPERMDQNRRSFFGTLNLPLAITQAKNITNKSAHNIGQEWWLKIFFTLIRVTVCMCILGFPHTGIEASFMFYLCSQYYSHWFVKWGLEVLSWENVMSVAGMNKKPGFEFALFSDGVILGVVLFTAYIVILFVIMLKRPLMLPIKRMKFIGALLVWSLSVVVGFLQGSPRDKKKFLIKSAIWAVFFSLVAFPNVFLWFFTWKIARLSANASVFYSGTTTMFLSLLVTTATEFSVVQYTVFLEFFIMLTSGILVLVVWLISSQKTSSVSIT,17.4521
|
| 58 |
+
KARYVRLVVAVCLCPFVRYLEIQLQDELEAEAAKKMQLVGREKFNAEKLTTEDLIAVDAVGRAMAEAQMDPATIQRKIPGEVPANLLEEQLKSFLLAQEAKLEARRRRKKLQASGSKSNRVMARERQYLKRCDCSIDEAKRNLLDTTVDALAARSTIREDILLADSKISQLADTSPGIEYPNAFPEQLPYLKEEYYFIRTSRFAFDERVHALQSNLSLLGFDDDLTDATKAYTEFGEAFGMCLEKLDISGILDFLKFIPASSKWNPI,17.6976
|
| 59 |
+
SLIGDLMSDFSGYAEIVTEEYMMKHWMPLGLIDSENTFKYSYQAKMGLAGIENTGIDTSYVRSPAAGKVPVLPARDAGQRLGFTQLLMDLYLNSPGILQTLVYSWMEVQASWMRETRFGSLSNEFETTEQYLPGFKKARAPLEAEQIPKNGGRVPGGDRIVGVFEDSPVSGRSPEEHFQSFSILYIKWNAFWFFSVQCILTLILIIGFLLTVDGLHPCMQPMRYLSLTEFLMEFEGWVVSRRVVYIRDYDFTLTFMEIGNVAGVRLESYHWFLFWTAGLILGSIFFETLRHYIGAMGVVFPTDPPPSEKSDTFSGVTFIAGFSGAMRVALVYTAPQCCRYGEIAADVGHILAGGGGGYDQSCDEYLVIYPLSGGGWALERARKRGVIVFPYNATPWAGILERFLPLVGTARYIAFLVWLISLAVIIYGVYAYALIARKNPKGLMNEKGIKTARLATGWSWFILIKGLINMLPLRGVGTKVFLSQIVRWLPEYALGK,17.7286
|
| 60 |
+
LSFKIFIKLLIYLILIILFILSLFCKTTQTIGMPDLFVKKVDWIYTTYYTFYNDYWIVSVKGVSVEEAIRDLETSFELSKRNVMQLVDAVVWTEASDINPGTDFYHWQWKKLLEEDPLFAKTERLTLVTTFNCMFLAWFANVYALAITTMPTGLFIFVLRFFLLIYALFASISGSGYKDTWLVPFGGAPIRGNLAAPTGRKAFLDCLEYDIVVTNLGAATRATASVLITLFTILRLFTGKWNMIVDVTVRRVSMDCDEELAGATSTTSNMREIERATDVFASVCQLIRSFLDGRNYSQAVANMEYLLRMPESKIMLAWKWNEQAQYPVFRYFVLDAMNEMRVMNQQWMSELDGLFVQGPLRNVFDYLQEQVQQLRVAKQNSFMRFKTKFARGKELWNVWLSKSNNLCQCSDEGLTTLEVAAILLAVCWMVYGFTGTIRIITEDATPKSFTGHLYYQRLHYLRPMMQKIDNNPLVSLLPPRIILDDSTNWKNLVPELINVYIEPLTIPASQQVYELLVVLHHISPSFSGWRRETMVRPNFATDDVGWMKMEVSFINYDQVFYLTEMYPFSQAPFFKLLSQLRIMSQANFRVTIADLSNEIFNYQSLDFEAMKALDHLYQDLGPIDFLFVNTVLVRILNVLRYIRFLRVSRFILPYLRKIARGVFNIFDWYNIVRILFYAFGVSNLLSTIMCSSEPNEDTCDIMQPLDMYLTIFVLDLMLFLSYPQYGFIKALHGFLYHLNTLGTTMFGLAKNNLIYFTLVFSILLILFLGKILAFYAKRNNLEELIR,18.0488
|
| 61 |
+
ASIHFVVASLVATGLVIGTLIGNLIHSAGVAPVIAIALLIIFFCYIFHVMTTSYMNSMSSQGPVDAWTCVGQAIAIGISGFIAAVEGLVATIFFAGLAGAIISPISIYLIATIAIPLTIGLVLASLLVIVLKHICKAALPSVSVVKGISLALTLLVASSLIWRAADSAKCSNCLTASSFVHATFDAISYGAMIEVMAAAASLGEGVIVTAFAVIIALVFVEGLAFALTNIFCGLFDG,15.3219
|
| 62 |
+
KWLKSKEATKRARDKVYVKIMNRETPMAIYTGHHTYWELATNPVVPDKRFVLGEVCENRDDLPYYHWIEHFRSAIDKGARSKEDEGKDRKTSGIYTFRPLTQYQREEDMPTARVQLVCKGVTVEGMSINEIYFHIIHFAADDLNDMAAEVDWGVTEVQLDALVDEPSECEIVTDKKSRPKHIVITTNDKDLPTVRALVDKICLAEVGDHEIQMTRCVTEEQESYIKYLSRHKSDAVLMAGGAISDIQNCSEGRFPITYTDVCLKDDSKWSSANIISHFRGFEEILAEYINEQRWLNGVALRRGFTVQGVSDENPILLITDIQLDDLELAFRQNSINQTSLVSIGSHVLRDIGYFSRGQWGHEGPNQYRTRRHASCWVLNVRHNAILPTEVVIEDGWIHSVFTLYPSAPPHPLGYVQAQWRGFNKENVKDIREAFLKVRDLEWKRHEEVGKLINDIFNTMGYAGNTFWEAHFKRPLFSGLGRIKEAIRRKLIFIRTVISFENLKALVIAAVVTAYLIMAFIILEGKAWGRFEDYGNTTSGWFNLTGHVPRYRYFINEFCLSWHKQTRCREGNFITQIEDKLQACLKFFGDIINSYKGSLFKHSVWGLCPDIICLDKGISRWNVDWSPERTKEICGINPEPRASKSTWRSIQSICDLFNLDGDEYGTYDIDMRVRLTHNSTTPCFPISIGLPCKFDSAGYWTLARIIFEKYSLAFLRRIQIFSPGVAEPLVMVTKGLNTAFAILTLGLAGGLITALYLTFGKPMEGWIESIRILVLVLSLFLVALVLSGVTHGVQYRTFKDDRIKISVRLWVFTRRIE,18.7487
|
| 63 |
+
QEETPSSDRFICKNIVVLSGVAAILIGLGNILICVTTKYVKVLRYPNLRSVLTVVALAGFVANGLLVFIATNGSPTIGVSWLSMAVEVGFAVGLLMCLATTNILADNQNGETGPSDNDFLGSTQAELVMKGNKVAWWPMGFFVVDVYYAKLFAGVNNRILMGKIKGNTWEKNSWNKPGQIMAQVFYIIMTIAIFLSPLLVLVPMHRFPLNVVATSVSVSVLLGAAFTGLPDVMNWCTASFGIRYLGFTSGLAVKIISLILRISGRLGSIQLNFAEKLGVLLVSIAAGLISGIVWIGGLLVQLFTFIVDSFLNTKAASDQPLDIIIMLWFGITWHVLVLASCTGIFYMYNIFILGQSKNYGSISAVSTGLITANQGVELKGYPLASCCVFQITNMKVHEDIKECWTLIENCKDERNVHDIFTITVMHGKKILLTGGNTYRGVEIRVNEAGQVVPNHELYVLAKEVVYSNPRTLTSVRVAKNVELCGLTVRVADKAAMNSMLDKQISNLVHLGMSLHKMEKNVVLSGGQRKRFAIARAMISNNFLVLLDEPTSALSTSGENALFTDLPVKENGTTLVVVSHRITLLKFGDVVIMLAHGEVRVHHLCIYTKLDEFSLKIATYFMRHIGYFLDIVWAFILACIFGLAIFNLSVFGYNPSASVKLVPVITLFITSFLVAINQFFGQSAKGKLAYMHHMVRRDLFGKCH,17.4792
|
| 64 |
+
WNRAAHFLCMMLTFGVTTVSITTRDTYYTGLTKAVKDMSYTNWLIVFQFKMDTPSRTPWWRFENRWLNVPVLASVIWRGIPELFYGSTLVDHFSGVWNIVHWKHRLPTFKRLRGINSDYKPPLRIIIFLSLTFIPDLPRVLIVLGNIPKVTVRFFMLVMTCQPQTDLKQQDGFGFWRYKKPTTANEHNWKELRAADRPLMTYPTTAPKHMHPFGSLLFYGLRVATDQARVYMNEHPTSRAFLNLILALLELIPDPSG,18.5118
|
| 65 |
+
KDFATYDPTALSPGANRRHDTWKPMGTATKVERLLRWGYLTFAMLTTLTHIVILVFVPFSWSVWGNMRYGVEPPEMKDQGVVKFFLLVFSFFLHTYVLFTALR,17.4756
|
| 66 |
+
AFVPITKFYNLRREGTIFKTPELRKMGIKVWLVDLAVVPVAKPGVRASARRIIAYILEFNKKASKLIIRVDASTGFFLTDNLIGFAFKQGIRKVRFITDAPKSGSIIQALFGQHDVVISGADIVGTEFEVGHELEELDIAIGIGAREATRVFAAISACVPSQKIIGGGGTVLEYCATTGSATKGIFLIRGWMEYVNLLPELVRLSAVFSMARLMSTSIHIRRGQGSTPSYAILVGCFVLWIGILAWLVGFFDLSEQEWLFTLPILQLGLAAFAGLGLAIIAKELANITTAFGVLAADLVGGAFCIGGFNAMVHKLPFLYNLTVGIIGLISLAGYIHIIIGGSWWPGPRDREGVLANFFWTPTSNENEDFDILPLEAEDEKTSIDNPSKGGEVNRINLFDDQVLVKQSMTPCGSNWPHLPFVFPDWIMNNLFKAIFWKIVEGSINGAAIIAEDAIARMKVHVKPISYVNESRRFLKLAAFMVHVILEIYVFFCITLEFEQNLFGKSNGAGVPKLLILLMIALFLLAAVGGDITTKWATDVVQQLALYLTPDLLPMWWFETALGDAFECERPGIGTSVQYEKTAFEDKGEPTFDDTLARLIPEVLNVVFPETSPNAVILHWVNFLTFMLALAQSGICVLTGSFFFNQARSLRLCQFQKVTLLATSQDQSADRIVAVLKKWPPEKSGRVAYVNRIFVQLLVDPNKMRVLSGLAFTASVEVLSSVHAKRGAFITKPFTVIFVLLLVLVPLVGGYAFRILQNNFHYLQLLCFIDRDPNLNY,17.2646
|
| 67 |
+
SVSDVDVVKDKGITHHNTIVAAQSKIEIRVMSVAPVTVNQKGTLILDFSNREPNEISVTDKSTAGNCIYAEKHYKKDCVLAEEGGKVRLAGVGKSSSQSVKSKAAIAVQPQAGFACGQNGAGAFQREDELWDELITRNKIAATVALLLGGVMGKTVEKVNSIILLREKESQYIKSIAVQIKGGDKYQICVALVLEQDVLFHGVDKQAPLRNLIIKMKVCNTREMIPKIYETCKDAGKTDVATEVAQSHVLREELVAYTEIIVGVYSPNLLEVVYKMIPDDSVKLELPFDVNGSKIMAVDGKRVLKEKFMFGWAIGNRFGCIMDGKHEKVEKDVVAATLMGIDEPGTNVSELLKYLSYPRAVAAENTLSVEDVNMTMISNHINLGDVSKFKRECLALMDYLRSISSLVTLYNSENVSWQAKTKTRTFGFSFNGNGFPSSLQLVKLVTIIVANYDVQYAWYTGAEESEVNSPERFGCCYKCVRKPIRAGCKTSKMSPTFIILPEKTWEIDDHNLEYRCMGKPALSITLKYDRDDDNSKEDNKLYALAVGLVDSAVTVHGWETFQVSCWIPIPDKKTVKMPGFSDLYLAVSLCFPMDEEKKLRLHAPTLPEIVFVTHASTYIGDEAELVLHILKRNGVCKSLGFEDNHEIWSFIAWISQYHSTNWRHSGSVVCGKIRQLLQDLIPSADQDTQVQAYCEECKNQENANIDDSTLMLVIAYKNLQYLRAGILPDYYTFDNIIQVGSNNVISGAAMHFLDQIEPFFVANTDPQKNIQLIRNKEDEFYWRFWAFDAYANEERTSNNRDIAKFEIATKTIPRLQYERKYSEALEVIKGFSIAYNEKY,18.7321
|
| 68 |
+
GENFEELFARVRRQHPEAFFLYLPVILIIGTGVAMELFPFAKKYWRFSSALGRAFLILLLSILVLKLLLGKLDEFRIESWALADFLHVVQAKTAPISPTIAVLRYFRVFRALRDNMLDRTHDLSKPVIGYLFMAGLPILLILSTAIELGVIQLDGLTILPLLTSAAFWGILPQRVTSGGDGSLLAVLTGFAPSLKEGFRYGFIIGLFMALLGFIYTIAAENDNEALRQG,15.1469
|
| 69 |
+
PYVRSGNVLMAMPQWLQDMKKTLSSKRSQKELVKDGDRIGQKLVKERKMSTVAMDLYWMSLMLAHPYAHPVLSGQATVYHAVGDGAVVKVHDGDTLFGVALYFSENMWFAFFNFAPGMQAPNVSSRDGSIGLWGHLLPAPNFSFAQLMMIWFVVIDFLAGLSRLLMLYYYNLAKTFRFHLLFASTVSFLEVQAVATSFWAYSGNSALLVMVSFLITYTGTTFLAGSMHATGFYVIHLTDILSQHVTFLMTLIEAMNSHQSATSMSGSRADGKTTVNIMLYSASLLNFTTFKGGFAKFMCYISALLWLVILLYAFVDGAACIGFGGRLRRFSHMAIVKDQSWKFYVTQKGPGIIQAAEYMMNGPNIAWSFVIHVTASHHRGDIIVSGGWIGSLLPMAMQFAGQWAPLIVRAPKNPRVLKLYLLVTMLPAGILIAITVYTLWQPVKKRPQRPSNESNMLIVIVGMALGAGTACLPFVLGEYNSNIVVAWAVSPVLVRNCFIIFMTVPQMACMQDTICSVDRGEHVTGLNSLTVVSVTVMSMPSYVIAVQTVSVSKSMLGIPFPFVELSLKADASLEQLAGPINIKDTVLKQCGAVVIILMLVFGILRLTFGGVGVVDLYSPKLLSIAEAKVVFLFMTITWGSGITNSTKVFD,17.7269
|
| 70 |
+
KWAQGYAVLEVVFTVPFVFILLFMFITTCILYDAKTDFVEFVLSIAFFLTNSGIEWKVCAVTASSDSQLLAVMCLVGLAYYKIYDYDCCDCPFSIDPKKREKTVNLKQCSQLIAFELPPKFVAREEVVVSQVPHRFKNSEIEDLTFELEGLIYDHNYPIEDGFEAWRVHFLVDVGGGEIGELAYPVYHAPVMSIGYISQRPIGIKAIVVRNQKDQMAELINEKDVLSISYSVGLSYELNEYQLTTIRNLRNSAAGLSGLKIAVDSIIGLCRTPGLFPFNLSHATSQAITVVLGKSKRFNLDLSKIKGVPALKSFARYAVKSVKRLIADADSLPPTLVAVFYKTGNVSTVKGLSPPLIKLNVLSDASVVPLGKKINGIGSTGAVCTIFNGVCWATTIVSQDDVPTVVVQITQFRLGSQLDRWGKRAQTSDDMFWDTAAGVRLIVQMGVGSPIATIIVAVRPADMYNNMVENSEEKLKLRNNRNRQADDEIYVAIRMTGGNARQVRLGLSEVQQKQRFVLDIPTAGLIFIGKEFTSVIAKVAGVYPTILLAERTPEDNSVSIYLRNVNYIKGRPTSFLGTGFNNSKGEFLDPFFTLDPGPQDAVNGLRIAKEPESHKILEEQHGPPCLTYNQHESMLQILKRARLSIAVPDNRVAD,17.9063
|
| 71 |
+
KSVVILVGCASSDPSDSIEFHFFGDNTAITKGRIGRRRFVVIGGPSADLDDEEGEYGATHVTVFDIAGSIMPIGFTRGMTRLYGISHLTEKPLPGGFVMVLPPGGWRELQNLQFYEAEKYIRLSESVMQDVNGGLTILEDLGDEIIKTSFPFGQPPDKG,17.3791
|
| 72 |
+
ESNISKIFKDPICAEFKKVLVSKIRPIKTTAVLAGLAGFFCGGGFFLGAITADVFMIGTIVMVYFAAVLKMSDARGYAWFFSFFSRFLIGATNFADFGELIRAFLKDVNLRKEKVHKGNYLALFGVFGITWIVLIMTSLLALGEFIFMVGDIFKQSGKKAKAKLNAEETIAIANPVIYALIMIISFLLSVATILTSSTGARAKRIQSKRRNLGVVLVGIFALIVVAILFILIVELCTSIGIQASYSLLAERLIGTSEYMEGIPNTNEYWNAQGVKQMLGVASLWHTKIYEWWNAIFGFFVIKLIKFISDQAFRDWKNGLHSLQIFVGLSVLSAGAGSISSILVLSDIIKNANTGSFIIVPVVFFIGLMINVAIFAIYYGD,16.4909
|
| 73 |
+
KTYSEDMTFLNTPIDRYDKPLIDRVPPEHHTYVRKIITVFLVSGILAVLLLMWATPQMHSKVRWLEAGNSPGVGRIKLDVRVPEIHPQTMHILNAILRFTKKQKDGPVLVEAKSDGDTIGTADEFAPLAAIARIEQDLKASLVVRQIVPHACSVPYPMWITEGDKAWDGVFYKVEECDTMDDFVRILNFMIGAEYLPGSNTTNEYCGASRKIVCFEPVMIRNGDDAEWKASVVVTMEILELVMQQVITCTDSAEDGFLISQKGQFVGEGELGILSVNLEKQLYKAVEIRSQDDRLKTLIMLIVSFIAVAVGAAMVSGYIPRRRYQVTVNKVPYRLQDDAPVEGDVFEHGLTEMRIPVLFSLVDKLECVAAIDRQFKLRRAERALVSFGTYLQQGKSLAQSWAPVFFGFAEYLKTIGVCIIDNVEGKYLKNTVAATMTLFLLFLCWLTMCFPLQHPPRLPLADFRYLRNLPGGSTTMLTVFIYAHGLDETLDTEKGFWFWSDTGLLGTNGNMSGCYFVSFAVETQAFVAMMLGRSHSLSHRFHGLLQYTTLGWAFFTSFIPFVRDRNFTERHYVIPQHTKASTITCQNKSVRTTDKPQARKYQEDLSHETTCHCQTVEKTFNHPRVKLTTVKACGEVWTECPVDIAYTLASELFYSFDLQGGTLLRPQFEHPWGRSNLFAAFFHMDEGFSCHLGRDPDMQEYINSSSYLLNSTNVSEVVLLEFCDNIPPNILLITATFFYGNMMGKDAIAIPYDEYVGKHAYELAPEAAVIVLIVFAVKFLLRPLLLKCLWAAEFADHALNRRSINTPTAFPVIHLFFDVSTVAAIIGKQKNYDRFFPQLAVDLEALVDEEGKEEISFLIREMQRFIDGVMVMLFMRKWRKRRTLAQLRPPAVSSPDAPRNCLNVPDREKACSLNNELKTNLAIAADY,18.5435
|
| 74 |
+
TDAIKVLVVVIVGIITYLLVMWYWSGVVFEYGPVFIFFLLITPFTGEEYNFVAIFDAIAK,13.894
|
| 75 |
+
LGRRRIYGGLFHVLFFFTAFIFLVQGLRDGGRLGVPPHTLVALFEVCVILVLSASYASTFYCDYLSTTVHIMHIIQQLLDCYYYHNTVLTDNNPWAIDPVTLSLDKTYEEEEDERVEDVTLNILKKQYWHQAFQFEKCRMNMRQEEEESWLLLGNPQKQCESCKVRVSDPIGPDSPRPDVTIRISSVDISRVLLILRGSESLACEDKVRFSLYRAYQEPSPLFAEEFTEDLRCIVHTVRVEENALDTLDASPELFYLPSCNAQLIACSSFLRLNFLLKWTERLALNDHFWANKGNLYHKSWQSVKEHEYVHFPYCLVRLGGQDLERVEAHEKKKPLNICLSITVPIYWGDPRRKEFCKICLETDYTGYDMFKKRRLAMIFFLMFLFFWVFSLYLVEHAGQAMKNKGKAVKLKEKLDRLLDCSEMKPKPIRDGNLKMLQMPGTFIDCSSDKGVLSEFVAGYLLIGVFFLGFLVYVSCSCTILLWLYWCIVIVWIIIMFYWWVSAVTVDLQMAKFARQRERKVVAQIELAWRLSLFWVLYPFLLLLFLLSYFGVSPLPKKVKGIGRAKDHPKLFYQLKILFNLTLLLFGSLGFNDSMKFPWDLVLFLFIEFVLMLYFFTVANSQGFSFLYRLTKPLRNVSALLIIHTFASFLSHVVKRIYESALFSMMLQTLVVSFFQQYVVIIYQVTATNFISRLIKTLKLNLPTFVITMVFAFLANFVCKLLMRVNNENYTFILMAVPLVVPSLFVPFTTLGLNSVAMGYFLRGFFCETLQEKAKQSVVKFKKDEPTSRYELTPTR,18.1273
|
| 76 |
+
SEKSISRALVTTMRYKVKFTHGQAVFERQYRHVLDGPFGERDVTGKLRLPPDPDRQLSLKNVYIVSAHFDPKGVEAKNDYVSVSDSIAKRAACVIADLRHQGCRIMYPGTAKGVEIMADSDGVHLQLLLQATKGPGGKKTAIADLRVPLIDYDPMATIIHAHVQGGPVFLREWTVIPVYVQLKFKNDNKVKINFIVPGDIQPPLIQDNDHWDTNRYKDDGQYKFDLLPEQIEIRGGYEDIVIDPGFTNSFGRVLTHCNSDAVERRTLPLPLWKGLYTRHKARSQDICEVPVLDQRVVLPSSRVRSIPELAIEDMWTPSLSDSVDKNVKHAGNKCNMTNMKREFTGIRPGDFKQALLGLTCHTGREMNINCLKSVLKGNKAKTMVFLHGPMTNTLGALEKLNKENPKRKYMVFKAFHIDADGLSISLMISSSGANLPYSTGLHLLNPDGQIVALNVIAPISIGGQSVEHLQENLLQKVLRNNKAKYSINTCVLSIDWMADLTSRPTKLLPRLYGSGYSISDIVTSPSDALFDIGAQEFVAAPLMQGPIDWLRAIAIDEAEHEGMTMSNVVAEAIENARREYTLGLSGVDTSGIAIGHARETVGREGTASMPTSSAAAAQGFWWATSILNLPPTMTALSDMIGGQVVNGGSIVVVGDGVSNIDEEQRIMAQQMVEQIIHLMSVNQVSALALKNLIEQENNTVGQDMLMRPDQTAVSLILKTSAISELCELTDYVYQHSVVQSQRILVGPSGTAIEQRTSKAALLDKQLTMPALYMEGDAG,18.4702
|
| 77 |
+
GPVSNYTYTRRDGLRAWFAQTEPRTIAKPDPADYLPTHLPAREKLATYKRVVIERITNSMGYIEYLDARTFNYISSPDANHIIIETVKMIACMLFAIGIIFSVHDQVTATQRTAVALLVAIEVLPNGVCPSGTNHPSVFQKILTGEGIFCADVASVGAARAVFITPQVQGGSLLATKVIMHEQPRPTEIVRDPILNQAGVHALCGTKVEGDVRQSANFTISFYSYSSTEGINYESTQSDIYHDKSPSNPITLLCVRYSANGKRDLEDGEVLSRPVTTQVTHESDGGTEKVRKDNITDIVIVLEKAFPAAIDEFRITTIKILIDHPITEYIVVCNDPNPVGTFRLAKYILNTYPDGSVVVRHENTFMKMLSSIFIHVNPDPSRLLNVIPVTNSLVKSGRYVMGDSDVVEKDNMKAVLKPLFEKVVGSWMGNSTFAMVAGFASLASFVFAFGHVSTQYAGGIHSEPLSILFGVNFSLSTTWVSAFYLLAIILMGILAGLETLLEGEQA,17.9375
|
| 78 |
+
IGFNTTTLCVCLMVALAITLTYFIKNKKSAYTIRLECTQASNCEVINFPRGMTTLNSLPSDDIQLFHTYSAVRLALCLGGSLILGAVLKIIFTNTELGRVLHAKMLKNGSLSALAIVTIFVVFFIAMSNLALIHAKGSNKAAASIPCGFLTYSVLALLGVFMSKCWNRQMPMLYGLSKGHC,17.0854
|
| 79 |
+
NGQVRERMVVLALKDPANSDRINDHSMHIESYTFVYYPAQGGCIHVGIIRLRKPQKLSLQEVLAANGDSVILAGLGVDACSFPDHIMENFFEWGDTQQKMDPRVGPNAFVYDQAFLDEIEGHDLFFLREILDSVTIGNRLQNPLQIFRPYAQARNKITRSHLGCSVTKMMLRNLETVCTASFQTREPELEPFLQEMRADNVSPVLDLLEEFSFKIPPGANIVWLAPVAWPILQMVKRQLRGTGRCPEVNHVSPGDVPKGATQGTWDALDAAQLFIGGDSNIRGSRFFFQLCGLVRLFRTARVASVFTKADPLSRTALPEQGAGMERLVADIFESVHENERVANLADLDRELCDYPARVQAEEWARACGRAKSHAAYLQSGTVDTNVKTHAAHYVLKKEELDFAMGFQGKTLALSGHRCLVRKRVASTKPEIFTQLEQLRVNNPGIARAEYTELFAQGSFVVVVALAEFRNAVRQVQDDSLCQELIEKAQMFPRVLQEVRKDPTPKRLSFTLTLIVVGLPRSDFALLNDAFLACFIPNPRGVANVILDTNAYDLEDLTSKPEVIEFEGICDLRGVAAVAPKTPIPAPRPMEGGKMFLISDMEEDGKGYDVPLPYALGAAVVVAACDQCGHPTFGTNGIAERIVIYVYLHFPAVVALFNPIGWACFIAHSCDPAFNSSFCRLGLPMLIMVVAAIVSGTLVFTLMIVTETECLDSGESKQKALAGDMLIAFPLMGLLELVLMELAILKGSAPSSSRHVHKDDGFVAMSPLNGLALFIVLLIGTFHGSTTVSGQVRSGRRTDMGLQGITGRAAGVRGHHFIMLSRADNIVTKLVPPWATALMLLLLPFLLIAEIEMGAGPLTLMDGVRSWISCLLVAIATAAFFFLIGTFQWVLGYWHRSNDSFISILTALYLLINIAKLGFGFYLH,17.9518
|
| 80 |
+
VPISYDIKVPTGWFIDGDKVRYKPCQGLKVITLVNDWWIILEVFACVTLPSTLILEKQDYFHKRRCTTIFESVAEFTQAYQVSIQESTQQHLTMAVQQVLGIRNRVDRKYVILVANDSPVVCYLEGSKVLFTLLQGPKPSSTVLLGRNGKTGLLLRDVTYMKTNGFDVVGGDLSVGVKNAVKAGFYPVPLTQIVQLSPVLTQAFFDDESVTVLDGPIGGHMSNKVNSQAQQNWKINNDNGFMVKREARYLGVTVIKNDLVRGFEDLTDVLGGCSKSILGALEMRDASELNHKGISVVAETISNAMTLGVSELTCCTKHRKEVWLQRRLRGKWLRLMLNIVFAWQNDLFFTITFVPNIVHFQRKIFMPAEAVFNFLIASLLFVLIGEFGFFDVEWRRWHWRRFNMIFYVERYFLFKAFLANEVKRGIEEAKKLLSLAFTFVLISALHLVNRIVNLLADCSHSRFLVDNLIDLRFIDIYCTKKYNHMTFMLLLLAATIITFLLIGINAAMVCCARNDQVLQLIQSLESLFNLIAHLNYMTVNKFTFGLMLRMNAYSLLIIVNSLTGYAELRIASAIMLRLEQAFYDLMSRFDVTLNGVIADRVGVASYSELAVAILQLLELLVMEIYEYSIILGLVAIYFMVTIGCCVKTLKFQGLDAFNP,17.7417
|
| 81 |
+
IFFLLSNSQDAYADKFKILVPLLWFLLSVGFAVLLHWKQSIMKAIMFNLSLVAFLCWLVWAVSNFLYKLSDIKMVFCLFIVVIWVTALWSLSTPISYRYTVKNFVVNERITGMFPNLILFAEIVPAITYIYFLFTFLDFTYRLQALNDVTVLGTKPMRLIQVLLHLRVAIGFLVVNLVGTFYDETMEGGEVWELFNSLTPDINSSVTVIIAVLFVFFNFFFVVLTPQHTKPCKKADNSSKPLAILVNGVTLILVLSLGYFFGSLGICAFSANSASMLQAFSMHTTIIMLFKIGVASAWGQVWNQRTDLEVTDHDPPLFILTLLLAGWVIESTTNFGGDSEIMNLLGFLAGVLTSIEIFGLQINLPTFGSFVQDWGAIATTGFQSQEQFFWITYGLIFVLPVAMLFFKIFHEIIEFNTMLIICTALNASFSLLHLSRALKFETKMVGKKRCSADEEFGARMEDAMDGAYAFFSKLLNTLFVAVFRVIVIYLVAFFIFKMLWIFIPTIVDKTNEWSVGGFVCELLFSIAGNLMGIILLAAPNFKMWFLLDVNTVFLFVGLLGLINEISGLRLWEMRFSNWFIKFHMWFFLPLILLLPSAIITFSGFIGAFRIEIVYVFSLIGLYSVPLILSALRQR,16.9356
|
| 82 |
+
AVITQRVIGIVAVTLLLLIALAGGILTPMGETGSFRFPEVSWSVLTLLKETGWGSAEDGPTLQLGRLVTRAIVVMVFAALIGGWIGAILAWLVGRRS,12.8882
|
| 83 |
+
FISRALFETAVILILVLSGILQGMVLHSSAPITDDMELHKLLAKEIYTAFCIVAAYLVVCVGYPLEREDRLMIGVYGSCSAGPLDNVKEWGYRTIAPLLVAYFGLFTLWYYRVFGEDAEKIWIQTAIFVAAVLGMSILNLLVYLGRFPDAKARLSILLKDLAPNVLFHFVFFLINAVALMPFIYALIESIIQGVGIKKFIIEQNGVNVPLTIARALKIRIIDGVQITTPNQNIRRVSPTVDDRMGPETSRFHNTLEQDDSIVFIFLFVQDLKILGLGSQYFTLFINVPAFYYVIDIINVGFTVVIIFVVIELIKGRFRSLVGLFWVGSSIKSSDFLAGIKNFVIFNIAEVPGVLITTIVEIAWGADFNSAKITIMDAVLIVFWFPILDFAWTNIAFATTGNFYFLIIVAGLGMKKADPMLLATLIYAVLSGACTPMLEVIMESVYVVGAVDAIALFIAPMFLRFNLPILVTYETRRPNLIWLMALIYFVDSYHLYFKSWWFFILSIWGGVIGMDVVGLVWILGNYSTVSIIGMG,16.8602
|
| 84 |
+
KATQSDKTFPLEVSFGFTASSGNIVDAHAASMATYITLQAVADLVDSPTECPISKDISTEQKIWDCLPTVNTFTARTGRQAEIKSTSLGQNLGLPYFDSSKSDKLHVDMNADGRTISDTLFLRDTQKQMHDSRKIFLPNTAAPNTGTKDLIDEYGEGLIVNHDTNDASGYLLTDELDCHRPTMKAGSLNPDYPSILRGLKVNIEDIIQDENKVSGFYQVLSYLLSKGSRLKRTIKFCHERDRYIHSDNHKFVFSGIGDQAKMEKELDVAKATGVTIGLEDSMVKKPSTKGNLVGIIPMNGTFLLVPQDPLSGQYGSIIFAHCIGKLDEDTAANIHTYYKAWVMMGNLSLYDKFMSAKSFSALKVQLRVLIARAGYIPVLQVQTNILDLSITEDANIFEEVLSSVSNPFFGAKKTIRQIDHDRNVGFSVNPRGIDSWRNDMPIVLADVAGSLTLTTLGLGFRAGLSDADLCYYHDLLLEGRVDNEKNAVPEAKARKVSQTRAGLFKRLLLQMNGTDFSIRGCQGQQLDLIASNGYQSLKNLQACKKNGSVTLSISVVMRMYRFVADFAKNNEALNINKYDFLKHTDVWYYPGEHNRDIGDLNLREIKFKPFFTCDNSPRNTVAVFKQLYTLPLDGRYFVSTKRETSKLITGEIYSISEFGEHKGWVALANENPGLVSTIRQTGRVVVQFMAGKSVKDAPISKRIMLVPCDKVIVMYKSLFYRLPEQIDTCYESEEFYDTKKDCIKTALLLARKIGYGMNALGHDERLSALQDVLRTLKPCDVLTLKLVTDLGQATGNMILLIISVHRSKLRKVISILGNCEAIARVLQTMKVGGVDAVILLGNNINDKDIEPTVYVGYSIMKEGINVPFTGGINNGAVLACMIKILEPIPIFVAPARPFRYYLRALLGFLDLGVGYLSNADEKATFASSYRPGMTAKELLAQLAGELNIPSDLPRTMEIVKDL,18.0526
|
| 85 |
+
RPKTAEAIGLLRATMLFALILAFIALAELLAGAMGLPGSWVGLPVPLIVLAILLVLIGFFLGLWAIGGFER,11.2268
|
| 86 |
+
AGVCRVPEHHEMPTIFCDRDGNNGPILADNAQVMAKLEERQQGRKIRAYSSSHFPTRSVFLRMGSGALAAIQANEGNVRLLEFDSSEPRAPLSKESDLFGYGGIGQPNSPVKNDYKQKIFGGMIITGPPSKVSLSLSGYAQAQPTTSLGTLYGDAAPAENSKVKSLLAEGNTRWRFVDNTDDATIMAYGMRSIRFERPFGMATLNAPVNTDLAPEGRAIIKCPTAFTFKFTGDDEYYTHSPPAYVGAIEDMKKEKYRPPVSLSGKRPLVIEDKRAGVMDIYSFTPKITVVPGGRNDVYYDIDIRISGGIEAVDRQPMFMFAVAIALCAALAVGFVMAAACELMGRSPRKAGTQYRKKRILVLTSNTLICYLLSPLLLEATLIDDSSCDCDLDLEGNRFDVTYGLVSCDDLQFQTLFSFFFFNLIASVIFVELRHARRIYIIEYPLFESPMSLVGCVFLALFAILLPDTVADGSGDQPMTFYTDIYYGLTSLGEMVTAYRLINLATGFLIGIIVELAATLFIMLAAEFFRSSQHNSADPRKFQREQRSFLRLRRELLPTTSGVVPTNW,17.8225
|
| 87 |
+
NGKNLAERFVWQQLSAAPFTMIFTMVQIGSPDVTEYGWNFDKRALAGVYISGQDVRLIGGARNKNVTVTIESVVIGGYGRPSSTDLAGHEVLAYFSQSPARSRFTNIYARLQNTRGGYTAKFTSSFRPEAQTYETGALSIRFQGTADAPSHLQERTIGQLESDGTLTGDEKSYRTGINRGLIGKWEDAMAHTLGAVGTSGSALVLISGNHFGNGYAFYGAGNKSLTSKLVYDNPFTQVNTQERFAKDRYPDLTGLELLPENVQVTAVGNTSDWLKGSIMFAAGALAGLGSGQIIAGFAAVRNAAEGLGVALLIAGGTVVGSKAN,17.1103
|
| 88 |
+
AGLFPNELNELRRRLASDATTFIAPINFKVMLTREFQLLHLVFGFAVGLAWNLLMGQNWPFFPLIHGSADDLPKLTSFGVIVHMHEAVEPIWAWYLSLISVQIHSGKFLQSIANTRLVGSLNGMFPAWQGGKMIGRLIPRHKIAATLAIPSLPVLWGVTHIDLMPESLEWSMNLVEGDIALFQETGTFVDIFLLAGGPRYLFQVTFDEKINRLMRKRPILIVSQKIGSHHFKDVEEYAIAMRQGIHLEEAEINIPGGKVTYTPNYLAPSYREGRNRTVGIWQTFLDEAWESEAELAILHKDLVISGEPVLYPHQFRQGGRELVGKFFRLVTVDPRAFVAMQNGAISKEELVAPLITARERTSWYIFGIGAVSATLVATGPVNLNCTQIAYGPSLSAGLAHGLIFLLIAFHLYHVLKCSQAFQGLAAIKALNLIKPTEQALPERIDLDPLVLFNVGHTLIVTLFLYLSILGRGDVGLNLTMAGVVGVMTVLTYAKFRHCPIQSQSNLNMDRADQYRVIDGLQYVLKEIEKDFETATGL,17.607
|
| 89 |
+
WAQKLIILMLSVILGGLFYFSLLPLLHPSTTLRNAPIIMPLLVLSSIIFYWFIHDNMIHHFDWNFKEFRLISQAISLNAFAFGLMLGTPDTLKPGCQHIRNPSFILYYLVYFTWYTGLLAKQQKELLKSLWSFLIIDSPFNLSVMRITRANLIENFSISQGNYLGQILLILLTHHSPLTILSWRGSPDRINDHASKNVPIKVDNETDNGELDKLACGALWSYYSQLWIETMLYRPTNGTEKNQYKDFINLVNLESYSTNVTSDVREGSPKAALLVNELHTYVILNASVVLFVTSRRDYNSLKKRNEA,17.6565
|
| 90 |
+
KGYDIRSNASWLVTRADGKRANAVAEPSALKPGPQSGVGNILPKSRASYFILNNIGAKIIYLLDILDTATVGALAKAPPANTNRDNQAKFKFTATATGGASFSGTVPTDIVGIKVGPTAPILWGQVVGGGQAAAGGTKGVTVEGGSGYFVAGFVLDDKENSLLPNSENVATIYIIPRGNIIVNNISEKTGPGVIIAAEGLAAKGGEMLVARGNSQSSTVVDVSKKAESKSIITEELLKTAQGNNFRADINQLVTSLVDSWDLGTEFTVGLNNATPAGGIFASGGTAVNGKQSNAAVAYGGVQIPQNGKAYGTMVIYKGSSQSKLDAVKVRFSTTNAPEYWFLVGAQDQLAGNNTGYFTGKNALAFAAALAQTEANKVVLAALTNKDNPVPQNKSGVVAKGIAEAFTEKFTVDAVGTDSIVANFNTKLAPGQIVFIGPDLEITIAYNGTVLSDAVGNNAGEAAILNPNISKRIQEKVEIGFSPKKNIGEEYISANGSIDSIGKCAADETAEFSALASTFTSVDFLVKSEYSSYSSNNTFDRFSLRDTSFTDDSNSKNSTRLKASDSSKFYDNYKMVKTAVFNGVATPAAAALGSLSQFTGTTTLRLEFDKGAASLKGEKFSDSKGNSVTEKKFQDTLRVNTLGRGVAFIGVKVDSPKALVIAVAGALGIGGNVLLKGGKIVASSKLKALSAKQARNQLPLFGEYNFLSGLFSNGATVAFIDPLGIPATKAIIYPGESIPIEVFTKTPTRVKFLEKGAKLGNTLSALFVFTETANLTSSLLVRANPGVAGNTKPKNLSDTSAGTSPQFAVTAISHRLALTA,16.9213
|
| 91 |
+
VLGVAEKKDNDDQAQSNSSDDIKKADRESTLYGQISAGVQVGAVGTAQQQVTFQLGYASLWGNSKWYHGLNKRDAVASGYESLMGSMTQAGNGISVRGQNSSSDHMSSLNNNSNNQYAGDNLLFSGGNVIQDMGMAQSLSYQGPFSGIQYSSQSYTNTNIFWWSGGDNASDIKAKLVYKAVGYDNYLGEVPGEATIQVRNLKFANNGTLAYAVHSQILLNGGKVAYNGRCMVSNNSTVYYSKTLQSALAQTWYEQGLVDANTLLVSAQGKKSDLYSLAKQNIAGNRRASFAYGASANPSAQVNASLSNTFTDTYTYFSGTPTYSRSSFP,18.6621
|
| 92 |
+
VGIIMPDSAAAFSVAGSLDPLQQVADAIEEFAEKKLSHFDIDDTSFINIVSYTKQVVRALFVQPCRQMIQPFRDPYSEIKFVNLDISKQLMPSPRRNSSVVKQQLIPFGKVWNILHKVGLNIIFKDVTVVSIALALAANLIKKSEFLSLKMAGRSVGTEEKLGFMGFIFMNTENYTKGNIPGKEIVAMYFLYLQNILFHPPEIGSPEPAKYSEQSGTYPCADAAKKYKKYPLQEKFMFIHASIGVGDVGKKVFSQRPEKGHLAEMLGAAVLFFGENFPQADFNYLPSKEAEYNLSLALFKFGTQFVVNNQPAFCYNEEGNGWLPVNKLESNEILDCDTPTKGTKVSGHLTPVTAGWLHLLQNLGMMCGSRQAQWQVYPFHNATCANLQHTKISPMAGLGEAISTGGRIRPTYRKILLGIPEDHYNPSSDLNMIESQLVQLDKLKEYEAYHFGPQMYAQNFPEKTQLTRLMQLSILLSDDRSLARLNNKSLQKMTESPEKNVTKSVPSTITFGYASELRRKKDRTKNTWVNLRRKENGAFDELFVSQESIHSNLSVAEFTIDFKRNDGVEIRACGVLCVTDAFRVKSQFKAIHVVGMSSTTLAKVNCQVLSPNNVTFDVNNPQYLHRQQSVLANASTWPQFRWQGEVSGPTLYQLDSIGVLPKADPQKPKWQAGIMFVLKLCLYLILFCYLPFIGVMYLKPAIFEQSTPPYKQAEAMHLLICCIVIMLFYSLSPVNLQASKQVEGSGVNLLVLFMTLLSYWSNLFRWFGHLFLMLTIASSLIYEAVAKLKTITPKNLKSIWHIETWQVFEPFMVFYIVVYSVALMSLKVFVDTWWVTFFYSGPVIITGSILGHGVNLLDRKMPYSSNIKLALHNVHQLLLNINVMVDEFTGQPTSPFVTNVAEPAKRAASLIIIAVGDALLAYMLGYTVPLVPRPK,18.4627
|
| 93 |
+
KILLGSSITQSWLTYIPFVFLLVIPLFMIRHYGILMTNVLTILILCVGAKVLANSKGDDPTSVRNLKDVWQKAFATALQITIKYYFGKSTKDFLQSVSMIKADSTIFVRKAPSWPFNLSVATVRGATTNGMSFTLPCTGEGNGGFCLLSQEAYAVTGPLLEDVGVLAPEGAGKLTQAPELVVGKVGDVDSKALLSLMIHLLAKIGVATVALSLIKGELEQLRVEGTDIARELATKDSKDDNKGSSLATIINPPMNIIVTVSATKPNNTVGGRASRALTQYLMDAGTKVVISTSTYRDLVPKARNDSSLVKTILAFKEDRVPLEGIISKRKDTVELRVVLIRLIDRGRIAEWLDKDVKAIDSSDDVTEDLIVDSKPMVHPMRVGGTIRSDYVRNQTIIHIYEKEWDDLSRVIEEKQEMKEVPFMWIQSGKNMEDEILP,17.3175
|
| 94 |
+
MFFYSLIRTAPGTLPLRQSLIIFVSGGGGDGSVAEAGTSLGAPAAEVFHVTIAARFSHELFATILLAYCVASGDTLSKVVSDRAHLVQLVTHHVGLARLRMLVTVIHFTALCPFGGAILFTAPLDINTRQPDPDPVALWWYYIAPVTGQMTREFGGTIINPNIANSYHLVYFKVLFRHFVAEYVGWLHGPGMHPTDVLDIKAALKKSPTHGPEIPHYYSPPRVPRAIPPIFSVFNEIGDARYTTIYDGSVMGLLEKARTYDMEEVYTPRQVGYIFVHSKGHNVFRLVAELESAIGDLFTAYFDSLTSEDGKQQNMISAYLKGLVASHGCGLASAFSFGEQEKWRNAFNYLLWGRYQIESWRTVEAIGPDLLSFWRKRFNELKQAGVWITTSPTCWEAGSVKDNGLFIINSMKYALGRDAVWSANMPRVNKHITIEVKGAAEHQQIVDALIALVKDYDNLGFYSAEERADHRFFVAMVKEKGSAGSSKIDTARVWDVHLIRSRYFYYDASAWYHSAQRMTDPIVNRGYNIGLFAAIVAAGMLLLVLRVDRRKITCPFRISCPDERFSLKSHEIPLDGNLRVYGELKSELDHEDPFGDLTVFRGTDTELSSGGFPLHWEFVKEPEIGMLETLIQAVVGVYFTTSLYPGYADEPGRTEMGLYNMGPFGWWLVKYSDR,18.1742
|
| 95 |
+
SNTARSTQMVGTGIDINSTQMYPYNIMLTGFEVLIRLTPSAIENWQIRGEEALDSFFTSLSNAIGNACVTVFLMRILLAVYTTKSSAESDRAIGYATAGLPNNITAIVAQINAVVATAVVNSMNACLDLAPILYWETLQKIDNISNYYPPFDRDCLKARAMTYQPQEVRMDMPITVACQSGRLMNTAVRKETVIYAIIKEEPKNNFYLLTDPVYQRADTVVQAQYGHEPEFDTEDNLYPNKYGWIQYHEEYYEPIWWRWKIRSYFRTTQLETKTSLLARDSWEPFYASPFSRHIPISITDRPGMDHFMDDLYQSTPSFLTNAICCHNTNGHFPTELLGTIDTLRSALGGLDLHQSSHKSHLLLLRSTIRDLCEASGSGMTTQFTYLLLGNVYIARSDNLKHAFDANAKKGFKLRVIKGAIPPHVKMQVVIGASAGRILLMKTSKLKFVFSDGNLQRPLSEYVELAGDDISEAVFHAGKDTFEFEYEVTDDQFFFHFRAELIEPWKRENLYDNSLYLFRIGDKKFVRTLFTTLSCNKSVMLYFQKFAKLKITASKKKGISFTPDRPACGISVVPHLDKQFVLQVVLQTLMKITWKPCKKNRFQSRFVDHGGFFDFVTYSEIYLKLFAGENVVFTRMSWYAKLSTPHDFQPRSLVGVSTMGIFDEADGKYHLIGTGNFGFKIWRFLYVLDSVFSIEGMFAATITEYILWSGIVRYFRTFFTLEAGIPSHSSGTEGVYVCFKELIFEWPKDTPSVQISLAESTDPSAGIWIREIENRNQFNKVSMLVKTAVDVAQLVFTLEAFAPFEQSLNVIFDNEVDVSLTKALGPTASNTYESSQLALGNRLVLSEAGDVTALDRMVTTITCNTLCFFRHYNGITVVNCIEKAAAVVCIHITNPMPGFVQTQLGIGGLGPYICKSCSLAELQCEDRKIRFYSQIPGSAGDFFEDIAWQKRLEELKNLPK,18.5522
|
| 96 |
+
LVKVETEVKVYVRPTKPLPYTVETAYGGSPEQQFYNIRKLEPGLFADMGGFFTPPMSTASLGSTYQIFRQIYDGTLLWKNNMSDTDPNNVYRALQMKDTVSSLMFVLVLPGSDVYIKLGLVHIEETTKIDGTPDDAQPSSTDLSPGRFVEEQEVMSEDDELALLEDLRSLKFVCQDVLKRQKRHIFNNASITESLTIAFALNNTDNRLSWLMYLWIFALFGLVLIILVLVAGFDIWWSPTKQYGMIIFNLIGNFSSYKTMSEASMKSGIANVGQAWTTIKTVTAVLNNKALVVVNAGEDALALLKSLPKQTDAMVGHIQMTDRAIGMKPNDLWTFIAPIGYVGKGTRMFIIFPKSILSPNPSIGRTVVVVAGIKDVLQDMIQISKTKTDKVSGVNKGDQVDFRYKVETSKNNSLVAGAITEALRGSSASGIKIGSFVQDPNPLFGDLENNFAYGASAMLFDAFLTRFNKENNLIVLVGNRSALNTDTRRLVQWVDALHFNTQLFIIAMEKNAQVQNALTTANKLGILKPTVNVVDQYFPQGLLISLDINRAIHASLKGLPPKGFVVTVIGEEPDSSQLVAKVKAFGLKLFVYAKSTSDVAKLNDLGFATLETGGSLEFFKMDQFKLEIANEVTKTMRSFALIVVLDDDMQQGTKILTDIHPHHPQFTPGPKEASLEKKLALVLGMAVVYRLRLTTVRLEVVTRIPAVIVVNDIQIFTDMAYTNVSGNLPRLPADKVKLGKYSYHAADADGINYKVTGDGSKLKGSIVAQIMVNDVVVLNTKWPIETSKWKIAVNAEVRLDIGPFSTNIPRTMTGYESYLGNRIVLIGRKNRVFGRSTIVEGLIGFLFDVFLHVWGYVFTWITLAIHYWAGPRITHILSRAGDILEIVMSAMRLEKFNSMTKDWLRLLEIPILAELAERIVEGDKRFGVIDKSGTPYELIKKTVEPENVPTALSKVESL,17.6058
|
| 97 |
+
EFRTALGTLAAFVAIFFISVQFLFRFYPETWLPIYHLAFKRLSVPPAAIVAIASVTIFCIGAIFGLFPGPALALRRLTGNVAFLIGIATAIGIGVTFLIKGISNSNNTSSMSIIIRTVAGAIAVVLLTLPALVVRIHGNFGRAAVGGGAAEGANAAIFQSLTGSNNAFRDALFNFGVKLLFGLAIETREIILLEYIYNLLLTVGYDLNFASRGRLQLNAITGLLVVSAAIVSGYRTVAAERKVFDFALLKARKSVYPALRELRLVPLITFIGVLFVTTK,15.2478
|
| 98 |
+
KGLQFTRKGWNHKGRRHWRDFDTVAGNALLGIEGQTGPRMVETGENVTTEPGNRRTRPTLLATGTEPADAGIEETRIEQDVILPLTTKANGGMIRVHHYDVRKIAGVEIDLESDILEARLTDGEDLHNCKFTTTVKAHIKTEPTPVSAADSEILLKGQYVSSDFEVLDSDLDANVSRDSRMWFEVAYICDILDKQTLLNEGMTFTVTSDGYSSGAADVWVLSTIKTQCRHAQGQQWLYRAGNLKPVVEMEIVYSAARDVTGGSLFGAVNSAAPFTVEMLFFPATIEQLRPGTPRAGKTITNPENATSGGNIEVFEEVKHFSDSQFRNEVRFITDDDSVYTATERRRIPNAPQNGIIRYWMKNGYASWNTEKVYARQPDGDITRQESFENAAQSMSTADNYYNHYYKEALRMHLAGGVEDDLEDDVTQEVRVSKDGEVEIDLDLNYTSKRYREGISWFLGCNAAHGIPINDAGVGFAFAAIPQYA,18.6838
|
| 99 |
+
FSSVATATTTAIAFAIAAGVGGAIGGAVVGSLVIASLRGTVTAASALKAPLVPLALTVGAASLGATIGLAASWGVNLTL,10.8479
|
| 100 |
+
SSITAAIQLYKPDSISILDDDSPDDLFETVEFLTEKQKNKQTSDNSYKLFADSFLSIVDSPNWTNMLLIAARVLLVLYTICPCCRADWVGAIGTDDVSYDIVCDLLGININFFKITKVLTAQYLPGRTKVGYMKHPLKTSYFVSIYVEDISDCARHPYGFSYAWQYVKKPYGTVSVDIYNGNPREKLFCLEGLNWATGLGLVVGAGAYKSLGTSVERVNTLVIFLETGELFVWAYAALWFRKRYTEDSEAKVNLYIAGLIVLFAVEKVSYAVPDIVFKEQILKSMVIFAKFSIINLYLDALFDFVFICIIILLLRVLSKDLEAVVGPTLSVFL,17.2645
|
| 101 |
+
LECYGQQSSLIEMYRDYTIKVRDRYANNERIILDHYLVLNGDFYVRLASNKIVLGPDDANSVVAILQIGDMGLFLANGKNVTEMKRMLEKLEILYFTGSEAAVGSVTGHVCLMITNIWKDNKKLVEMLEFLGTEIIYNSVGLVFMIGKMSDKQGVYAKNKFSDSILEIAVKLQNFTWRNHVLFIGAYLYQWELYAEPEVVINNNISVIRVLWDPDGKSLYIIRPEKPPNIFEYLMHGICTFGGVGAIAGGMGVPASHIGGLIYKADFSISSWCEPGSVNVGALPYGSNCVVVQEGGNVVTFSLPTGSDVPIFALEHFPEPGKWKWEGFYWINPTDYRIMISGLKYTLAANAIAGIGAYLESYNIKISTWQYLVNGNPYDSVGVYNQHEYPLYPSLPMSDFTIFPVLTFAP,18.4071
|
benchmarks/MLM/config.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Root directory of the project checkout; the data and checkpoint paths below
# are all built from it.
PATH = "/workspace/sg666/MDpLM"

# CSV splits under data/membrane.
TRAIN_DATA = f"{PATH}/data/membrane/train.csv"
TEST_DATA = f"{PATH}/data/membrane/test.csv"
VAL_DATA = f"{PATH}/data/membrane/val.csv"

# Identifier passed to the ESM-2 loader, plus the MLM benchmark directory and
# the checkpoint directory beneath it.
ESM_MODEL_PATH = "facebook/esm2_t30_150M_UR50D"
MLM_MODEL_PATH = f"{PATH}/benchmarks/MLM"
CKPT_DIR = f"{PATH}/benchmarks/MLM/model_ckpts"

# Hyper-parameters.
# NOTE(review): the consumers of ESM_LAYERS and MASKING_RATE are not visible
# in this file; confirm how each is applied before changing them.
ESM_LAYERS = 3
BATCH_SIZE = 8
NUM_EPOCHS = 10
LEARNING_RATE = 5e-3
MASKING_RATE = 0.40
|
benchmarks/MLM/data_loader.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import torch
|
| 3 |
+
import config
|
| 4 |
+
import random
|
| 5 |
+
from torch.utils.data import Dataset, DataLoader
|
| 6 |
+
from torch.nn.utils.rnn import pad_sequence
|
| 7 |
+
from pretrained_models import load_esm2_model
|
| 8 |
+
|
| 9 |
+
class ProteinDataset(Dataset):
    """Masked-language-modelling dataset over the ``Sequence`` column of a CSV.

    Each item is one sequence, upper-cased, with a random subset of residues
    replaced by ``<mask>``, then tokenized and padded to a fixed length.

    Args:
        csv_file: Path to a CSV file that has a ``Sequence`` column.
        tokenizer: Callable tokenizer accepting ``padding``, ``truncation``,
            ``max_length`` and ``return_tensors`` and exposing
            ``mask_token_id``.
        mask_rate: Fraction of residues masked in every item (default 0.15).
    """

    def __init__(self, csv_file, tokenizer, mask_rate=0.15):
        self.tokenizer = tokenizer
        self.data = pd.read_csv(csv_file)
        self.mask_rate = mask_rate
        # Length (in residues) of the longest sequence; used as the padded
        # token length for every item.
        # NOTE(review): if the tokenizer adds special tokens, the longest
        # sequences are truncated by that many tokens - confirm this is intended.
        self.max_len = max(len(seq) for seq in self.data['Sequence'].tolist())

    def __len__(self):
        return len(self.data)

    def _encode(self, text):
        """Tokenize ``text`` to fixed-length ``pt`` tensors (pad/truncate to ``max_len``)."""
        return self.tokenizer(
            text,
            padding="max_length",
            truncation=True,
            max_length=self.max_len,
            return_tensors='pt',
        )

    def __getitem__(self, idx):
        """Return ``input_ids``, ``attention_mask`` and ``labels`` for row ``idx``.

        ``labels`` holds the original token id at every masked position and
        ``-100`` everywhere else.
        """
        sequence = self.data.iloc[idx]['Sequence'].upper()

        # Randomly mask ``mask_rate`` of the residues; a set keeps the
        # membership test O(1) per residue.
        num_masks = int(len(sequence) * self.mask_rate)
        mask_positions = set(random.sample(range(len(sequence)), num_masks))
        masked_sequence = ''.join(
            "<mask>" if i in mask_positions else residue
            for i, residue in enumerate(sequence)
        )

        inputs = self._encode(masked_sequence)
        # squeeze(0) drops only the batch axis, even when max_len is 1.
        input_ids = inputs['input_ids'].squeeze(0)
        attention_mask = inputs['attention_mask'].squeeze(0)

        # Targets come from the UNMASKED sequence. Tokenizing the masked text
        # (as before) made every supervised label the mask token itself.
        target_ids = self._encode(sequence)['input_ids'].squeeze(0)
        labels = torch.where(
            input_ids == self.tokenizer.mask_token_id,
            target_ids,
            torch.full_like(target_ids, -100),
        )

        return {"input_ids": input_ids, "attention_mask": attention_mask, "labels": labels}
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def get_dataloaders(config):
    """Build the train, validation and test data loaders described by ``config``.

    Args:
        config: Object exposing ``ESM_MODEL_PATH``, ``TRAIN_DATA``,
            ``VAL_DATA``, ``TEST_DATA`` and ``BATCH_SIZE``.

    Returns:
        Tuple ``(train_loader, val_loader, test_loader)``; only the training
        loader shuffles.
    """
    # Only the tokenizer is used here; the second returned value is ignored.
    tokenizer, _ = load_esm2_model(config.ESM_MODEL_PATH)

    csv_paths = (config.TRAIN_DATA, config.VAL_DATA, config.TEST_DATA)
    datasets = [ProteinDataset(csv_path, tokenizer) for csv_path in csv_paths]

    shuffle_flags = (True, False, False)
    return tuple(
        DataLoader(dataset, batch_size=config.BATCH_SIZE, shuffle=flag)
        for dataset, flag in zip(datasets, shuffle_flags)
    )
|
benchmarks/MLM/esm_utils.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from transformers import AutoTokenizer, AutoModel, AutoModelForMaskedLM
|
| 3 |
+
|
| 4 |
+
def load_esm2_model(model_name):
    """Load the tokenizer, masked-LM model and base model for ``model_name``.

    Args:
        model_name: Identifier or path accepted by ``from_pretrained``.

    Returns:
        Tuple ``(tokenizer, masked_model, embedding_model)``.
    """
    return (
        AutoTokenizer.from_pretrained(model_name),
        AutoModelForMaskedLM.from_pretrained(model_name),
        AutoModel.from_pretrained(model_name),
    )
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def get_latents(model, tokenizer, sequence):
    """Return the model's last hidden state for ``sequence``, without gradients.

    Args:
        model: Model exposing ``device`` whose output has ``last_hidden_state``.
        tokenizer: Tokenizer called with ``return_tensors="pt"``.
        sequence: Input passed to the tokenizer.

    Returns:
        ``last_hidden_state`` with its leading dimension squeezed out.
    """
    encoded = tokenizer(sequence, return_tensors="pt")
    encoded = encoded.to(model.device)
    with torch.no_grad():
        hidden_state = model(**encoded).last_hidden_state
    return hidden_state.squeeze(0)
|
benchmarks/MLM/mlm_generate_utils.py
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import math
|
| 3 |
+
import config
|
| 4 |
+
import sys
|
| 5 |
+
import pandas as pd
|
| 6 |
+
from esm_utils import get_latents
|
| 7 |
+
from transformers import AutoModelForMaskedLM, AutoModel, AutoTokenizer
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def mask_for_de_novo(sequence_length):
    """Return a string of ``sequence_length`` consecutive ``<mask>`` tokens."""
    return "".join("<mask>" for _ in range(sequence_length))
|
| 12 |
+
|
| 13 |
+
def generate_de_novo(sequence_length, tokenizer, model):
    """Generate a sequence from an all-``<mask>`` input and score it.

    Every mask position is filled in a single forward pass by sampling from
    the softmax over its three highest logits.

    Args:
        sequence_length: Number of ``<mask>`` tokens to start from.
        tokenizer: Tokenizer exposing ``mask_token_id`` and ``decode``.
        model: Masked-LM model exposing ``device`` whose output has ``logits``.

    Returns:
        Tuple ``(generated_sequence, perplexity)``.
    """
    masked_sequence = mask_for_de_novo(sequence_length)
    inputs = tokenizer(masked_sequence, return_tensors='pt').to(model.device)

    with torch.no_grad():
        logits = model(**inputs).logits
    mask_token_indices = (inputs["input_ids"] == tokenizer.mask_token_id).nonzero(as_tuple=True)[1]
    logits_at_masks = logits[0, mask_token_indices]

    pred_tokens = []
    # Iterate the rows of logits_at_masks directly. The previous loop indexed
    # these rows with sequence positions, which shifted every lookup and ran
    # past the end whenever the first mask was not at position 0.
    for position_logits in logits_at_masks:
        topk_logits, topk_indices = position_logits.topk(k=3, dim=-1)
        probabilities = torch.nn.functional.softmax(topk_logits, dim=-1)
        predicted_index = torch.distributions.categorical.Categorical(probabilities).sample()
        predicted_token_id = topk_indices[predicted_index].item()
        predicted_token = tokenizer.decode([predicted_token_id], skip_special_tokens=True)

        # An empty decode would shorten the sequence and misalign it with
        # mask_token_indices below; fall back to 'G' as generate_scaffold does.
        pred_tokens.append('G' if predicted_token == '' else predicted_token)

    generated_sequence = ''.join(pred_tokens)
    # calculate_perplexity needs the positions to score; the old three-argument
    # call raised TypeError.
    perplexity = calculate_perplexity(model, tokenizer, generated_sequence, mask_token_indices)

    return (generated_sequence, perplexity)
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def mask_for_scaffold(sequence, generate_type):
    """Replace residues of one letter case with ``<mask>``.

    Args:
        sequence: Mixed-case residue string.
        generate_type: ``"uppercase"`` masks upper-case residues and
            upper-cases the rest; ``"lowercase"`` masks lower-case residues
            and keeps the rest as is. Any other value returns ``sequence``
            unchanged.

    Returns:
        The masked sequence.
    """
    if generate_type == "uppercase":
        pieces = []
        for residue in sequence:
            pieces.append("<mask>" if residue.isupper() else residue.upper())
        return ''.join(pieces)

    if generate_type == "lowercase":
        pieces = []
        for residue in sequence:
            pieces.append("<mask>" if residue.islower() else residue)
        return ''.join(pieces)

    return sequence
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def generate_scaffold(sequence, generate_type, tokenizer, model):
    """Fill the masked part of ``sequence`` with residues sampled from ``model``.

    The residues selected by ``generate_type`` are masked, the model is run
    once, and each mask is replaced by a token sampled from the softmax over
    that position's three highest logits.

    Args:
        sequence: Mixed-case residue string.
        generate_type: Passed to ``mask_for_scaffold``.
        tokenizer: Tokenizer exposing ``mask_token_id`` and ``decode``.
        model: Masked-LM model exposing ``device`` whose output has ``logits``.

    Returns:
        Tuple ``(generated_sequence, mask_token_indices)``, where the indices
        are the mask positions in the tokenized input.
    """
    masked_sequence = mask_for_scaffold(sequence, generate_type)
    encoded = tokenizer(masked_sequence, return_tensors='pt').to(model.device)

    with torch.no_grad():
        logits = model(**encoded).logits
    is_mask = encoded["input_ids"] == tokenizer.mask_token_id
    mask_token_indices = is_mask.nonzero(as_tuple=True)[1]
    logits_at_masks = logits[0, mask_token_indices]

    sampled_residues = []
    for position_logits in logits_at_masks:
        top_logits, top_ids = position_logits.topk(k=3, dim=-1)
        weights = torch.nn.functional.softmax(top_logits, dim=-1)
        choice = torch.distributions.categorical.Categorical(weights).sample()
        residue = tokenizer.decode([top_ids[choice].item()], skip_special_tokens=True)
        # An empty decode is replaced by 'G' so every mask gets one residue.
        if residue == '':
            residue = 'G'
        sampled_residues.append(residue)

    # Fill the masks left to right, one replacement per sampled residue.
    generated_sequence = masked_sequence
    for residue in sampled_residues:
        generated_sequence = generated_sequence.replace("<mask>", residue, 1)

    return generated_sequence, mask_token_indices
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
def calculate_perplexity(model, tokenizer, generated_sequence, mask_token_indices):
    """Compute a pseudo-perplexity of ``generated_sequence`` over chosen positions.

    Each position in ``mask_token_indices`` is masked on its own, the model's
    loss for recovering the token at that position is taken, and the
    exponential of the mean loss is returned.

    Args:
        model: Masked-LM model exposing ``device``; called with ``labels`` and
            returning an output with ``loss``.
        tokenizer: Tokenizer exposing ``encode`` and ``mask_token_id``.
        generated_sequence: Sequence to score.
        mask_token_indices: Token positions to score.

    Returns:
        ``exp(mean loss)``, or the sentinel ``10000`` when there are no
        positions to score.
    """
    token_ids = tokenizer.encode(generated_sequence, return_tensors='pt').to(model.device)

    loss_sum = 0.0
    for position in mask_token_indices:
        # Hide exactly one token per forward pass.
        probe = token_ids.clone()
        probe[0, position] = tokenizer.mask_token_id

        # -100 everywhere except the hidden position, so only it is scored.
        targets = torch.full(token_ids.shape, -100).to(model.device)
        targets[0, position] = token_ids[0, position]

        with torch.no_grad():
            loss_sum += model(probe, labels=targets).loss.item()

    position_count = len(mask_token_indices)
    if position_count == 0:
        return 10000
    return math.exp(loss_sum / position_count)
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
def calculate_cosine_sim(original_sequence, generated_sequence, tokenizer, esm_model, device):
    """Return the mean cosine similarity between the two sequences' latents.

    The original sequence is upper-cased before embedding; the generated one
    is embedded as given. Similarity is taken along the last dimension of the
    latents and then averaged.

    Args:
        original_sequence: Reference sequence.
        generated_sequence: Sequence to compare against the reference.
        tokenizer: Tokenizer forwarded to ``get_latents``.
        esm_model: Embedding model forwarded to ``get_latents``.
        device: Device the latents are moved to before comparison.

    Returns:
        The mean similarity as a Python float.
    """
    reference = get_latents(esm_model, tokenizer, original_sequence.upper()).to(device)
    candidate = get_latents(esm_model, tokenizer, generated_sequence).to(device)

    per_position = torch.nn.functional.cosine_similarity(reference, candidate, dim=-1)
    return torch.mean(per_position).item()
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
def calculate_hamming_dist(original_sequence, generated_sequence):
    """Count case-insensitive mismatches against ``original_sequence``.

    Args:
        original_sequence: Reference sequence; every one of its positions is
            compared.
        generated_sequence: Sequence compared against the reference.

    Returns:
        The number of positions of ``original_sequence`` whose residue differs
        from ``generated_sequence``. Positions the generated sequence does not
        reach count as mismatches (this previously raised ``IndexError``).
        Residues of the generated sequence beyond the reference length are
        ignored, as before.
    """
    original = original_sequence.upper()
    generated = generated_sequence.upper()

    mismatches = sum(a != b for a, b in zip(original, generated))
    # Reference positions with no counterpart in a shorter generated sequence.
    missing = max(0, len(original) - len(generated))
    return mismatches + missing
|
benchmarks/MLM/mlm_lowercase_results.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
benchmarks/MLM/mlm_motif_benchmarking.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import config
|
| 3 |
+
import sys
|
| 4 |
+
import pandas as pd
|
| 5 |
+
from mlm_generate_utils import generate_scaffold, calculate_perplexity, calculate_cosine_sim, calculate_hamming_dist
|
| 6 |
+
from transformers import AutoModelForMaskedLM, AutoModel, AutoTokenizer
|
| 7 |
+
|
| 8 |
+
def motif_benchmarking(path="/workspace/sg666/MDpLM"):
    """Benchmark MLM scaffold generation on the membrane test set.

    For each generation case ('uppercase', 'lowercase') every test sequence is
    passed to ``generate_scaffold``; the generated sequence is then scored
    with perplexity, cosine similarity and Hamming distance, and the rows are
    written to ``<path>/benchmarks/MLM/mlm_<case>_results.csv``.

    Args:
        path: Project root containing ``data/membrane/test.csv`` and the
            ``benchmarks/MLM`` output directory.  Defaults to the location
            that was previously hard-coded.
    """
    test_sequences = pd.read_csv(path + "/data/membrane/test.csv")['Sequence'].tolist()

    checkpoint = config.CKPT_DIR + "/best_model_epoch"
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    mlm_model = AutoModelForMaskedLM.from_pretrained(checkpoint)
    esm_model = AutoModel.from_pretrained("facebook/esm2_t36_3B_UR50D")

    device = torch.device('cuda' if torch.cuda.is_available() else "cpu")
    mlm_model.to(device)
    esm_model.to(device)

    columns = ['Original Sequence', 'Generated Sequence', 'Perplexity', 'Cosine Similarity', 'Hamming Distance']

    for generate_case in ['uppercase', 'lowercase']:
        case_results = []
        for original_sequence in test_sequences:
            generated_sequence, mask_token_idx = generate_scaffold(original_sequence, generate_case, tokenizer, mlm_model)
            perplexity = calculate_perplexity(mlm_model, tokenizer, generated_sequence, mask_token_idx)
            cos_sim = calculate_cosine_sim(original_sequence, generated_sequence, tokenizer, esm_model, device)
            hamming_distance = calculate_hamming_dist(original_sequence, generated_sequence)

            row = [original_sequence, generated_sequence, perplexity, cos_sim, hamming_distance]
            case_results.append(row)

            # Log only the newest row: dumping the whole accumulated list on
            # every iteration makes the log grow quadratically.
            print(row)
            sys.stdout.flush()

        df = pd.DataFrame(case_results, columns=columns)
        df.to_csv(path + f'/benchmarks/MLM/mlm_{generate_case}_results.csv', index=False)


if __name__ == "__main__":
    motif_benchmarking()
|
benchmarks/MLM/mlm_uppercase_results.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
benchmarks/MLM/model.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import config
|
| 2 |
+
import torch
|
| 3 |
+
import torch.nn as nn
|
| 4 |
+
from pretrained_models import load_esm2_model
|
| 5 |
+
from transformers import AutoModelForMaskedLM, AutoTokenizer, AutoModel
|
| 6 |
+
|
| 7 |
+
class MembraneTokenizer:
    """Thin wrapper that delegates everything to a Hugging Face tokenizer."""

    def __init__(self, esm_model_path=config.ESM_MODEL_PATH):
        """Load the tokenizer stored at ``esm_model_path``."""
        self.tokenizer = AutoTokenizer.from_pretrained(esm_model_path)

    def __getattr__(self, name):
        """Forward unknown attribute lookups to the wrapped tokenizer."""
        # __getattr__ only runs when normal lookup fails.  If ``tokenizer``
        # itself is missing (e.g. the instance was built without __init__),
        # delegating would recurse forever, so fail fast instead.
        if name == "tokenizer":
            raise AttributeError(name)
        return getattr(self.tokenizer, name)

    def __call__(self, *args, **kwargs):
        """Tokenize by calling the wrapped tokenizer with the same arguments."""
        return self.tokenizer(*args, **kwargs)

    def save_tokenizer(self, save_dir):
        """Write the wrapped tokenizer's files to ``save_dir``."""
        self.tokenizer.save_pretrained(save_dir)

    def load_tokenizer(self, load_dir):
        """Replace the wrapped tokenizer with the one stored in ``load_dir``."""
        # The previous implementation called save_pretrained here, which
        # overwrote the directory instead of loading from it.
        self.tokenizer = AutoTokenizer.from_pretrained(load_dir)
|
| 22 |
+
|
| 23 |
+
class MembraneMLM:
    """Wrapper around a masked-LM checkpoint and its tokenizer.

    Attribute access and calls that the wrapper does not define are forwarded
    to the underlying model.
    """

    def __init__(self, esm_model_path=config.ESM_MODEL_PATH):
        """Load the masked-LM model and tokenizer stored at ``esm_model_path``."""
        self.model = AutoModelForMaskedLM.from_pretrained(esm_model_path)
        self.tokenizer = AutoTokenizer.from_pretrained(esm_model_path)

    def __getattr__(self, name):
        """Forward unknown attribute lookups to the wrapped model."""
        # Guard the wrapper's own attributes: if they are missing, delegating
        # to ``self.model`` would re-enter __getattr__ and recurse forever.
        if name in ("model", "tokenizer"):
            raise AttributeError(name)
        return getattr(self.model, name)

    def __call__(self, *args, **kwargs):
        """Run the wrapped model with the given arguments."""
        return self.model(*args, **kwargs)

    def freeze_model(self):
        """Disable parameter updates for every parameter of the model."""
        for param in self.model.parameters():
            param.requires_grad = False

    def unfreeze_n_layers(self, num_layers=None):
        """Re-enable training of the attention projections in the last encoder layers.

        Only the key, query and value projections of the affected layers are
        unfrozen; every other parameter keeps its current ``requires_grad``.

        Args:
            num_layers: How many trailing encoder layers to unfreeze.
                Defaults to ``config.ESM_LAYERS``.
        """
        if num_layers is None:
            num_layers = config.ESM_LAYERS

        encoder_layers = self.model.esm.encoder.layer
        # Clamp at 0 so a count larger than the model unfreezes every layer;
        # a count of 0 yields an empty slice (a plain [-0:] would select all).
        first_trainable = max(0, len(encoder_layers) - num_layers)

        for layer in encoder_layers[first_trainable:]:
            attention = layer.attention.self
            for projection in (attention.key, attention.query, attention.value):
                for param in projection.parameters():
                    param.requires_grad = True

    def forward(self, **inputs):
        """Run the wrapped model on keyword inputs."""
        return self.model(**inputs)

    def save_model(self, save_dir):
        """Save both the model and the tokenizer to ``save_dir``."""
        self.model.save_pretrained(save_dir)
        self.tokenizer.save_pretrained(save_dir)

    def load_model(self, load_dir):
        """Load the model and tokenizer stored in ``load_dir``."""
        # Load with the masked-LM class, matching __init__.  A bare AutoModel
        # has no LM head, so ``.loss`` and the ``self.model.esm`` path used by
        # unfreeze_n_layers would both stop working after a reload.
        self.model = AutoModelForMaskedLM.from_pretrained(load_dir)
        self.tokenizer = AutoTokenizer.from_pretrained(load_dir)
|
benchmarks/MLM/pretrained_models.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from transformers import AutoTokenizer, AutoModel, EsmForMaskedLM, AutoModelForMaskedLM
|
| 3 |
+
|
| 4 |
+
def load_esm2_model(esm_model_path):
    """Load the tokenizer and masked-LM model stored at ``esm_model_path``.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    return (
        AutoTokenizer.from_pretrained(esm_model_path),
        AutoModelForMaskedLM.from_pretrained(esm_model_path),
    )
|
| 8 |
+
|
| 9 |
+
def load_mlm_model(esm_model_path, ckpt_path):
    """Load a tokenizer from ``esm_model_path`` and a masked-LM model from ``ckpt_path``.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    return (
        AutoTokenizer.from_pretrained(esm_model_path),
        AutoModelForMaskedLM.from_pretrained(ckpt_path),
    )
|
benchmarks/MLM/screen_mlm_cosine_hamming.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
|
| 3 |
+
# Summarise the uppercase MLM benchmark: mean perplexity, mean cosine
# similarity, and Hamming distance normalised by total sequence length.
path = "/home/sg666/MDpLM/benchmarks/MLM"

df = pd.read_csv(path + "/mlm_uppercase_results.csv")

all_sequences = df['Original Sequence'].tolist()
seq_len_sum = sum(len(seq) for seq in all_sequences)

# 10000 is the placeholder perplexity the benchmark records when a sequence
# had no masked tokens; exclude those rows from the mean.
ppls = [ppl for ppl in df['Perplexity'].tolist() if ppl != 10000]

# Report NaN instead of crashing with ZeroDivisionError when nothing is left
# to average (every row was a placeholder, or the results file is empty).
ppl_mean = sum(ppls) / len(ppls) if ppls else float("nan")
cos_mean = df.loc[:, 'Cosine Similarity'].mean()
hamming_mean = sum(df['Hamming Distance'].tolist()) / seq_len_sum if seq_len_sum else float("nan")

print(ppl_mean)
print(cos_mean)
print(hamming_mean)
|
benchmarks/MLM/train_and_test.py
ADDED
|
@@ -0,0 +1,184 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import config
|
| 3 |
+
import math
|
| 4 |
+
import sys
|
| 5 |
+
import os
|
| 6 |
+
from tqdm import tqdm
|
| 7 |
+
from torch.optim import Adam
|
| 8 |
+
from torch.optim.lr_scheduler import CosineAnnealingLR
|
| 9 |
+
from transformers import AutoModelForMaskedLM, AutoModel, AutoTokenizer, AutoConfig
|
| 10 |
+
from pretrained_models import load_esm2_model
|
| 11 |
+
from model import MembraneMLM, MembraneTokenizer
|
| 12 |
+
from data_loader import get_dataloaders
|
| 13 |
+
|
| 14 |
+
def save_hyperparams(ckpt_dir):
    """Write every upper-case attribute of ``config`` to ``hyperparameters.txt``.

    The file is created inside ``ckpt_dir`` with one ``NAME: value`` line per
    setting.  The directory is created first if it does not exist, so the
    function no longer depends on a checkpoint having been saved there.
    """
    os.makedirs(ckpt_dir, exist_ok=True)
    hyperparams_txt_file = os.path.join(ckpt_dir, "hyperparameters.txt")
    with open(hyperparams_txt_file, 'w') as f:
        f.writelines(f"{k}: {v}\n" for k, v in vars(config).items() if k.isupper())
|
| 20 |
+
|
| 21 |
+
def train_and_validate(model, optimizer, device, train_loader, val_loader, num_epochs, ckpt_dir):
    """Train ``model`` for ``num_epochs`` epochs, checkpointing and validating each epoch.

    After every epoch the model and the hyperparameters are saved to
    ``<ckpt_dir>/epoch<N>``.  When ``val_loader`` is truthy the model is also
    validated, and the checkpoint with the lowest average validation loss is
    written to ``<ckpt_dir>/best_model_epoch``.

    Args:
        model: Wrapper exposing ``tokenizer``, ``save_model`` and a call that
            returns an object with a ``loss`` attribute.
        optimizer: Optimizer stepping the trainable parameters.
        device: Device every batch tensor is moved to.
        train_loader: Iterable of dict batches containing ``input_ids``.
        val_loader: Validation batches, or a falsy value to skip validation.
        num_epochs: Number of passes over ``train_loader``.
        ckpt_dir: Directory receiving the checkpoints.

    Returns:
        ``(avg_train_loss, train_perplexity, avg_val_loss, val_perplexity)``
        from the last epoch.  Values that could not be computed (no epochs,
        no validation loader, no masked tokens) are ``nan``.
    """
    # Use the model's own tokenizer instead of a module-level ``tokenizer``
    # that only exists when this file is executed as a script.
    mask_token_id = model.tokenizer.mask_token_id

    nan = float('nan')
    best_val_loss = float('inf')
    avg_train_loss = train_perplexity = nan
    avg_val_loss = val_perplexity = nan

    for epoch in range(num_epochs):
        print(f"EPOCH {epoch+1}/{num_epochs}")
        sys.stdout.flush()
        total_train_loss = 0.0
        weighted_total_train_loss = 0.0
        total_masked_train_tokens = 0

        model.train()
        # Refresh the bar roughly four times per epoch.  tqdm already counts
        # each iteration, so no manual update() is needed; max(1, ...) avoids
        # a zero interval for loaders with fewer than four batches.
        train_update_interval = max(1, len(train_loader) // 4)

        with tqdm(train_loader, desc="Training batch", total=len(train_loader), leave=True, position=0, ncols=100, miniters=train_update_interval) as trainbar:
            for inputs in trainbar:
                inputs = {k: v.to(device) for k, v in inputs.items()}
                optimizer.zero_grad()
                outputs = model(**inputs)
                train_loss = outputs.loss
                train_loss.backward()
                optimizer.step()

                num_mask_tokens = (inputs["input_ids"] == mask_token_id).sum().item()
                total_masked_train_tokens += num_mask_tokens

                batch_loss = train_loss.item()
                total_train_loss += batch_loss
                # NOTE(review): weighting by the mask count assumes the batch
                # loss is a mean over exactly the masked positions; confirm
                # against the data collator.
                weighted_total_train_loss += batch_loss * num_mask_tokens

        num_train_batches = len(train_loader)
        avg_train_loss = total_train_loss / num_train_batches if num_train_batches else nan
        avg_train_neg_log_likelihood = (
            weighted_total_train_loss / total_masked_train_tokens if total_masked_train_tokens else nan
        )
        train_perplexity = math.exp(avg_train_neg_log_likelihood)

        # Save model every epoch
        train_ckpt_path = os.path.join(ckpt_dir, f'epoch{epoch+1}')
        model.save_model(train_ckpt_path)
        save_hyperparams(train_ckpt_path)

        # Validate model
        if val_loader:
            model.eval()
            total_val_loss = 0.0
            weighted_total_val_loss = 0.0
            total_masked_val_tokens = 0

            with torch.no_grad():
                val_update_interval = max(1, len(val_loader) // 4)

                with tqdm(val_loader, desc='Validiation batch', total=len(val_loader), leave=True, position=0, miniters=val_update_interval) as valbar:
                    for inputs in valbar:
                        inputs = {k: v.to(device) for k, v in inputs.items()}
                        val_loss = model(**inputs).loss.item()

                        num_mask_tokens = (inputs['input_ids'] == mask_token_id).sum().item()
                        total_masked_val_tokens += num_mask_tokens

                        total_val_loss += val_loss
                        weighted_total_val_loss += val_loss * num_mask_tokens

            num_val_batches = len(val_loader)
            avg_val_loss = total_val_loss / num_val_batches if num_val_batches else nan
            avg_val_neg_log_likelihood = (
                weighted_total_val_loss / total_masked_val_tokens if total_masked_val_tokens else nan
            )
            val_perplexity = math.exp(avg_val_neg_log_likelihood)

            # Save the best model based on validation loss
            if avg_val_loss < best_val_loss:
                best_val_loss = avg_val_loss
                val_ckpt_path = os.path.join(ckpt_dir, "best_model_epoch")
                model.save_model(val_ckpt_path)
                save_hyperparams(val_ckpt_path)

        print(f"Average train loss: {avg_train_loss}")
        print(f"Average train perplexity: {train_perplexity}\n")
        sys.stdout.flush()

        print(f"Average validation loss: {avg_val_loss}")
        print(f"Average validation perplexity: {val_perplexity}\n")
        sys.stdout.flush()

    return avg_train_loss, train_perplexity, avg_val_loss, val_perplexity
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
def test(model, test_loader, device):
    """Evaluate ``model`` on ``test_loader`` and return loss and perplexity.

    Args:
        model: Wrapper exposing ``tokenizer`` and ``to``; calling it with a
            batch must return an object with a ``loss`` attribute.
        test_loader: Iterable of dict batches containing ``input_ids``.
        device: Device the model and every batch tensor are moved to.

    Returns:
        ``(avg_test_loss, test_perplexity)``.  The average loss is the mean
        of the per-batch losses; the perplexity is the exponential of the
        per-batch losses weighted by each batch's mask-token count.  Either
        value is ``nan`` when there is nothing to average.
    """
    # Use the model's own tokenizer instead of a module-level ``tokenizer``
    # that only exists when this file is executed as a script.
    mask_token_id = model.tokenizer.mask_token_id

    model.to(device).eval()
    total_test_loss = 0.0
    weighted_total_test_loss = 0.0
    total_masked_test_tokens = 0

    with torch.no_grad():
        for inputs in test_loader:
            inputs = {k: v.to(device) for k, v in inputs.items()}
            test_loss = model(**inputs).loss.item()

            num_mask_tokens = (inputs["input_ids"] == mask_token_id).sum().item()
            total_masked_test_tokens += num_mask_tokens

            total_test_loss += test_loss
            weighted_total_test_loss += test_loss * num_mask_tokens

    nan = float('nan')
    num_batches = len(test_loader)
    avg_test_loss = total_test_loss / num_batches if num_batches else nan
    avg_test_neg_log_likelihood = (
        weighted_total_test_loss / total_masked_test_tokens if total_masked_test_tokens else nan
    )
    test_perplexity = math.exp(avg_test_neg_log_likelihood)

    return avg_test_loss, test_perplexity
|
| 132 |
+
|
| 133 |
+
|
| 134 |
+
if __name__ == "__main__":
    # Pick the GPU when one is available.
    device = torch.device('cuda' if torch.cuda.is_available() else "cpu")
    print(device)

    # Build the model and leave only the configured attention layers trainable.
    model = MembraneMLM()
    model.to(device)
    model.freeze_model()
    model.unfreeze_n_layers()
    tokenizer = model.tokenizer

    train_loader, val_loader, test_loader = get_dataloaders(config)
    trainable_params = [p for p in model.parameters() if p.requires_grad]
    optimizer = Adam(trainable_params, lr=config.LEARNING_RATE)

    # Train and test the model
    avg_train_loss, train_ppl, avg_val_loss, val_ppl = train_and_validate(
        model,
        optimizer,
        device,
        train_loader,
        val_loader,
        config.NUM_EPOCHS,
        config.CKPT_DIR,
    )
    avg_test_loss, test_ppl = test(model, test_loader, device)

    results_dict = {
        "Average train loss": avg_train_loss,
        "Average train perplexity": train_ppl,
        "Average val loss": avg_val_loss,
        "Average val perplexity": val_ppl,
        "Average test loss": avg_test_loss,
        "Average test perplexity": test_ppl,
    }

    print("TRAIN AND TEST RESULTS")
    for metric_name, metric_value in results_dict.items():
        print(f"{metric_name}: {metric_value}\n")

    # Save training and test performance
    with open(config.CKPT_DIR + "/train_test_results.txt", 'w') as f:
        for metric_name, metric_value in results_dict.items():
            f.write(f'{metric_name}: {metric_value}\n')
|
| 167 |
+
|
| 168 |
+
|
| 169 |
+
### Get embeddings from model
|
| 170 |
+
# best_model_pth = config.MLM_MODEL_PATH + "/best_model"
|
| 171 |
+
|
| 172 |
+
# model = AutoModel.from_pretrained(best_model_pth)
|
| 173 |
+
# tokenizer = AutoTokenizer.from_pretrained(best_model_pth)
|
| 174 |
+
# model.eval().to(device)
|
| 175 |
+
|
| 176 |
+
# random_seq = "WPIQMVYSLGQHADYMQWFTIMPPPIEMIFVWHNCTQHDYSFRERAGEVDQARMKTEMAR"
|
| 177 |
+
# inputs = tokenizer(random_seq, return_tensors='pt')
|
| 178 |
+
# inputs = {k: v.to(device) for k, v in inputs.items()}
|
| 179 |
+
# inputs = inputs['input_ids']
|
| 180 |
+
# print(inputs)
|
| 181 |
+
# with torch.no_grad():
|
| 182 |
+
# outputs = model(inputs).last_hidden_state
|
| 183 |
+
# print(outputs)
|
| 184 |
+
# print(outputs.size())
|
benchmarks/Supervised/.DS_Store
ADDED
|
Binary file (6.15 kB). View file
|
|
|
benchmarks/Supervised/Localization/cell_localization_predictor.py
ADDED
|
@@ -0,0 +1,224 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
import torch.optim as optim
|
| 4 |
+
from torch.utils.data import DataLoader, Dataset
|
| 5 |
+
from transformers import AutoModel, AutoTokenizer
|
| 6 |
+
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
|
| 7 |
+
|
| 8 |
+
from tqdm import tqdm
|
| 9 |
+
from datetime import datetime
|
| 10 |
+
import pandas as pd
|
| 11 |
+
import numpy as np
|
| 12 |
+
import pickle
|
| 13 |
+
import os
|
| 14 |
+
|
| 15 |
+
# Hyperparameters dictionary
|
| 16 |
+
# Project root; every model and data location below is derived from it.
path = "/workspace/sg666/MDpLM"

hyperparams = dict(
    # Optimisation settings
    batch_size=1,
    learning_rate=5e-4,
    num_epochs=5,
    # Embedding models
    esm_model_path="facebook/esm2_t33_650M_UR50D",
    mlm_model_path=path + "/benchmarks/MLM/model_ckpts/best_model_epoch",
    mdlm_model_path=path + "/checkpoints/membrane_automodel/epochs30_lr3e-4_bsz16_gradclip1_beta-one0.9_beta-two0.999_bf16_all-params",
    # Localization datasets
    train_data=path + "/benchmarks/Supervised/Localization/true_deeploc2.0_cell-local_train-val.csv",
    test_data=path + "/benchmarks/Supervised/Localization/true_deeploc2.0_cell-local_test.csv",
)
|
| 28 |
+
|
| 29 |
+
# Helper functions to obtain all embeddings for a sequence
|
| 30 |
+
def load_models(esm_model_path, mlm_model_path, mdlm_model_path, device=None):
    """Load the shared tokenizer and the three embedding models.

    Args:
        esm_model_path: Path or hub id of the ESM model; also supplies the tokenizer.
        mlm_model_path: Path of the MLM checkpoint.
        mdlm_model_path: Path of the MDLM checkpoint.
        device: Device the models are moved to.  Defaults to CUDA when it is
            available, otherwise CPU.  Previously this was read from a global
            ``device`` that only exists when the file runs as a script.

    Returns:
        ``(esm_tokenizer, esm_model, mlm_model, mdlm_model)``.
    """
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    esm_tokenizer = AutoTokenizer.from_pretrained(esm_model_path)
    esm_model = AutoModel.from_pretrained(esm_model_path).to(device)
    mlm_model = AutoModel.from_pretrained(mlm_model_path).to(device)
    mdlm_model = AutoModel.from_pretrained(mdlm_model_path).to(device)

    return esm_tokenizer, esm_model, mlm_model, mdlm_model
|
| 37 |
+
|
| 38 |
+
def get_latents(embedding_type, tokenizer, esm_model, mlm_model, mdlm_model, sequence, device):
    """Embed ``sequence`` with the model selected by ``embedding_type``.

    Args:
        embedding_type: One of ``"esm"``, ``"mlm"`` or ``"mdlm"``.
        tokenizer: Callable tokenizer shared by all three models.
        esm_model: Model used for ``"esm"``; receives the full tokenizer output.
        mlm_model: Model used for ``"mlm"``; receives only ``input_ids``.
        mdlm_model: Model used for ``"mdlm"``; receives only ``input_ids``.
        sequence: Sequence string to embed.
        device: Device the inputs are moved to.

    Returns:
        The model's ``last_hidden_state`` with dimension 0 squeezed.

    Raises:
        ValueError: If ``embedding_type`` is not a supported name.
    """
    # Validate before doing any work; an unknown type used to fall through
    # every branch and fail with an opaque UnboundLocalError.
    if embedding_type not in ("esm", "mlm", "mdlm"):
        raise ValueError(
            f"Unknown embedding_type {embedding_type!r}; expected 'esm', 'mlm' or 'mdlm'"
        )

    encoded = tokenizer(sequence, return_tensors='pt')

    with torch.no_grad():
        if embedding_type == "esm":
            inputs = encoded.to(device)
            hidden = esm_model(**inputs).last_hidden_state
        else:
            selected_model = mlm_model if embedding_type == "mlm" else mdlm_model
            input_ids = encoded['input_ids'].to(device)
            hidden = selected_model(input_ids).last_hidden_state

    return hidden.squeeze(0)
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
# Dataset class that reads sequences and labels from a CSV file
|
| 58 |
+
class LocalizationDataset(Dataset):
    """Dataset yielding ``(embeddings, label)`` pairs for cell-membrane localization.

    Rows are read from a CSV file and sequences of length 1024 or more are
    dropped.  Embeddings are computed on access with the model selected by
    ``embedding_type``.
    """

    def __init__(self, embedding_type, csv_file, esm_model_path, mlm_model_path, mdlm_model_path, device):
        """Read ``csv_file`` and load the tokenizer and embedding models."""
        self.data = pd.read_csv(csv_file)
        self.data = self.data[self.data['Sequence'].apply(len) < 1024].reset_index(drop=True)
        self.embedding_type = embedding_type
        self.tokenizer, self.esm_model, self.mlm_model, self.mdlm_model = load_models(esm_model_path, mlm_model_path, mdlm_model_path)
        self.device = device

    def __len__(self):
        """Return the number of retained rows."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the embeddings and the binary label for row ``idx``."""
        row = self.data.iloc[idx]

        # Argument order must follow get_latents(embedding_type, tokenizer,
        # esm_model, mlm_model, mdlm_model, ...).  The ESM and MLM models were
        # previously passed in swapped positions, so "esm" embeddings came
        # from the MLM model and vice versa.
        embeddings = get_latents(
            self.embedding_type,
            self.tokenizer,
            self.esm_model,
            self.mlm_model,
            self.mdlm_model,
            row['Sequence'],
            self.device,
        )

        # Any non-zero 'Cell membrane' value is the positive class.
        label = 0 if row['Cell membrane'] == 0 else 1
        labels = torch.tensor(label, dtype=torch.float32).view(1, 1).squeeze(-1)

        return embeddings, labels
|
| 78 |
+
|
| 79 |
+
# Predict localization with MLP head using pooled embeddings
|
| 80 |
+
class LocalizationPredictor(nn.Module):
    """Two-layer MLP head producing one probability per input sequence."""

    def __init__(self, input_dim):
        """Build the classifier for embeddings whose last dimension is ``input_dim``."""
        super().__init__()
        self.classifier = nn.Sequential(
            nn.Linear(input_dim, 640),
            nn.ReLU(),
            nn.Linear(640, 1)
        )

    def forward(self, embeddings):
        """Return a probability in (0, 1) for each item in the batch.

        The classifier yields one logit per position; the logits are averaged
        over dimension 1 and squashed with a sigmoid.
        NOTE(review): assumes ``embeddings`` is (batch, seq_len, input_dim) —
        confirm against the dataset.
        """
        logits = self.classifier(embeddings)
        logits = torch.mean(logits, dim=1)
        # A single logit per sample needs a sigmoid.  The previous softmax ran
        # over a size-1 dimension and therefore always returned exactly 1.0,
        # which also made the gradient zero.
        probs = torch.sigmoid(logits)
        return probs
|
| 94 |
+
|
| 95 |
+
# Training function
|
| 96 |
+
def train(model, dataloader, optimizer, criterion, device):
    """Run one optimisation pass over ``dataloader``.

    Returns:
        The summed batch losses divided by the number of batches.
    """
    model.train()
    running_loss = 0
    for batch_embeddings, batch_labels in tqdm(dataloader):
        batch_embeddings = batch_embeddings.to(device)
        batch_labels = batch_labels.to(device)

        optimizer.zero_grad()
        batch_loss = criterion(model(batch_embeddings), batch_labels)
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()
    return running_loss / len(dataloader)
|
| 108 |
+
|
| 109 |
+
# Evaluation function
|
| 110 |
+
def evaluate(model, dataloader, device):
    """Collect the model outputs and labels for every batch in ``dataloader``.

    Returns:
        ``(preds, true_labels)``: two lists holding one NumPy array per batch.
    """
    model.eval()
    collected_outputs = []
    collected_labels = []
    with torch.no_grad():
        for batch_embeddings, batch_labels in tqdm(dataloader):
            batch_embeddings = batch_embeddings.to(device)
            batch_labels = batch_labels.to(device)
            batch_outputs = model(batch_embeddings)
            collected_outputs.append(batch_outputs.cpu().numpy())
            collected_labels.append(batch_labels.cpu().numpy())
    return collected_outputs, collected_labels
|
| 120 |
+
|
| 121 |
+
# Metrics calculation
|
| 122 |
+
def calculate_metrics(preds, labels, threshold=0.5):
    """Compute classification metrics over all predictions pooled together.

    Metrics were previously computed per batch and then averaged.  With
    single-sample batches, precision, recall and F1 are each 1 or 0 per batch,
    so every reported metric collapsed to plain accuracy.  Pooling the batches
    first yields the dataset-level metrics.

    Args:
        preds: List of per-batch arrays of predicted probabilities.
        labels: List of per-batch arrays of 0/1 labels.
        threshold: Probabilities strictly above this value count as class 1.

    Returns:
        NumPy array ``[accuracy, precision, recall, f1_macro, f1_micro]``.

    Raises:
        ValueError: If ``preds`` is empty.
    """
    if len(preds) == 0:
        raise ValueError("calculate_metrics received no predictions")

    flat_preds = np.concatenate([np.asarray(p).reshape(-1) for p in preds])
    flat_labels = np.concatenate([np.asarray(l).reshape(-1) for l in labels])

    predicted_classes = (flat_preds > threshold).astype(int)
    true_classes = flat_labels.astype(int)

    accuracy = accuracy_score(true_classes, predicted_classes)
    precision = precision_score(true_classes, predicted_classes, average='macro')
    recall = recall_score(true_classes, predicted_classes, average='macro')
    f1_macro = f1_score(true_classes, predicted_classes, average='macro')
    f1_micro = f1_score(true_classes, predicted_classes, average='micro')

    avg_metrics = np.array([accuracy, precision, recall, f1_macro, f1_micro])
    print(avg_metrics)
    return avg_metrics
|
| 138 |
+
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
if __name__ == "__main__":
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Labels for the console report and for the results file (the last one differs).
    console_labels = ["Accuracy", "Precision", "Recall", "F1 Macro Score", "F1 Micro Score"]
    file_labels = ["Accuracy", "Precision", "Recall", "F1 Macro Score", "F1 Micro"]

    for embedding_type in ['mdlm', 'esm', 'mlm']:
        # Build the train and test datasets (in that order) with identical settings.
        train_dataset, test_dataset = (
            LocalizationDataset(
                embedding_type,
                hyperparams[split_key],
                hyperparams['esm_model_path'],
                hyperparams['mlm_model_path'],
                hyperparams['mdlm_model_path'],
                device,
            )
            for split_key in ('train_data', 'test_data')
        )

        # Prepare dataloaders
        batch_size = hyperparams["batch_size"]
        train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

        # Classifier head, optimizer and loss
        if embedding_type == "mdlm":
            input_dim = 640
        else:
            input_dim = 1280
        model = LocalizationPredictor(input_dim=input_dim).to(device)
        optimizer = optim.Adam(model.parameters(), lr=hyperparams["learning_rate"])
        criterion = nn.BCELoss()

        # Checkpoints live in a per-embedding directory named after the hyperparameters.
        base_checkpoint_dir = f"{path}/benchmarks/Supervised/Localization/model_checkpoints/{embedding_type}"
        hyperparam_str = f"batch_{hyperparams['batch_size']}_lr_{hyperparams['learning_rate']}_epochs_{hyperparams['num_epochs']}"
        model_checkpoint_dir = os.path.join(base_checkpoint_dir, hyperparam_str)
        os.makedirs(model_checkpoint_dir, exist_ok=True)

        # Training loop
        for epoch in range(hyperparams["num_epochs"]):
            epoch_number = epoch + 1
            train_loss = train(model, train_dataloader, optimizer, criterion, device)
            print(f"EPOCH {epoch_number}/{hyperparams['num_epochs']}")
            print(f"TRAIN LOSS: {train_loss:.4f}")
            print("\n")

            # Checkpoint after every epoch
            checkpoint_path = os.path.join(model_checkpoint_dir, f"epoch{epoch_number}.pth")
            checkpoint_state = {
                'epoch': epoch_number,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'loss': train_loss,
            }
            torch.save(checkpoint_state, checkpoint_path)
            print(f"Checkpoint saved at {checkpoint_path}\n")

            # The hyperparameters never change mid-run, so write them once.
            if epoch == 0:
                hyperparams_file = os.path.join(model_checkpoint_dir, "hyperparams.txt")
                with open(hyperparams_file, 'w') as f:
                    for key, value in hyperparams.items():
                        f.write(f"{key}: {value}\n")
                print(f"Hyperparameters saved at {hyperparams_file}\n")

        # Evaluate model on test dataset
        print("Test set")
        test_preds, test_labels = evaluate(model, test_dataloader, device)
        test_metrics = calculate_metrics(test_preds, test_labels)
        print(test_metrics)
        print("TEST METRICS:")
        for position, label in enumerate(console_labels):
            print(f"{label}: {test_metrics[position]:.4f}")

        # Save test results
        test_results_file = os.path.join(model_checkpoint_dir, "test_results.txt")
        with open(test_results_file, 'w') as f:
            f.write("TEST METRICS:\n")
            for position, label in enumerate(file_labels):
                f.write(f"{label}: {test_metrics[position]:.4f}\n")
        print(f"Test results saved at {test_results_file}\n")
|
benchmarks/Supervised/Localization/process_cell_local_data.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
|
| 3 |
+
# Drop over-long sequences from the DeepLoc 2.0 splits and write the filtered copies.
path = "/home/sg666/MDpLM/benchmarks/Supervised/Localization"


def _drop_long_sequences(frame):
    """Keep rows whose 'Sequence' is shorter than 1024 characters, re-indexed from 0."""
    is_short = frame['Sequence'].apply(len) < 1024
    return frame[is_short].reset_index(drop=True)


# Read both splits before writing anything.
train_val = pd.read_csv(path + "/deeploc2.0_train_val.csv")
test = pd.read_csv(path + "/deeploc2.0_test.csv")

train_val = _drop_long_sequences(train_val)
test = _drop_long_sequences(test)

train_val.to_csv(path + "/true_deeploc2.0_cell-local_train-val.csv", index=False)
test.to_csv(path + "/true_deeploc2.0_cell-local_test.csv", index=False)
|
benchmarks/Supervised/Localization/true_deeploc2.0_cell-local_test.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
memdlm_schematic.png → benchmarks/Supervised/Localization/true_deeploc2.0_cell-local_train-val.csv
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0ca38d78cc8fbc8777a23f456477901f5af4bbfda7a0908081effd09adbe7e94
|
| 3 |
+
size 12568908
|
benchmarks/Supervised/Membrane Type/membrane_type_predictor.py
ADDED
|
@@ -0,0 +1,226 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
import torch.optim as optim
|
| 4 |
+
from torch.utils.data import DataLoader, Dataset
|
| 5 |
+
from transformers import AutoModel, AutoTokenizer
|
| 6 |
+
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
|
| 7 |
+
|
| 8 |
+
from tqdm import tqdm
|
| 9 |
+
from datetime import datetime
|
| 10 |
+
import pandas as pd
|
| 11 |
+
import numpy as np
|
| 12 |
+
import pickle
|
| 13 |
+
import os
|
| 14 |
+
|
| 15 |
+
# Hyperparameters dictionary
|
| 16 |
+
# Project root; every model and data location below is derived from it.
path = "/workspace/sg666/MDpLM"

hyperparams = dict(
    # Optimisation settings
    batch_size=1,
    learning_rate=5e-4,
    num_epochs=5,
    # Embedding models
    esm_model_path="facebook/esm2_t33_650M_UR50D",
    mlm_model_path=path + "/benchmarks/MLM/model_ckpts/best_model_epoch",
    mdlm_model_path=path + "/checkpoints/membrane_automodel/epochs30_lr3e-4_bsz16_gradclip1_beta-one0.9_beta-two0.999_bf16_all-params",
    # Membrane-type datasets
    train_data=path + "/benchmarks/Supervised/Membrane Type/membrane_type_train.csv",
    test_data=path + "/benchmarks/Supervised/Membrane Type/membrane_type_test.csv",
)
|
| 28 |
+
|
| 29 |
+
# Helper functions to obtain all embeddings for a sequence
|
| 30 |
+
def load_models(esm_model_path, mlm_model_path, mdlm_model_path):
    """Load the ESM tokenizer and the three embedding models.

    Args:
        esm_model_path: Name or path of the ESM checkpoint; also used for the tokenizer.
        mlm_model_path: Name or path of the MLM checkpoint.
        mdlm_model_path: Name or path of the MDLM checkpoint.

    Returns:
        Tuple ``(esm_tokenizer, esm_model, mlm_model, mdlm_model)``.

    Note:
        Models are moved to the module-level ``device`` (set in the
        ``__main__`` section), not to a device passed in by the caller.
    """
    esm_tokenizer = AutoTokenizer.from_pretrained(esm_model_path)
    # The generator is consumed in order, so checkpoints load as ESM, MLM, MDLM.
    esm_model, mlm_model, mdlm_model = (
        AutoModel.from_pretrained(checkpoint).to(device)
        for checkpoint in (esm_model_path, mlm_model_path, mdlm_model_path)
    )
    return esm_tokenizer, esm_model, mlm_model, mdlm_model
|
| 36 |
+
|
| 37 |
+
def get_latents(embedding_type, tokenizer, esm_model, mlm_model, mdlm_model, sequence, device):
    """Embed one sequence with the model selected by ``embedding_type``.

    Args:
        embedding_type: One of ``"esm"``, ``"mlm"`` or ``"mdlm"``.
        tokenizer: Callable tokenizer; invoked as ``tokenizer(sequence, return_tensors="pt")``.
        esm_model: Model used when ``embedding_type == "esm"``.
        mlm_model: Model used when ``embedding_type == "mlm"``.
        mdlm_model: Model used when ``embedding_type == "mdlm"``.
        sequence: Sequence string to embed.
        device: Device the tokenized inputs are moved to.

    Returns:
        The model's ``last_hidden_state`` with dimension 0 squeezed out
        (a no-op if that dimension is not of size 1).

    Raises:
        ValueError: If ``embedding_type`` is not a supported name. Previously
            this surfaced as an ``UnboundLocalError`` at the return statement.
    """
    models = {"esm": esm_model, "mlm": mlm_model, "mdlm": mdlm_model}
    if embedding_type not in models:
        raise ValueError(
            f"Unknown embedding_type {embedding_type!r}; expected one of {sorted(models)}"
        )

    inputs = tokenizer(sequence, return_tensors="pt").to(device)
    # Embeddings are used as fixed features, so no autograd graph is needed.
    with torch.no_grad():
        outputs = models[embedding_type](**inputs)
    return outputs.last_hidden_state.squeeze(0)
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
# Dataset that embeds each sequence on the fly and pairs it with its multi-label target
class MembraneDataset(Dataset):
    """Membrane-type dataset backed by a CSV file.

    Each item is ``(embeddings, labels)``: the embedding of the row's
    ``Sequence`` produced by the model selected via ``embedding_type``, and a
    float32 tensor built from the row's label columns.
    """

    def __init__(self, embedding_type, csv_file, esm_model_path, mlm_model_path, mdlm_model_path, device):
        """Read the CSV, filter long sequences, load the models and build labels.

        Args:
            embedding_type: Forwarded to ``get_latents`` for every item.
            csv_file: Path of a CSV with a ``Sequence`` column.
            esm_model_path: Forwarded to ``load_models``.
            mlm_model_path: Forwarded to ``load_models``.
            mdlm_model_path: Forwarded to ``load_models``.
            device: Device passed to ``get_latents`` for the tokenized inputs.
        """
        self.data = pd.read_csv(csv_file)
        # Keep only sequences shorter than 1024 residues.
        self.data = self.data[self.data['Sequence'].apply(len) < 1024].reset_index(drop=True)

        self.embedding_type = embedding_type
        self.device = device

        self.tokenizer, self.esm_model, self.mlm_model, self.mdlm_model = load_models(esm_model_path, mlm_model_path, mdlm_model_path)

        # Collect the values of the columns at positions 3..6 into one list per row.
        # NOTE(review): presumably these are the four membrane-type indicator
        # columns -- verify against the CSV header.
        self.data['label'] = self.data.iloc[:, 3:7].values.tolist()

    def __len__(self):
        """Return the number of rows that survived the length filter."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(embeddings, labels)`` for the row at position ``idx``."""
        row = self.data.iloc[idx]
        embeddings = get_latents(self.embedding_type, self.tokenizer, self.esm_model, self.mlm_model, self.mdlm_model,
                                 row['Sequence'], self.device)
        labels = torch.tensor(row['label'], dtype=torch.float32)

        return embeddings, labels
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
# Predict membrane-type probabilities with an MLP head applied to per-position embeddings
class MembranePredictor(nn.Module):
    """Two-layer MLP head producing one probability per class.

    The MLP is applied to every position of the input, the resulting logits
    are averaged over dimension 1, and a sigmoid turns them into independent
    per-class probabilities.
    """

    def __init__(self, input_dim, num_classes):
        """Build the head.

        Args:
            input_dim: Size of the last dimension of the incoming embeddings.
            num_classes: Number of output classes.
        """
        super().__init__()
        hidden_dim = 640
        self.classifier = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, num_classes),
        )

    def forward(self, embeddings):
        """Return per-class probabilities in (0, 1).

        Args:
            embeddings: Tensor whose last dimension is ``input_dim``; dimension 1
                is averaged away (presumably the sequence-length axis of a
                ``(batch, seq_len, input_dim)`` input -- confirm with the caller).

        Returns:
            Sigmoid probabilities, NOT raw logits.
        """
        per_position_logits = self.classifier(embeddings)
        pooled_logits = per_position_logits.mean(dim=1)
        return torch.sigmoid(pooled_logits)
|
| 99 |
+
|
| 100 |
+
# Training function
|
| 101 |
+
def train(model, dataloader, optimizer, criterion, device):
    """Run one training epoch and return the mean per-batch loss.

    Args:
        model: Module to optimise; switched to training mode.
        dataloader: Iterable of ``(embeddings, labels)`` batches supporting ``len()``.
        optimizer: Optimizer stepping ``model``'s parameters.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        device: Device each batch is moved to.

    Returns:
        Sum of the batch losses divided by ``len(dataloader)``.
    """
    model.train()
    batch_losses = []
    for batch_embeddings, batch_labels in tqdm(dataloader):
        batch_embeddings = batch_embeddings.to(device)
        batch_labels = batch_labels.to(device)

        optimizer.zero_grad()
        loss = criterion(model(batch_embeddings), batch_labels)
        loss.backward()
        optimizer.step()

        batch_losses.append(loss.item())
    return sum(batch_losses) / len(dataloader)
|
| 113 |
+
|
| 114 |
+
# Evaluation function
|
| 115 |
+
def evaluate(model, dataloader, device):
    """Run inference over ``dataloader`` and collect outputs and labels.

    Args:
        model: Trained module; switched to evaluation mode.
        dataloader: Iterable of ``(embeddings, labels)`` batches.
        device: Device each batch is moved to before the forward pass.

    Returns:
        Tuple ``(preds, true_labels)``: two lists with one NumPy array per
        batch, holding the model outputs and the matching labels.
    """
    model.eval()
    collected_preds = []
    collected_labels = []
    with torch.no_grad():
        for batch_embeddings, batch_labels in tqdm(dataloader):
            batch_embeddings = batch_embeddings.to(device)
            batch_labels = batch_labels.to(device)
            batch_outputs = model(batch_embeddings)
            collected_preds.append(batch_outputs.cpu().numpy())
            collected_labels.append(batch_labels.cpu().numpy())
    return collected_preds, collected_labels
|
| 125 |
+
|
| 126 |
+
# Metrics calculation
|
| 127 |
+
def calculate_metrics(preds, labels, threshold=0.5):
    """Compute multi-label classification metrics over the whole evaluation set.

    The per-batch arrays are stacked into single ``(n_samples, n_classes)``
    matrices before scoring. Scoring each sample separately and averaging
    (the previous behaviour) makes label-wise macro metrics meaningless: a
    perfectly predicted sample with a single positive label scores only
    1/n_classes macro precision, because every label without a predicted
    positive contributes 0.

    Args:
        preds: Sequence of arrays of predicted probabilities, one per batch.
        labels: Sequence of arrays of 0/1 ground-truth labels, one per batch.
        threshold: Probabilities strictly above this value count as positive.

    Returns:
        NumPy array ``[accuracy, precision_macro, recall_macro, f1_macro,
        f1_micro]``. Accuracy is whatever ``accuracy_score`` returns for the
        stacked label matrices.

    Raises:
        ValueError: If ``preds`` is empty.
    """
    if len(preds) == 0:
        raise ValueError("calculate_metrics() requires at least one batch of predictions")

    probabilities = np.concatenate([np.atleast_2d(p) for p in preds], axis=0)
    targets = np.concatenate([np.atleast_2d(l) for l in labels], axis=0).astype(int)
    binary_preds = (probabilities > threshold).astype(int)

    # zero_division=0 yields the same value sklearn's default ("warn") does,
    # without emitting a warning for labels that are never predicted/present.
    accuracy = accuracy_score(targets, binary_preds)
    precision = precision_score(targets, binary_preds, average='macro', zero_division=0)
    recall = recall_score(targets, binary_preds, average='macro', zero_division=0)
    f1_macro = f1_score(targets, binary_preds, average='macro', zero_division=0)
    f1_micro = f1_score(targets, binary_preds, average='micro', zero_division=0)

    return np.array([accuracy, precision, recall, f1_macro, f1_micro])
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
if __name__ == "__main__":
    # Train and evaluate one membrane-type classifier per embedding source.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    for embedding_type in ['mdlm', 'mlm', 'esm']:
        # Initialize datasets
        # NOTE(review): each dataset loads its own copy of all three embedding
        # models via load_models(); sharing them would need an interface change.
        train_dataset = MembraneDataset(embedding_type,
                                        hyperparams['train_data'],
                                        hyperparams['esm_model_path'],
                                        hyperparams['mlm_model_path'],
                                        hyperparams['mdlm_model_path'],
                                        device)
        test_dataset = MembraneDataset(embedding_type,
                                       hyperparams['test_data'],
                                       hyperparams['esm_model_path'],
                                       hyperparams['mlm_model_path'],
                                       hyperparams['mdlm_model_path'],
                                       device)

        # Prepare dataloaders
        train_dataloader = DataLoader(train_dataset, batch_size=hyperparams["batch_size"], shuffle=True)
        test_dataloader = DataLoader(test_dataset, batch_size=hyperparams["batch_size"], shuffle=False)

        # Initialize model, optimizer, and loss function
        input_dim = 640 if embedding_type == "mdlm" else 1280
        model = MembranePredictor(input_dim=input_dim, num_classes=4).to(device)
        optimizer = optim.Adam(model.parameters(), lr=hyperparams["learning_rate"])
        # The model returns per-class sigmoid probabilities and the targets are
        # multi-hot float vectors, so binary cross-entropy on probabilities is
        # the matching loss. CrossEntropyLoss would re-interpret the
        # probabilities as logits and apply a softmax across the classes.
        criterion = nn.BCELoss()

        # Initialize main directory model checkpoints
        base_checkpoint_dir = f"{path}/benchmarks/Supervised/Membrane Type/model_checkpoints/{embedding_type}"
        # Initialize subdirectory and name it based on hyperparameters
        hyperparam_str = f"batch_{hyperparams['batch_size']}_lr_{hyperparams['learning_rate']}_epochs_{hyperparams['num_epochs']}"
        model_checkpoint_dir = os.path.join(base_checkpoint_dir, hyperparam_str)
        os.makedirs(model_checkpoint_dir, exist_ok=True)

        # Training loop
        for epoch in range(hyperparams["num_epochs"]):
            # Train the model
            train_loss = train(model, train_dataloader, optimizer, criterion, device)
            print(f"EPOCH {epoch+1}/{hyperparams['num_epochs']}")
            print(f"TRAIN LOSS: {train_loss:.4f}")
            print("\n")

            # Save the model checkpoint for the current epoch
            checkpoint_path = os.path.join(model_checkpoint_dir, f"epoch{epoch + 1}.pth")
            torch.save({
                'epoch': epoch + 1,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'loss': train_loss,
            }, checkpoint_path)
            print(f"Checkpoint saved at {checkpoint_path}\n")

            # Save hyperparameters only once
            if epoch == 0:  # Hyperparameters don't change midway through training
                hyperparams_file = os.path.join(model_checkpoint_dir, "hyperparams.txt")
                with open(hyperparams_file, 'w') as f:
                    for key, value in hyperparams.items():
                        f.write(f"{key}: {value}\n")
                print(f"Hyperparameters saved at {hyperparams_file}\n")

        # Evaluate model on test dataset
        print("Test set")
        test_preds, test_labels = evaluate(model, test_dataloader, device)
        test_metrics = calculate_metrics(test_preds, test_labels)
        print("TEST METRICS:")
        print(f"Accuracy: {test_metrics[0]:.4f}")
        print(f"Precision: {test_metrics[1]:.4f}")
        print(f"Recall: {test_metrics[2]:.4f}")
        print(f"F1 Macro Score: {test_metrics[3]:.4f}")
        print(f"F1 Micro Score: {test_metrics[4]:.4f}")

        # Save test results
        test_results_file = os.path.join(model_checkpoint_dir, "test_results.txt")
        with open(test_results_file, 'w') as f:
            f.write("TEST METRICS:\n")
            f.write(f"Accuracy: {test_metrics[0]:.4f}\n")
            f.write(f"Precision: {test_metrics[1]:.4f}\n")
            f.write(f"Recall: {test_metrics[2]:.4f}\n")
            f.write(f"F1 Macro Score: {test_metrics[3]:.4f}\n")
            f.write(f"F1 Micro: {test_metrics[4]:.4f}\n")
        print(f"Test results saved at {test_results_file}\n")
|
benchmarks/Supervised/Membrane Type/membrane_type_test.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
benchmarks/Supervised/Membrane Type/membrane_type_train.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:16b8eec677afa2de578d04ee1a0fc9582b2f8cfc47622cbd6374309cd6ab96f3
|
| 3 |
+
size 12335695
|
benchmarks/Supervised/Membrane Type/split_membrane_type_data.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Splits the DeepLoc 2.1 membrane type data into train/val and testing splits.
# Rows whose "Partition" value is 4 form the test split; all other rows form
# the train/val split.

import pandas as pd

path = "/workspace/a03-sgoel/MDpLM/benchmarks/DeepLoc/Membrane Type"

# Load the unsplit table and drop the 'Unnamed: 0' column.
df = pd.read_csv(f"{path}/unsplit_membrane_type_all.csv").drop(columns=['Unnamed: 0'])

is_test = df['Partition'] == 4
train = df[~is_test]
test = df[is_test]

train.to_csv(f"{path}/membrane_type_train.csv", index=False)
test.to_csv(f"{path}/membrane_type_test.csv", index=False)
|
benchmarks/Supervised/Membrane Type/unsplit_membrane_type_all.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2d878da32a06092f880262048e3c1eb692721c274b0a458fcc712a0dcbd80c71
|
| 3 |
+
size 15683507
|
benchmarks/Supervised/Solubility/solubility_transformer.py
ADDED
|
@@ -0,0 +1,353 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
import torch.optim as optim
|
| 4 |
+
from torch.utils.data import DataLoader, Dataset
|
| 5 |
+
from transformers import AutoModel, AutoTokenizer
|
| 6 |
+
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
|
| 7 |
+
from sklearn.model_selection import ParameterGrid
|
| 8 |
+
from tqdm import tqdm
|
| 9 |
+
import pandas as pd
|
| 10 |
+
import numpy as np
|
| 11 |
+
import sys
|
| 12 |
+
import os
|
| 13 |
+
from datetime import datetime
|
| 14 |
+
import logging
|
| 15 |
+
|
| 16 |
+
# Only show errors from the transformers library.
logging.getLogger("transformers").setLevel(logging.ERROR)

# Root of the project checkout; every local checkpoint/data path below hangs off it.
path = "/workspace/sg666/MDpLM"

# Default run configuration. The grid search in the main script overwrites
# some of these entries in place via hyperparams.update(...). Key order is
# preserved because the dictionary is written out verbatim to the result files.
hyperparams = dict(
    train_data=f"{path}/data/membrane/train.csv",
    val_data=f"{path}/data/membrane/val.csv",
    test_data=f"{path}/data/membrane/test.csv",
    esm_model_path="facebook/esm2_t33_650M_UR50D",
    mlm_model_path=f"{path}/benchmarks/MLM/model_ckpts/best_model_epoch",
    mdlm_model_path=(
        f"{path}/checkpoints/membrane_automodel/"
        "epochs30_lr3e-4_bsz16_gradclip1_beta-one0.9_beta-two0.999_bf16_all-params"
    ),
    batch_size=1,
    learning_rate=5e-5,
    num_epochs=2,
    num_layers=4,
    num_heads=16,
    dropout=0.5,
)
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
# Helper functions to obtain all embeddings for a sequence
|
| 37 |
+
def load_models(esm_model_path, mlm_model_path, mdlm_model_path):
    """Load the ESM tokenizer and the three embedding models.

    Args:
        esm_model_path: Name or path of the ESM checkpoint; also used for the tokenizer.
        mlm_model_path: Name or path of the MLM checkpoint.
        mdlm_model_path: Name or path of the MDLM checkpoint.

    Returns:
        Tuple ``(esm_tokenizer, esm_model, mlm_model, mdlm_model)``.

    Note:
        Models are moved to the module-level ``device`` (set in the
        ``__main__`` section), not to a device passed in by the caller.
    """
    checkpoints = (esm_model_path, mlm_model_path, mdlm_model_path)
    esm_tokenizer = AutoTokenizer.from_pretrained(esm_model_path)
    # The list is built in order, so checkpoints load as ESM, MLM, MDLM.
    loaded = [AutoModel.from_pretrained(checkpoint).to(device) for checkpoint in checkpoints]
    return (esm_tokenizer, *loaded)
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
# Loaded (tokenizer, esm_model, mlm_model, mdlm_model) tuples keyed by the
# three checkpoint paths, so checkpoints are read once per process instead of
# once per embedded sequence.
_LOADED_MODELS = {}


def get_latents(embedding_type, esm_model_path, mlm_model_path, mdlm_model_path, sequence, device):
    """Embed one sequence with the model selected by ``embedding_type``.

    Args:
        embedding_type: One of ``"esm"``, ``"mlm"`` or ``"mdlm"``.
        esm_model_path: Forwarded to ``load_models``.
        mlm_model_path: Forwarded to ``load_models``.
        mdlm_model_path: Forwarded to ``load_models``.
        sequence: Sequence string; upper-cased before tokenization.
        device: Device the tokenized inputs are moved to.

    Returns:
        ``last_hidden_state`` with dimension 0 squeezed out and the first and
        last positions dropped (presumably the tokenizer's special start/end
        tokens -- confirm against the tokenizer).

    Raises:
        ValueError: If ``embedding_type`` is not a supported name. Previously
            this surfaced as an ``UnboundLocalError``, and only after all
            three checkpoints had been loaded.
    """
    if embedding_type not in ("esm", "mlm", "mdlm"):
        raise ValueError(
            f"Unknown embedding_type {embedding_type!r}; expected 'esm', 'mlm' or 'mdlm'"
        )

    # Loading the checkpoints is by far the most expensive step and used to
    # run on every call (i.e. once per dataset item); reuse them instead.
    cache_key = (esm_model_path, mlm_model_path, mdlm_model_path)
    if cache_key not in _LOADED_MODELS:
        _LOADED_MODELS[cache_key] = load_models(*cache_key)
    tokenizer, esm_model, mlm_model, mdlm_model = _LOADED_MODELS[cache_key]

    model = {"esm": esm_model, "mlm": mlm_model, "mdlm": mdlm_model}[embedding_type]

    inputs = tokenizer(sequence.upper(), return_tensors="pt").to(device)['input_ids']
    # Embeddings are used as fixed features, so no autograd graph is needed.
    with torch.no_grad():
        embeddings = model(inputs).last_hidden_state.squeeze(0)[1:-1]

    return embeddings
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
# Dataset class that computes embeddings on the fly and derives per-residue labels
class SolubilityDataset(Dataset):
    """Per-residue solubility dataset backed by a CSV file.

    Each item is ``(embeddings, labels, seq_len)``. Labels are derived from
    the letter case of the row's ``Sequence``: lowercase residues (soluble)
    map to 0 and all others (insoluble) map to 1.
    """

    def __init__(self, embedding_type, csv_file, esm_model_path, mlm_model_path, mdlm_model_path, device,
                 max_samples=None):
        """Read the CSV and remember how items should be embedded.

        Args:
            embedding_type: Forwarded to ``get_latents`` for every item.
            csv_file: Path of a CSV with a ``Sequence`` column.
            esm_model_path: Forwarded to ``get_latents``.
            mlm_model_path: Forwarded to ``get_latents``.
            mdlm_model_path: Forwarded to ``get_latents``.
            device: Device passed to ``get_latents``.
            max_samples: Optional cap on the number of rows kept (first rows
                of the file); ``None`` keeps everything. Replaces a leftover
                hard-coded ``.head(5)`` that silently truncated every split
                to five sequences.
        """
        data = pd.read_csv(csv_file)
        if max_samples is not None:
            data = data.head(max_samples)
        # NOTE(review): no sequence-length filter is applied here.
        self.data = data
        self.embedding_type = embedding_type
        self.esm_model_path = esm_model_path
        self.mlm_model_path = mlm_model_path
        self.mdlm_model_path = mdlm_model_path
        self.device = device

    def __len__(self):
        """Return the number of rows in the dataset."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(embeddings, labels, seq_len)`` for the row at position ``idx``."""
        sequence = self.data.iloc[idx]['Sequence']
        seq_len = len(sequence)
        embeddings = get_latents(self.embedding_type, self.esm_model_path, self.mlm_model_path, self.mdlm_model_path,
                                 sequence, self.device)
        # Lowercase residues = soluble, uppercase = insoluble
        label = [0 if residue.islower() else 1 for residue in sequence]
        labels = torch.tensor(label, dtype=torch.float32)

        return embeddings, labels, seq_len
|
| 86 |
+
|
| 87 |
+
# Per-residue solubility head (currently a plain two-layer MLP)
class SolubilityPredictor(nn.Module):
    """Two-layer MLP that outputs one probability per input position.

    ``hidden_dim``, ``num_heads``, ``num_layers`` and ``dropout`` are accepted
    so existing call sites keep working, but the current implementation does
    not use them.
    """

    def __init__(self, input_dim, hidden_dim, num_heads, num_layers, dropout):
        """Build the head.

        Args:
            input_dim: Size of the last dimension of the incoming embeddings.
            hidden_dim: Unused.
            num_heads: Unused.
            num_layers: Unused.
            dropout: Unused.
        """
        super().__init__()
        mlp_width = 320
        self.classifier = nn.Sequential(
            nn.Linear(input_dim, mlp_width),
            nn.ReLU(),
            nn.Linear(mlp_width, 1),
        )
        self.sigmoid = nn.Sigmoid()

    def forward(self, embeddings):
        """Return sigmoid probabilities with the trailing size-1 dimension removed.

        Args:
            embeddings: Tensor whose last dimension is ``input_dim``.

        Returns:
            Tensor of probabilities shaped like ``embeddings`` minus its last
            dimension.
        """
        scores = self.classifier(embeddings)
        scores = scores.squeeze(-1)
        return self.sigmoid(scores)
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
# Training function
|
| 119 |
+
def train(model, train_loader, val_loader, optimizer, criterion, device):
    """Train the model for one epoch, then measure the validation loss.

    Args:
        model (nn.Module): model that will be trained
        train_loader (DataLoader): batches of ``(embeddings, labels, seq_len)`` used for training
        val_loader (DataLoader): batches of ``(embeddings, labels, seq_len)`` used for validation
        optimizer (torch.optim): optimizer
        criterion (nn.Module): loss function, called as ``criterion(outputs, labels)``
        device (torch.device): device (GPU or CPU) the batches are moved to
    Returns:
        tuple: ``(train_loss, val_loss)``, each the sum of the per-batch
        losses divided by the number of batches in the respective loader
    """
    # Training loop
    model.train()
    train_loss = 0.0
    for embeddings, labels, _seq_len in tqdm(train_loader, total=len(train_loader), leave=True, file=sys.stdout):
        embeddings, labels = embeddings.to(device), labels.to(device)
        # NOTE: embeddings are fed to the model exactly as in the validation
        # loop below. An earlier ``embeddings.squeeze(1)`` here was a no-op
        # for sequences longer than one residue and, for length-1 sequences,
        # produced outputs whose shape no longer matched the labels.
        optimizer.zero_grad()
        outputs = model(embeddings)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
    sys.stdout.flush()

    # Validation loop
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for embeddings, labels, _seq_len in tqdm(val_loader, total=len(val_loader), leave=True, file=sys.stdout):
            embeddings, labels = embeddings.to(device), labels.to(device)
            outputs = model(embeddings)
            val_loss += criterion(outputs, labels).item()
    sys.stdout.flush()

    return train_loss/len(train_loader), val_loss/len(val_loader)
|
| 167 |
+
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
# Evaluation function
|
| 171 |
+
def evaluate(model, dataloader, device):
    """
    Performs inference with a trained model
    Args:
        model (nn.Module): the trained model
        dataloader (DataLoader): PyTorch DataLoader yielding (embeddings, labels, seq_len)
        device (torch.device): device (GPU or CPU) to be used for inference
    Returns:
        preds (list): one NumPy array of model outputs per batch
        true_labels (list): one NumPy array of ground truth labels per batch
    """
    model.eval()
    collected_preds = []
    collected_labels = []
    with torch.no_grad():
        for batch in tqdm(dataloader):
            # The third element (sequence length) is not needed for inference.
            batch_embeddings, batch_labels, _ = batch
            batch_embeddings = batch_embeddings.to(device)
            batch_labels = batch_labels.to(device)
            batch_outputs = model(batch_embeddings)
            collected_preds.append(batch_outputs.cpu().numpy())
            collected_labels.append(batch_labels.cpu().numpy())
    return collected_preds, collected_labels
|
| 191 |
+
|
| 192 |
+
# Metrics calculation
|
| 193 |
+
def calculate_metrics(preds, labels, threshold=0.5):
    """
    Calculates metrics to assess model performance
    Args:
        preds (list): arrays of predicted probabilities
        labels (list): arrays of ground truth labels
        threshold (float): a probability strictly above this value is counted as class 1
    Returns:
        accuracy (float): accuracy
        precision (float): precision
        recall (float): recall
        f1 (float): F1 score
        roc_auc (float): AUROC score
    """
    # Flatten every array into one long vector; only the first len(labels)
    # prediction arrays are paired with labels, as zip() would do.
    paired = list(zip(preds, labels))
    flat_prob_preds = np.array([value for pred, _ in paired for value in pred.flatten()])
    flat_binary_preds = np.array(
        [value for pred, _ in paired for value in (pred > threshold).astype(int).flatten()]
    )
    flat_labels = np.array([value for _, label in paired for value in label.flatten()])

    # Threshold-based metrics use the binarised predictions; AUROC uses the raw probabilities.
    hard_scores = tuple(
        metric(flat_labels, flat_binary_preds)
        for metric in (accuracy_score, precision_score, recall_score, f1_score)
    )
    roc_auc = roc_auc_score(flat_labels, flat_prob_preds)

    return (*hard_scores, roc_auc)
|
| 225 |
+
|
| 226 |
+
|
| 227 |
+
if __name__ == "__main__":
    # Grid-search a per-residue solubility head for each embedding source.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(device)

    for embedding_type in ['mlm', 'esm', 'mdlm']:
        best_val_loss = float('inf')
        best_model = None
        best_hyperparams = None

        # Load train, test and validation datasets
        train_dataset = SolubilityDataset(embedding_type,
                                          hyperparams['train_data'],
                                          hyperparams['esm_model_path'],
                                          hyperparams['mlm_model_path'],
                                          hyperparams['mdlm_model_path'],
                                          device)
        test_dataset = SolubilityDataset(embedding_type,
                                         hyperparams['test_data'],
                                         hyperparams['esm_model_path'],
                                         hyperparams['mlm_model_path'],
                                         hyperparams['mdlm_model_path'],
                                         device)
        val_dataset = SolubilityDataset(embedding_type,
                                        hyperparams['val_data'],
                                        hyperparams['esm_model_path'],
                                        hyperparams['mlm_model_path'],
                                        hyperparams['mdlm_model_path'],
                                        device)

        ### Grid search to explore hyperparameter space
        # Define hyperparameters
        param_grid = {
            'learning_rate': [5e-4],
            'batch_size': [1],
            'num_heads': [4],
            'num_layers': [2],
            'dropout': [0.5],
            'num_epochs': [5]
        }

        # Loop over the parameter grid
        grid = ParameterGrid(param_grid)
        for params in grid:
            # Update hyperparameters
            hyperparams.update(params)

            # Load PyTorch datasets into DataLoaders. Built after the update so
            # the grid's batch_size is the one actually used.
            train_dataloader = DataLoader(train_dataset, batch_size=hyperparams["batch_size"], shuffle=True)
            val_dataloader = DataLoader(val_dataset, batch_size=hyperparams["batch_size"], shuffle=False)
            test_dataloader = DataLoader(test_dataset, batch_size=hyperparams["batch_size"], shuffle=False)

            # Update model with the new set of hyperparams
            input_dim = 640 if embedding_type == "mdlm" else 1280
            hidden_dim = input_dim
            model = SolubilityPredictor(
                input_dim=input_dim,
                hidden_dim=hidden_dim,
                num_layers=hyperparams["num_layers"],
                num_heads=hyperparams["num_heads"],
                dropout=hyperparams['dropout']
            )
            model = model.to(device)  # Push model to the selected device

            # Update optimizer
            optimizer = optim.Adam(model.parameters(), lr=hyperparams["learning_rate"])
            criterion = nn.BCELoss()

            # Train
            for epoch in range(hyperparams["num_epochs"]):
                print(f"EPOCH {epoch+1}/{hyperparams['num_epochs']}")
                train_loss, val_loss = train(model, train_dataloader, val_dataloader, optimizer, criterion, device)
                print(f"TRAIN LOSS: {train_loss:.4f}")
                print(f"VALIDATION LOSS: {val_loss:.4f}\n")
                sys.stdout.flush()

                if val_loss < best_val_loss:
                    best_val_loss = val_loss
                    # state_dict() hands back references to the live parameter
                    # tensors, which later optimizer steps overwrite in place.
                    # Clone them so the snapshot really is the best epoch.
                    best_model = {name: tensor.detach().clone()
                                  for name, tensor in model.state_dict().items()}
                    # Remember the settings that produced the snapshot; the
                    # shared dict is overwritten by later grid entries.
                    best_hyperparams = dict(hyperparams)

            # Evaluate model on test sequences
            print("TEST METRICS:")
            test_preds, test_labels = evaluate(model, test_dataloader, device)
            test_metrics = calculate_metrics(test_preds, test_labels)
            print(f"Accuracy: {test_metrics[0]:.4f}")
            print(f"Precision: {test_metrics[1]:.4f}")
            print(f"Recall: {test_metrics[2]:.4f}")
            print(f"F1 Score: {test_metrics[3]:.4f}")
            print(f"ROC AUC: {test_metrics[4]:.4f}")
            print(f"\n")
            sys.stdout.flush()

            ### Save model and metrics for this hyperparameter combination
            folder_name = f"{path}/benchmarks/Supervised/Solubility/transformer_models/{embedding_type}/lr{hyperparams['learning_rate']}_bs{hyperparams['batch_size']}_epochs{hyperparams['num_epochs']}_layers{hyperparams['num_layers']}_heads{hyperparams['num_heads']}_drpt{hyperparams['dropout']}"
            os.makedirs(folder_name, exist_ok=True)

            # Save current model for this hyperparameter combination
            model_file_path = os.path.join(folder_name, "model.pth")
            torch.save(model.state_dict(), model_file_path)

            # Save hyperparameters and test metrics to txt file
            output_file_path = os.path.join(folder_name, "hyperparams_and_test_results.txt")
            with open(output_file_path, 'w') as out_file:
                for key, value in hyperparams.items():
                    out_file.write(f"{key}: {value}\n")

                out_file.write("\nTEST METRICS:\n")
                out_file.write(f"Accuracy: {test_metrics[0]:.4f}\n")
                out_file.write(f"Precision: {test_metrics[1]:.4f}\n")
                out_file.write(f"Recall: {test_metrics[2]:.4f}\n")
                out_file.write(f"F1 Score: {test_metrics[3]:.4f}\n")
                out_file.write(f"ROC AUC: {test_metrics[4]:.4f}\n")

        # Save the best model and its hyperparameters
        if best_model is not None:
            best_model_dir = f"{path}/benchmarks/Supervised/Solubility/transformer_models/{embedding_type}"
            os.makedirs(best_model_dir, exist_ok=True)
            best_model_path = os.path.join(best_model_dir, "best_model.pth")
            torch.save(best_model, best_model_path)

            # Save the hyperparameters for the best model
            best_hyperparams_path = f"{path}/benchmarks/Supervised/Solubility/transformer_models/{embedding_type}/best_model_hyperparams.txt"
            with open(best_hyperparams_path, 'w') as out_file:
                out_file.write("Best Validation Loss: {:.4f}\n".format(best_val_loss))
                for key, value in best_hyperparams.items():
                    out_file.write(f"{key}: {value}\n")
|
checkpoints/.DS_Store
ADDED
|
Binary file (8.2 kB). View file
|
|
|
config.json
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "facebook/esm2_t30_150M_UR50D",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"EsmModel"
|
| 5 |
+
],
|
| 6 |
+
"attention_probs_dropout_prob": 0.0,
|
| 7 |
+
"classifier_dropout": null,
|
| 8 |
+
"emb_layer_norm_before": false,
|
| 9 |
+
"esmfold_config": null,
|
| 10 |
+
"hidden_act": "gelu",
|
| 11 |
+
"hidden_dropout_prob": 0.0,
|
| 12 |
+
"hidden_size": 640,
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 2560,
|
| 15 |
+
"is_folding_model": false,
|
| 16 |
+
"layer_norm_eps": 1e-05,
|
| 17 |
+
"mask_token_id": 32,
|
| 18 |
+
"max_position_embeddings": 1026,
|
| 19 |
+
"model_type": "esm",
|
| 20 |
+
"num_attention_heads": 20,
|
| 21 |
+
"num_hidden_layers": 30,
|
| 22 |
+
"pad_token_id": 1,
|
| 23 |
+
"position_embedding_type": "rotary",
|
| 24 |
+
"token_dropout": true,
|
| 25 |
+
"torch_dtype": "float32",
|
| 26 |
+
"transformers_version": "4.44.2",
|
| 27 |
+
"use_cache": true,
|
| 28 |
+
"vocab_list": null,
|
| 29 |
+
"vocab_size": 33
|
| 30 |
+
}
|
config.yaml
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
defaults:
|
| 2 |
+
- _self_
|
| 3 |
+
- /callbacks: [checkpoint_every_n_steps, checkpoint_monitor, learning_rate_monitor]
|
| 4 |
+
- /model: small
|
| 5 |
+
- /strategy: ddp
|
| 6 |
+
- /noise: loglinear
|
| 7 |
+
- /lr_scheduler: constant_warmup
|
| 8 |
+
|
| 9 |
+
mode: sample_eval # train / ppl_eval / sample_eval
|
| 10 |
+
diffusion: absorbing_state
|
| 11 |
+
backbone: membrane_esm_finetune # dit / dimamba / ar / vanilla_esm_pretrain / membrane_esm_finetune
|
| 12 |
+
parameterization: subs # subs / d3pm / sedd
|
| 13 |
+
time_conditioning: False
|
| 14 |
+
T: 0 # 0 (continuous time) / 1000
|
| 15 |
+
subs_masking: False
|
| 16 |
+
|
| 17 |
+
seed: 42
|
| 18 |
+
|
| 19 |
+
data:
|
| 20 |
+
train:
|
| 21 |
+
vanilla_esm_train_path: /workspace/sg666/MDpLM/data/uniref50/200k_seqs/train.csv
|
| 22 |
+
membrane_esm_train_path: /workspace/sg666/MDpLM/data/membrane/train.csv
|
| 23 |
+
wrap: null
|
| 24 |
+
test:
|
| 25 |
+
vanilla_esm_test_path: /workspace/sg666/MDpLM/data/uniref50/200k_seqs/test.csv
|
| 26 |
+
membrane_esm_test_path: /workspace/sg666/MDpLM/data/membrane/test.csv
|
| 27 |
+
wrap: null
|
| 28 |
+
valid:
|
| 29 |
+
vanilla_esm_valid_path: /workspace/sg666/MDpLM/data/uniref50/200k_seqs/val.csv
|
| 30 |
+
membrane_esm_valid_path: /workspace/sg666/MDpLM/data/membrane/val.csv
|
| 31 |
+
wrap: null
|
| 32 |
+
wrapping: True
|
| 33 |
+
|
| 34 |
+
loader:
|
| 35 |
+
global_batch_size: 8
|
| 36 |
+
eval_global_batch_size: ${.global_batch_size}
|
| 37 |
+
# Note: batch_size and eval_batch_size are **per device** (global batch size divided by devices * num_nodes)
|
| 38 |
+
batch_size: ${div_up:${.global_batch_size}, ${eval:${trainer.devices} * ${trainer.num_nodes}}}
|
| 39 |
+
eval_batch_size: ${div_up:${.eval_global_batch_size}, ${eval:${trainer.devices} * ${trainer.num_nodes}}}
|
| 40 |
+
num_workers: ${eval:"len(__import__('os').sched_getaffinity(0))"}
|
| 41 |
+
pin_memory: True
|
| 42 |
+
|
| 43 |
+
sampling:
|
| 44 |
+
predictor: ddpm_cache # analytic, ddpm, ddpm_cache
|
| 45 |
+
steps: 128
|
| 46 |
+
noise_removal: True
|
| 47 |
+
# TODO(yair): @subham, why aren't these params under `eval`?
|
| 48 |
+
num_sample_batches: 2 # Total samples: `num_gpus` * `loader.eval_batch_size` * num_sample_batches
|
| 49 |
+
num_sample_log: 2
|
| 50 |
+
semi_ar: False
|
| 51 |
+
stride_length: 1
|
| 52 |
+
num_strides: 1
|
| 53 |
+
|
| 54 |
+
training:
|
| 55 |
+
ema: 0.9999
|
| 56 |
+
antithetic_sampling: True
|
| 57 |
+
importance_sampling: False
|
| 58 |
+
sampling_eps: 1e-3
|
| 59 |
+
change_of_variables: False
|
| 60 |
+
mlm_model_path: /workspace/sg666/MDpLM/benchmarks/MLM/model_ckpts_650M/best_model_epoch
|
| 61 |
+
esm_model_path: facebook/esm2_t30_150M_UR50D
|
| 62 |
+
focus_mask: False
|
| 63 |
+
|
| 64 |
+
eval:
|
| 65 |
+
checkpoint_path: /workspace/sg666/MDpLM/checkpoints/membrane_mdlm/eos-wrapping_epochs60_lr3e-4_200k-seqs_bsz16_all-params_no-compile_gradclip1_beta-one0.9_beta-two0.999_bf16/checkpoints/best.ckpt # Used to evaluate a checkpoint after training.
|
| 66 |
+
disable_ema: False
|
| 67 |
+
compute_generative_perplexity: False
|
| 68 |
+
perplexity_batch_size: 8
|
| 69 |
+
compute_perplexity_on_sanity: False
|
| 70 |
+
gen_ppl_eval_model_name_or_path: gpt2-large # gpt2-large, meta-llama/Llama-2-7b-hf
|
| 71 |
+
generate_samples: True
|
| 72 |
+
generation_model: /workspace/sg666/MDpLM/checkpoints/membrane_automodel/epochs60_lr3e-4_200k-seqs_bsz16_all-params_no-compile_gradclip1_beta-one0.9_beta-two0.999_bf16/
|
| 73 |
+
|
| 74 |
+
optim:
|
| 75 |
+
weight_decay: 0.075
|
| 76 |
+
lr: 3e-4
|
| 77 |
+
beta1: 0.9
|
| 78 |
+
beta2: 0.999
|
| 79 |
+
eps: 1e-8
|
| 80 |
+
|
| 81 |
+
Model:
|
| 82 |
+
hidden_size: 1280
|
| 83 |
+
cond_dim: 256
|
| 84 |
+
n_heads: 20
|
| 85 |
+
n_blocks: 4
|
| 86 |
+
dropout: 0.5
|
| 87 |
+
length: null #512
|
| 88 |
+
scale_by_sigma: True
|
| 89 |
+
|
| 90 |
+
trainer:
|
| 91 |
+
_target_: lightning.Trainer
|
| 92 |
+
accelerator: cuda
|
| 93 |
+
num_nodes: 1
|
| 94 |
+
devices: ${device_count:}
|
| 95 |
+
accumulate_grad_batches: ${div_up:${loader.global_batch_size}, ${eval:${trainer.devices} * ${loader.batch_size} * ${trainer.num_nodes}}}
|
| 96 |
+
gradient_clip_val: 1.0
|
| 97 |
+
precision: bf16
|
| 98 |
+
num_sanity_val_steps: 2
|
| 99 |
+
max_epochs: 60
|
| 100 |
+
max_steps: 1_000_000
|
| 101 |
+
log_every_n_steps: 10
|
| 102 |
+
limit_train_batches: 1.0 # train on full dataset, can be used to toggle quick run
|
| 103 |
+
limit_val_batches: 1.0 # validate on full dataset, can be used to toggle quick run
|
| 104 |
+
val_check_interval: 955
|
| 105 |
+
|
| 106 |
+
wandb:
|
| 107 |
+
project: MDpLM_finetune_membrane_200k-seqs
|
| 108 |
+
notes: null
|
| 109 |
+
group: programmablebio
|
| 110 |
+
job_type: null
|
| 111 |
+
name: dit_test #dit_wrapping_epochs60_lr3e-4_200k-seqs_bsz16_all-params_no-compile_gradclip1_beta-one0.9_beta-two0.999_bf16
|
| 112 |
+
id: ${.name}_${seed}
|
| 113 |
+
|
| 114 |
+
hydra:
|
| 115 |
+
run:
|
| 116 |
+
dir: /workspace/sg666/MDpLM/outputs/${data.train}/${now:%Y.%m.%d}/${now:%H%M%S}
|
| 117 |
+
job:
|
| 118 |
+
chdir: true
|
| 119 |
+
|
| 120 |
+
checkpointing:
|
| 121 |
+
# Use custom `save_dir` if, e.g., saving to S3 bucket, otherwise leave this parameter as is
|
| 122 |
+
save_dir: /workspace/sg666/MDpLM/checkpoints/membrane_mdlm/
|
| 123 |
+
# Note: `checkpoints` path should correspond to `checkpoint_every_n_steps.dirpath`
|
| 124 |
+
resume_from_ckpt: false
|
| 125 |
+
resume_ckpt_path: ${.save_dir}/epochs30_lr3e-4_bsz8_gradclip1_beta-one0.9_beta-two0.999_bf16_all-params_no-compile/checkpoints/last.ckpt #/checkpoints/last.ckpt
|
| 126 |
+
pretrained_esm_mdlm_automodel_path: /workspace/sg666/MDpLM/checkpoints/vanilla_esm_pretrained_automodel/epochs10_lr3e-4_200k-seqs_bsz16_all-params_no-compile_gradclip1_beta-one0.9_beta-two0.999_bf16/
|
| 127 |
+
finetuned_esm_mdlm_automodel_path: /workspace/sg666/MDpLM/checkpoints/membrane_mdlm/
|
data/.DS_Store
ADDED
|
Binary file (6.15 kB). View file
|
|
|
data/membrane/test.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/membrane/train.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/membrane/val.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/uniref/100k_seqs/check_data.ipynb
ADDED
|
@@ -0,0 +1,168 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 21,
|
| 6 |
+
"metadata": {},
|
| 7 |
+
"outputs": [],
|
| 8 |
+
"source": [
|
| 9 |
+
"import pandas as pd\n",
|
| 10 |
+
"import seaborn as sns\n",
|
| 11 |
+
"import matplotlib.pyplot as plt\n",
|
| 12 |
+
"import numpy as np"
|
| 13 |
+
]
|
| 14 |
+
},
|
| 15 |
+
{
|
| 16 |
+
"cell_type": "code",
|
| 17 |
+
"execution_count": 4,
|
| 18 |
+
"metadata": {},
|
| 19 |
+
"outputs": [],
|
| 20 |
+
"source": [
|
| 21 |
+
"path = \"/home/sg666/MDpLM/data/uniref50/100k_seqs\""
|
| 22 |
+
]
|
| 23 |
+
},
|
| 24 |
+
{
|
| 25 |
+
"cell_type": "code",
|
| 26 |
+
"execution_count": 5,
|
| 27 |
+
"metadata": {},
|
| 28 |
+
"outputs": [],
|
| 29 |
+
"source": [
|
| 30 |
+
"train = pd.read_csv(path + \"/train.csv\")\n",
|
| 31 |
+
"test = pd.read_csv(path + \"/test.csv\")\n",
|
| 32 |
+
"val = pd.read_csv(path + '/val.csv')"
|
| 33 |
+
]
|
| 34 |
+
},
|
| 35 |
+
{
|
| 36 |
+
"cell_type": "code",
|
| 37 |
+
"execution_count": 23,
|
| 38 |
+
"metadata": {},
|
| 39 |
+
"outputs": [],
|
| 40 |
+
"source": [
|
| 41 |
+
"valid_residues = ['A','R','N','D','C','E','Q','G','H','I','L','K','M','F','P','S','T','W','Y','V']\n",
|
| 42 |
+
"\n",
|
| 43 |
+
"for df in [train, test, val]:\n",
|
| 44 |
+
" df['Length'] = df['Sequence'].str.len()\n",
|
| 45 |
+
"\n",
|
| 46 |
+
" for residue in valid_residues:\n",
|
| 47 |
+
" df[residue] = 0\n",
|
| 48 |
+
"\n",
|
| 49 |
+
" for idx, row in df.iterrows():\n",
|
| 50 |
+
" sequence = row['Sequence']\n",
|
| 51 |
+
"\n",
|
| 52 |
+
" for residue in valid_residues:\n",
|
| 53 |
+
" df.at[idx, residue] = sequence.count(residue)"
|
| 54 |
+
]
|
| 55 |
+
},
|
| 56 |
+
{
|
| 57 |
+
"cell_type": "code",
|
| 58 |
+
"execution_count": 28,
|
| 59 |
+
"metadata": {},
|
| 60 |
+
"outputs": [
|
| 61 |
+
{
|
| 62 |
+
"data": {
|
| 63 |
+
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA/IAAAIjCAYAAACgdyAGAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABst0lEQVR4nO3dd3yNd//H8fdJIhOxEzsxagZFqRlqJDVqr1IrVW3NWsVttVqzlNaqu0Zp7VWtonYVpTatomoTMWOVkFy/P/xybkdOhkhycvF6Ph7nwfle33Ndn+vKlZPzPt9rWAzDMAQAAAAAAEzBydEFAAAAAACAhCPIAwAAAABgIgR5AAAAAABMhCAPAAAAAICJEOQBAAAAADARgjwAAAAAACZCkAcAAAAAwEQI8gAAAAAAmAhBHgAAAAAAEyHIA3gq7du3l5+fX7Iv5+HDh+rXr59y584tJycnNWzYMNmX+aSoqCgVL15cn376aYovOzZ+fn6qV6+eo8tAHP7880+5uLjo8OHDji7lmVksFg0bNszRZbyQNm/eLIvFos2bN6fock+dOiWLxaLZs2en6HITavbs2bJYLDp16lS8ff38/NS+fftkrwnP5ml+pgD+hyAPPIeGDRsmi8WiK1eu2J1evHhxVatWLcmWZ7FYbB7p06dXYGCgVq1aleh5zpw5U2PHjlXTpk31zTff6IMPPpD06IPZk8uzWCx69913Y8zjxo0beuedd5Q1a1Z5eXmpevXq2rt3b4JrmD9/vs6ePauuXbta227fvq2hQ4cqODhYmTJlivcD75EjRxQcHKy0adMqU6ZMeuutt3T58uUY/aKiojRmzBj5+/vL3d1dJUqU0Pz58xNca0JERUVpzpw5Kl++vDJlyqR06dLppZdeUtu2bfXbb78l6bJeZEWLFlXdunU1ZMiQZJl/9Ife+B4p8YXbs4rt9/nJR1KFyhEjRmjFihVJMq/U4I033pCnp6du3boVa5/WrVvL1dVVV69eTdJlR3/REP1wdnZWtmzZ1LRpUx05ciRJl2U2T26bxx8tW7Z0dHkAnhMuji4AgLn897//VVRUVIz2WrVqqW3btjIMQ6dPn9bUqVNVv359rV69WkFBQU+9nI0bNypnzpz6/PPPY0wrVaqUevfubdP20ksv2TyPiopS3bp1deDAAfXt21dZsmTRlClTVK1aNe3Zs0cFCxaMt4axY8eqZcuW8vb2trZduXJFH3/8sfLkyaOSJUvGOVp27tw5Va1aVd7e3hoxYoRu376tzz77TIcOHdKuXbvk6upq7fuf//xHo0aNUqdOnfTKK6/o+++/15tvvpmkH/y6d++uyZMnq0GDBmrdurVcXFx09OhRrV69Wvny5dOrr76aJMuB9O6776pOnTo6ceKE8ufPn6Tzrlq1qubOnWvT9vbbb6tcuXJ65513rG1p06Z95mX9+++/cnFJvo8KEyZM0O3bt63Pf/rpJ82fP1+ff/65smTJYm2vWLFikixvxIgRatq0qUOO8EkOrVu31g8//KDly5erbdu2MabfvXtX33//vYKDg5U5c+ZkqaF79+565ZVX9ODBAx08eFDTpk3T5s2bdfjwYfn6+ib58t566y21bNlSbm5uST7vpBa9bR5nhi/YUpqZfqZAqmIAeO4MHTrUkGRcvnzZ7vRixYoZgYGBSbY8SUaXLl1s2v78809DkvH6668nap7Vq1c3ihUrFqM9b968Rt26deN9/cKFCw1JxuLFi61tYWFhRoYMGYxWrVrF+/q9e/cakoz169fbtN+7d8+4ePGiYRiG8fvvvxuSjFmzZtmdx3vvvWd4eHgYp0+ftratW7fOkGR89dVX1rZz584ZadKksdmGUVFRRpUqVYxcuXIZDx8+tLYndP2fFBoaalgsFqNTp04xpkVFRRmXLl166nkidhEREUbGjBmNwYMHp8jyvLy8jHbt2sXZ58GDB8b9+/dTpJ7EGjt2rCHJOHnyZLLMPyHbKb
XYtGmTIcnYtGlTrH3u3r1rpEuXzggKCrI7fd68eYYkY8GCBQle7smTJ+N8X3uyvsffYw3DMKZOnWpIMkaPHp3gZSaXvHnzOuTnHdu2iYsZfj8BpC4cWg/AehjgokWL9OmnnypXrlxyd3dXjRo19Pfff9v0Teg58kWKFFGWLFl04sQJm/b79+9r6NChKlCggNzc3JQ7d27169dP9+/fl/S/8zM3bdqkP/74w3o44pMj3xEREbpz506sy1+yZIl8fHzUuHFja1vWrFnVvHlzff/999blxWbFihVydXVV1apVbdrd3NwSPMq0dOlS1atXT3ny5LG21axZUy+99JIWLVpkbfv+++/14MEDvf/++9Y2i8Wi9957T+fOndOOHTviXM4333wjFxcX9e3bN9Y+J0+elGEYqlSpUoxpFotF2bJls2m7ceOGevbsqdy5c8vNzU0FChTQ6NGjYxyNcePGDbVv317e3t7KkCGD2rVrp/3798c4HLpatWp2T+ewtz9FRUVpwoQJKlasmNzd3eXj46POnTvr+vXrNv2irxfw66+/qly5cnJ3d1e+fPk0Z86cGMu5ceOGPvjgA/n5+cnNzU25cuVS27ZtbU4/iW/fjLZu3TpVrlxZGTJkUNq0aVWoUCENHDjQpk+aNGlUrVo1ff/99zFqSQnRv0efffaZJkyYoPz588vNzU1//vmnIiIiNGTIEJUpU0be3t7y8vJSlSpVtGnTphjzefIc+ejTdv7++2+1b99eGTJkkLe3tzp06KC7d+8m2/p8++23KlOmjDw8PJQpUya1bNlSZ8+etelz/PhxNWnSRL6+vnJ3d1euXLnUsmVLhYeHW9flzp07+uabb6zvK4k5f/r7779X3bp1lSNHDrm5uSl//vwaPny4IiMjbfpVq1ZNxYsX159//qnq1avL09NTOXPm1JgxY2LM89y5c2rYsKG8vLyULVs2ffDBB/G+R0mSh4eHGjdurA0bNigsLCzG9Hnz5ildunR64403dO3aNfXp00cBAQFKmzat0qdPr9dff10HDhx46m0QlypVqkhSjPf+8+fPq2PHjvLx8ZGbm5uKFSummTNnxnj9l19+qWLFisnT01MZM2ZU2bJlNW/ePOt0e+dTG4ahTz75RLly5ZKnp6eqV6+uP/74I8a8o/ffJ8V2jvbq1atVpUoVeXl5KV26dKpbt67d+T6tuH4/Jemvv/5S06ZNlSlTJrm7u6ts2bJauXJljPn88ccfeu211+Th4aFcuXLpk08+0cyZM2OsS2zXurB3DYGEvPc/Xv/06dOt9b/yyiv6/fffYyznr7/+UvPmzZU1a1Z5eHioUKFC+s9//mOd/izbPzQ0VB06dFCuXLnk5uam7Nmzq0GDBpxvjxcCh9YDsBo1apScnJzUp08fhYeHa8yYMWrdurV27tz51PMKDw/X9evXbQ4rjoqK0htvvKFff/1V77zzjooUKaJDhw7p888/17Fjx7RixQplzZpVc+fO1aeffqrbt29r5MiRkh59MRBt48aN8vT0VGRkpPLmzasPPvhAPXr0sFn+vn37VLp0aTk52X5fWa5cOU2fPl3Hjh1TQEBArPVv375dxYsXV5o0aZ563aVHH1rDwsJUtmzZGNPKlSunn376yaZWLy8vm3WM7hc9vXLlynaXM336dL377rsaOHCgPvnkk1jryZs3ryRp8eLFatasmTw9PWPte/fuXQUGBur8+fPq3Lmz8uTJo+3bt2vAgAG6ePGiJkyYIOnRh+cGDRro119/1bvvvqsiRYpo+fLlateuXazzTojOnTtr9uzZ6tChg7p3766TJ09q0qRJ2rdvn7Zt22bzM/n777/VtGlThYSEqF27dpo5c6bat2+vMmXKqFixYpIeXdegSpUqOnLkiDp27KjSpUvrypUrWrlypc6dO6csWbIkaN+UHn1wrlevnkqUKKGPP/5Ybm5u+vvvv7Vt27YY61GmTBl9//33unnzptKnT/9M2ySxZs2apXv37u
mdd96Rm5ubMmXKpJs3b+rrr79Wq1at1KlTJ926dUszZsxQUFCQdu3apVKlSsU73+bNm8vf318jR47U3r179fXXXytbtmwaPXp0kq/Dp59+qsGDB6t58+Z6++23dfnyZX355ZeqWrWq9u3bpwwZMigiIkJBQUG6f/++unXrJl9fX50/f14//vijbty4IW9vb82dOzfGKQiJOe1h9uzZSps2rXr16qW0adNq48aNGjJkiG7evKmxY8fa9L1+/bqCg4PVuHFjNW/eXEuWLNGHH36ogIAAvf7665Ienb5Qo0YNnTlzRt27d1eOHDk0d+5cbdy4MUH1tG7dWt98840WLVpkcz2Pa9euae3atWrVqpU8PDz0xx9/aMWKFWrWrJn8/f116dIlffXVVwoMDNSff/6pHDlyPPW2sCc6QGXMmNHadunSJb366quyWCzq2rWrsmbNqtWrVyskJEQ3b95Uz549JT06dat79+5q2rSpevTooXv37ungwYPauXOn3nzzzViXOWTIEH3yySeqU6eO6tSpo71796p27dqKiIhI9HrMnTtX7dq1U1BQkEaPHq27d+9q6tSpqly5svbt25egL7Rv3boV41o1mTJlsv7f3u/nH3/8oUqVKilnzpzq37+/vLy8tGjRIjVs2FBLly5Vo0aNJD0KsNWrV9fDhw+t/aZPny4PD49Er3NC3/ujzZs3T7du3VLnzp1lsVg0ZswYNW7cWP/884/1ffrgwYOqUqWK0qRJo3feeUd+fn46ceKEfvjhhzgvJJvQ7d+kSRP98ccf6tatm/z8/BQWFqZ169bpzJkznMaA55+DjwgAkAye9tD66MMAixQpYnNo38SJEw1JxqFDh6xt7dq1M/LmzWszP0lGSEiIcfnyZSMsLMzYvXu3ERwcbEgyxo4da+03d+5cw8nJydi6davN66dNm2ZIMrZt22ZtCwwMtHtoff369Y3Ro0cbK1asMGbMmGFUqVLFkGT069fPpp+Xl5fRsWPHGK9ftWqVIclYs2aN3W0TLVeuXEaTJk3i7BPXofXR0+bMmRNjWt++fQ1Jxr179wzDMIy6desa+fLli9Hvzp07hiSjf//+1rbHD62fOHGiYbFYjOHDh8dZZ7S2bdsakoyMGTMajRo1Mj777DPjyJEjMfoNHz7c8PLyMo4dO2bT3r9/f8PZ2dk4c+aMYRiGsWLFCkOSMWbMGGufhw8fWn8mj2+XwMBAu6dzPLk/bd261ZBkfPfddzb91qxZE6M9b968hiTjl19+sbaFhYUZbm5uRu/eva1tQ4YMMSQZy5Yti7H8qKgowzASvm9+/vnncf5uPS76sOadO3fG2/dZPXnIePTh0enTpzfCwsJs+j58+DDGIbzXr183fHx8YvzOSDKGDh1qfR793vJkv0aNGhmZM2d+5vV48tD6U6dOGc7Ozsann35q0+/QoUOGi4uLtX3fvn0JOpQ5KQ6tv3v3boy2zp07G56entbfacN4tM8/+R5w//59w9fX1+a9ZcKECYYkY9GiRda2O3fuGAUKFIj30HrDePTzzJ49u1GhQgWb9uh9d+3atYZhPDotKDIy0qbPyZMnDTc3N+Pjjz+2aYvtfe1x0X83Zs6caVy+fNm4cOGCsWbNGqNAgQKGxWIxdu3aZe0bEhJiZM+e3bhy5YrNPFq2bGl4e3tbt2mDBg3svu8/btasWTb7SFhYmOHq6mrUrVvX+vtsGIYxcOBAQ5LNzzt6/41vnrdu3TIyZMgQ41Sk0NBQw9vb2+4pSva2jb3HyZMn4/z9rFGjhhEQEGCzL0VFRRkVK1Y0ChYsaG3r2bNnjPeXsLAww9vbO8bpKU/+Hkd78tSDhL73R9efOXNm49q1a9Z+33//vSHJ+OGHH6xtVatWNdKlS2dziln0OkVL7Pa/fv16jM8ZwIuEQ+sBWHXo0MHmAmzRh0j+888/8b52xowZypo1q7Jly6ayZctqw4YN6tevn3r16mXts3jxYhUpUkSFCxfWlStXrI
/XXntNkuwe2vuklStXql+/fmrQoIE6duyoLVu2KCgoSOPHj9e5c+es/f7991+7F85xd3e3To/L1atXbUaUnlb0/BNSQ2JqHTNmjHr06KHRo0dr0KBBCapp1qxZmjRpkvz9/bV8+XL16dNHRYoUUY0aNXT+/Hlrv8WLF6tKlSrKmDGjzc+pZs2aioyM1C+//CLp0YXJXFxc9N5771lf6+zsrG7duiWoHnsWL14sb29v1apVy2bZZcqUUdq0aWPsI0WLFrXup9Kj0ycKFSpks88uXbpUJUuWtI5kPS76MNuE7psZMmSQ9OjwansXfXxc9P4T290jUkKTJk2UNWtWmzZnZ2fr73lUVJSuXbumhw8fqmzZsgm+q8OTd4moUqWKrl69qps3byZN4f9v2bJlioqKUvPmzW1+Lr6+vipYsKD15xJ9Qcq1a9cm6yH+kmxGPKNHXKtUqaK7d+/qr7/+sumbNm1atWnTxvrc1dVV5cqVs9k/f/rpJ2XPnl1Nmza1tnl6etpcuDAuzs7OatmypXbs2GFzOPG8efPk4+OjGjVqSHr0XhR9hFJkZKSuXr1qPTXkae7m8aSOHTsqa9asypEjh4KDgxUeHq65c+daL/JmGIaWLl2q+vXryzAMm59jUFCQwsPDrcvPkCGDzp07Z/fw7NisX79eERER6tatm81h89Gj/Imxbt063bhxQ61atbKp19nZWeXLl0/Q3yrp0ZEC69ats3k8fmrWk7+f165d08aNG9W8eXPrvnXlyhVdvXpVQUFBOn78uPW9+qefftKrr75qPXJLevT+17p160Svd0Lf+6O1aNHC5u/kk58ZLl++rF9++UUdO3a0OcVMkt1THKIldPt7eHjI1dVVmzdvjnHqFfAi4NB64AVl74/ok39oo/9AJ+QPZIMGDdS1a1dFRETo999/14gRI3T37l2bQ9uPHz+uI0eOxAgW0eyd4xkfi8WiDz74QGvXrtXmzZutH5o9PDzsnmN679496/T4GIbx1PVEi55/Qmp42lq3bNmiVatW6cMPP4zzvPgnOTk5qUuXLurSpYuuXr2qbdu2adq0aVq9erVatmyprVu3Snr0czp48GC8P6fTp08re/bsMa6OXqhQoQTX9KTjx48rPDw8xjn7Ty472pP7rPRov318nz1x4oSaNGkS73ITsm+2aNFCX3/9td5++231799fNWrUUOPGjdW0adMYp3FE7z9xfWCNiIjQtWvX7E5LmzbtM1953t/f3277N998o3Hjxumvv/7SgwcP4u3/pLjeK5LyNILjx4/LMIxY7zIRffiuv7+/evXqpfHjx+u7775TlSpV9MYbb6hNmzY2d51ICn/88YcGDRqkjRs3xvjiIvp8/Gi5cuWK8fPPmDGjDh48aH1++vRpFShQIEa/p/k9at26tT7//HPNmzdPAwcO1Llz57R161Z1795dzs7Okh59aTNx4kRNmTJFJ0+etDmn/1muaD9kyBBVqVJFt2/f1vLly7VgwQKb34XLly/rxo0bmj59uqZPn253HtG/Xx9++KHWr1+vcuXKqUCBAqpdu7befPNNu9f2iHb69GlJirGPZM2aNdFfxh4/flySrF/kPSmh+3hAQIBq1qwZ6/Qnf9/+/vtvGYahwYMHa/DgwXZfExYWppw5c+r06dMqX758jOnP+v6bkPf+aPF9ZogO9MWLF3/qOqT4t7+bm5tGjx6t3r17y8fHR6+++qrq1auntm3bJssdE4DUhiAPPIfiG3W+e/eutc/joj/wPSkhgTZXrlzWDyx16tRRlixZ1LVrV1WvXt16wbmoqCgFBARo/PjxdueRO3fueJcT1+seD0TZs2fXxYsXY/SNbovvfNDMmTM/0zf82bNnt1nekzVkypTJOgqfPXt2bdq0SYZh2HyYj63WYsWK6caNG5o7d646d+6c4PD1uMyZM+uNN97QG2+8oWrVqmnLli06ffq08ubNq6ioKNWqVUv9+vWz+9onb/WXEBaLxe5+9OQFwq
KiopQtWzZ99913dudjb3TZnqf9Eiah+6aHh4d++eUXbdq0SatWrdKaNWu0cOFCvfbaa/r5559t6onefx6/jdqTtm/frurVq9udNnToULsXqHoa9r6w+vbbb9W+fXs1bNhQffv2VbZs2eTs7KyRI0fGuEBZbJJqu8cnKipKFotFq1evtrvMx7/oGDdunNq3b6/vv/9eP//8s7p3766RI0fqt99+U65cuZKknhs3bigwMFDp06fXxx9/rPz588vd3V179+7Vhx9+GOMojZTaTmXKlFHhwoU1f/58DRw4UPPnz5dhGDajsyNGjNDgwYPVsWNHDR8+XJkyZZKTk5N69uwZ79ElcXk8rDZs2FB3795Vp06dVLlyZeXOnds67zZt2sR6/YwSJUpIenQtlKNHj+rHH3/UmjVrtHTpUk2ZMkVDhgzRRx99lOgao8X2pZq99yHp0Xna9gJhUt2S8cnfz+jl9unTJ9bbthYoUCBJli3ZX++nee9Prv37abZ/z549Vb9+fa1YsUJr167V4MGDNXLkSG3cuFEvv/zyM9UBpHYEeeA5FH1hs6NHj8YIx3fv3tXZs2dVu3btZK2hc+fO+vzzzzVo0CA1atRIFotF+fPn14EDB1SjRo04RymfVvS3/o+HvFKlSmnr1q2KioqyGR3auXOnPD094w2jhQsX1smTJxNdU86cOZU1a1bt3r07xrQnLyhWqlQpff311zpy5IiKFi1qU2v09MdlyZJFS5YsUeXKlVWjRg39+uuvz3ShqrJly2rLli26ePGi8ubNq/z58+v27dtxjiRJj/azDRs26Pbt2zaB6ujRozH6ZsyY0e4pGtGjadHy58+v9evXq1KlSs900aYn53n48OF4+yR033RyclKNGjVUo0YNjR8/XiNGjNB//vMfbdq0yWabnTx5Uk5OTnHuayVLltS6devsTsuXL1+cdSTWkiVLlC9fPi1btsxmXYcOHZosy3sW+fPnl2EY8vf3T9AXSAEBAQoICNCgQYO0fft2VapUSdOmTbNeCPJZ33c2b96sq1evatmyZTZ3tHiW94q8efPq8OHDMb7Is/d7FJfWrVtr8ODBOnjwoObNm6eCBQva3MN8yZIlql69umbMmGHzuhs3bsT5ZdPTGjVqlJYvX65PP/1U06ZNU9asWZUuXTpFRkbG+54iSV5eXmrRooVatGihiIgINW7cWJ9++qkGDBhg9wvo6L93x48ft/mduXz5cowvY6NHjG/cuGE9TUay/z4kSdmyZUtQzUkluv40adIk6P03euT6cbG9/964ccOmLSIiIsYXzQl970+o6PWJ7/33SU+7/fPnz6/evXurd+/eOn78uEqVKqVx48bp22+/ffqiARPhHHngOVSjRg25urpq6tSpMUZapk+frocPH1qvmJxcXFxc1Lt3bx05csR6C67mzZvr/Pnz+u9//xuj/7///hvn7eSkRyPuT44gPHjwQKNGjZKrq6vNyGbTpk116dIlLVu2zNp25coVLV68WPXr17d7TvrjKlSooMOHDyfoFlCxadKkiX788Ueb22Rt2LBBx44dU7NmzaxtDRo0UJo0aTRlyhRrm2EYmjZtmnLmzKmKFSvGmHeuXLm0fv16/fvvv6pVq5auXr0aZy2hoaHWWxs9LiIiQhs2bJCTk5N1pKd58+basWOH1q5dG6P/jRs39PDhQ0mPjrx4+PChpk6dap0eGRmpL7/8Msbr8ufPr7/++kuXL1+2th04cCDG1d6bN2+uyMhIDR8+PMY8Hj58GOPDaEI0adJEBw4c0PLly2NMix45Sui+ae8w+OgvWp7cV/bs2aNixYrFeWh3xowZVbNmTbuP5Ary0aNoj4+a7dy5M97bHDpC48aN5ezsrI8++ijGKJ9hGNb9/ubNm9b9MlpAQICcnJxsfi5eXl6J2oei2dt2ERERNr+7T6tOnTq6cOGClixZYm27e/durIehxyZ69H3IkCHav39/jHOlnZ2dY2zDxYsX21wfIynkz5
9fTZo00ezZsxUaGipnZ2c1adJES5cutRvoHn9PePJ9zNXVVUWLFpVhGDangDyuZs2aSpMmjb788kub9XvyCuvRtUmyOdc7+paEjwsKClL69Ok1YsQIu8t9vOaklC1bNlWrVk1fffWV3aO5Hl9unTp19Ntvv2nXrl020+0dzZQ/f/4Y57dPnz49xt/ThL73J1TWrFlVtWpVzZw5U2fOnLGZFteofUK3/927d62noEXLnz+/0qVL90x/uwGzYEQeeA5ly5ZNQ4YM0aBBg1S1alW98cYb8vT01Pbt2zV//nzVrl1b9evXT/Y62rdvryFDhmj06NFq2LCh3nrrLS1atEjvvvuuNm3apEqVKikyMlJ//fWXFi1apLVr19q9XVu0lStX6pNPPlHTpk3l7++va9euad68eTp8+LBGjBhhcwhe06ZN9eqrr6pDhw76888/lSVLFk2ZMkWRkZEJOkSzQYMGGj58uLZs2RLj6IVJkybpxo0bunDhgiTphx9+sF5or1u3btbgNnDgQC1evFjVq1dXjx49dPv2bY0dO1YBAQHq0KGDdX65cuVSz549NXbsWD148ECvvPKKVqxYoa1bt+q7776L9fDFAgUK6Oeff1a1atUUFBSkjRs3xnru5rlz51SuXDm99tprqlGjhnx9fRUWFqb58+frwIED6tmzp3VUrm/fvlq5cqXq1atnvZXbnTt3dOjQIS1ZskSnTp1SlixZVL9+fVWqVEn9+/fXqVOnVLRoUS1btizGecLSowtijR8/XkFBQQoJCVFYWJimTZumYsWK2ZxnHBgYqM6dO2vkyJHav3+/ateurTRp0uj48eNavHixJk6caHNRsITo27evlixZombNmqljx44qU6aMrl27ppUrV2ratGkqWbJkgvfNjz/+WL/88ovq1q2rvHnzKiwsTFOmTFGuXLlsbhH44MEDbdmyRe+///5T1ZoS6tWrp2XLlqlRo0aqW7euTp48qWnTpqlo0aK6fft2ki1n2LBh+uijj7Rp0yZVq1YtUfPInz+/PvnkEw0YMECnTp1Sw4YNlS5dOp08eVLLly/XO++8oz59+mjjxo3q2rWrmjVrppdeekkPHz7U3LlzrSEyWpkyZbR+/XqNHz9eOXLkkL+/v/U8Y4vFosDAQG3evDnWeipWrKiMGTOqXbt26t69uywWi+bOnftMhxJ36tRJkyZNUtu2bbVnzx5lz55dc+fOjfMWkfb4+/urYsWK1i9Onwzy9erV08cff6wOHTqoYsWKOnTokL777rtk+cKob9++WrRokSZMmKBRo0Zp1KhR2rRpk8qXL69OnTqpaNGiunbtmvbu3av169dbvyCrXbu2fH19ValSJfn4+OjIkSOaNGmS6tatq3Tp0tldVtasWdWnTx+NHDlS9erVU506dbRv3z6tXr06xpEGtWvXVp48eRQSEqK+ffvK2dlZM2fOVNasWW2CZvr06TV16lS99dZbKl26tFq2bGnts2rVKlWqVEmTJk1K8u0mSZMnT1blypUVEBCgTp06KV++fLp06ZJ27Nihc+fO6cCBA5Kkfv36ae7cuQoODlaPHj2st5/LmzevzTUYJOntt9/Wu+++qyZNmqhWrVo6cOCA1q5dG2P7JPS9/2l88cUXqly5skqXLq133nlH/v7+OnXqlFatWqX9+/fbfU1Ct/+xY8dUo0YNNW/eXEWLFpWLi4uWL1+uS5cuqWXLlk9VJ2BKKXR1fAAO8O233xqvvvqq4eXlZbi5uRmFCxc2PvroI5vb2hjG/26V8+Stm+zdhii228916dLFbg3Dhg2zuYVSRESEMXr0aKNYsWKGm5ubkTFjRqNMmTLGRx99ZISHh1tfZ+/2c7t37zbq169v5MyZ03B1dTXSpk1rVK5c2ea2TY+7du2aERISYmTOnNnw9PQ0AgMDjd9//z2uTWajRIkSRkhISIz26Nue2Xs8fssfwzCMw4cPG7Vr1zY8PT2NDBkyGK1btzZCQ0NjzDMyMtIYMW
KEkTdvXsPV1dUoVqyY8e2339pddvTt56Lt3LnTSJcunVG1alW7t8YyDMO4efOmMXHiRCMoKMjIlSuXkSZNGiNdunRGhQoVjP/+9782twIyjEe3/xkwYIBRoEABw9XV1ciSJYtRsWJF47PPPjMiIiKs/a5evWq89dZbRvr06Q1vb2/jrbfest4K7MnbV3377bdGvnz5DFdXV6NUqVLG2rVr7e5PhmEY06dPN8qUKWN4eHgY6dKlMwICAox+/foZFy5ciHNbGIb9W91dvXrV6Nq1q3XfyZUrl9GuXTub22ElZN/csGGD0aBBAyNHjhyGq6urkSNHDqNVq1Yxbte0evVqQ5Jx/Phxuz+PpBbb7efs3ZYpKirKuq+5ubkZL7/8svHjjz/G+rtt7/ZzT95+78nbRxmGYfTu3duwWCx2b3EYmydvPxdt6dKlRuXKlQ0vLy/Dy8vLKFy4sNGlSxfj6NGjhmEYxj///GN07NjRyJ8/v+Hu7m5kypTJqF69urF+/Xqb+fz1119G1apVDQ8PD5tbk926dcuQZLRs2TLeGrdt22a8+uqrhoeHh5EjRw6jX79+xtq1a2PcKi62W2ja286nT5823njjDcPT09PIkiWL0aNHD+stF+O7/dzjJk+ebEgyypUrF2PavXv3jN69exvZs2c3PDw8jEqVKhk7duyI8fvytLefi+2Wf9WqVTPSp09v3LhxwzAMw7h06ZLRpUsXI3fu3EaaNGkMX19fo0aNGsb06dOtr/nqq6+MqlWrGpkzZzbc3NyM/PnzG3379rX522BvX4uMjDQ++ugj67pVq1bNOHz4cIzbqxmGYezZs8coX7684erqauTJk8cYP3683XlGr2NQUJDh7e1tuLu7G/nz5zfat29v7N69+5m2TVy/n4ZhGCdOnDDatm1r+Pr6GmnSpDFy5sxp1KtXz1iyZIlNv4MHDxqBgYGGu7u7kTNnTmP48OHGjBkz7G6fDz/80MiSJYvh6elpBAUFGX///bfd7ZOQ9/646n/yPcMwHv0dbNSokZEhQwbD3d3dKFSokDF48GDr9MRu/ytXrhhdunQxChcubHh5eRne3t5G+fLlY/1MADxvLIaRxFdcAYDnxNy5c9WlSxedOXPG5nxKxO3UqVPy9/fXrFmz1L59e0eX4xANGzaUxWKxezj/i6JcuXLKmzevFi9e7OhS4vXTTz+pXr16OnDggAICAhxdDpBos2fPVocOHXTy5En5+fk5uhwAyYhz5AEgFq1bt1aePHk0efJkR5cCEzly5Ih+/PFHu+f5vyhu3rypAwcO6OOPP3Z0KQmyadMmtWzZkhAPADANzpEHgFg4OTk99dV2gSJFijz1RaGeN+nTpzfVxabGjh3r6BIAAHgqjMgDAAAAAGAinCMPAAAAAICJMCIPAAAAAICJEOQBAAAAADARLnZnR1RUlC5cuKB06dLJYrE4uhwAAAAAwHPOMAzdunVLOXLkkJNT3GPuBHk7Lly4oNy5czu6DAAAAADAC+bs2bPKlStXnH0I8nakS5dO0qMNmD59egdXAwAAAAB43t28eVO5c+e25tG4EOTtiD6cPn369AR5AAAAAECKScjp3VzsDgAAAAAAEyHIAwAAAABgIgR5AAAAAABMxOHnyE+ePFljx45VaGioSpYsqS+//FLlypWz2/ePP/7QkCFDtGfPHp0+fVqff/65evbs+UzzBAAAAADELzIyUg8ePHB0Gabl7OwsFxeXJLnFuUOD/MKFC9WrVy9NmzZN5cuX14QJExQUFKSjR48qW7ZsMfrfvXtX+fLlU7NmzfTBBx8kyTwBAAAAAHG7ffu2zp07J8MwHF2KqXl6eip79uxydXV9pvlYDAf+JMqXL69XXnlFkyZNkiRFRUUpd+7c6tatm/r37x/na/38/NSzZ88YI/KJmef9+/d1//596/Poy/6Hh4dz1XoAAAAAL7TIyEgdP35cnp6eypo1a5KMKL9oDMNQRESELl++rMjISBUsWFBOTr
Znut+8eVPe3t4JyqEOG5GPiIjQnj17NGDAAGubk5OTatasqR07dqToPEeOHKmPPvooUcsEAAAAgOfZgwcPZBiGsmbNKg8PD0eXY1oeHh5KkyaNTp8+rYiICLm7uyd6Xg672N2VK1cUGRkpHx8fm3YfHx+Fhoam6DwHDBig8PBw6+Ps2bOJWj4AAAAAPK8YiX92T47CJ5bDL3aXGri5ucnNzc3RZQAAAAAAEC+HjchnyZJFzs7OunTpkk37pUuX5Ovrm2rmCQAAAABAauKwEXlXV1eVKVNGGzZsUMOGDSU9ujDdhg0b1LVr11QzTwAAAABATH79V6Xo8k6Nqpuiy7MntouupzSHHlrfq1cvtWvXTmXLllW5cuU0YcIE3blzRx06dJAktW3bVjlz5tTIkSMlPbqY3Z9//mn9//nz57V//36lTZtWBQoUSNA8AQAAAADPt/jO5x86dKiGDRv21PP9/fff5eXllciqko5Dg3yLFi10+fJlDRkyRKGhoSpVqpTWrFljvVjdmTNnbC4GcOHCBb388svW55999pk+++wzBQYGavPmzQmaJwAAAADg+Xbx4kXr/xcuXKghQ4bo6NGj1ra0adNa/28YhiIjI+XiEn88zpo1a9IWmkgOO0c+WteuXXX69Gndv39fO3fuVPny5a3TNm/erNmzZ1uf+/n5yTCMGI/oEJ+QeQIAAAAAnm++vr7Wh7e3tywWi/X5X3/9pXTp0mn16tUqU6aM3Nzc9Ouvv+rEiRNq0KCBfHx8lDZtWr3yyitav369zXz9/Pw0YcIE63OLxaKvv/5ajRo1kqenpwoWLKiVK1cm+/o5PMgDAAAAAJDS+vfvr1GjRunIkSMqUaKEbt++rTp16mjDhg3at2+fgoODVb9+fZ05cybO+Xz00Udq3ry5Dh48qDp16qh169a6du1astZOkAcAAAAAvHA+/vhj1apVS/nz51emTJlUsmRJde7cWcWLF1fBggU1fPhw5c+fP94R9vbt26tVq1YqUKCARowYodu3b2vXrl3JWjtBHgAAAADwwilbtqzN89u3b6tPnz4qUqSIMmTIoLRp0+rIkSPxjsiXKFHC+n8vLy+lT59eYWFhyVJzNIde7A4AAAAAAEd48urzffr00bp16/TZZ5+pQIEC8vDwUNOmTRURERHnfNKkSWPz3GKxKCoqKsnrfRxBHgAAAADwwtu2bZvat2+vRo0aSXo0Qn/q1CnHFhULgjwAAEhxfv1XxTn91Ki6KVQJAACPFCxYUMuWLVP9+vVlsVg0ePDgZB9ZTyyCPAAASH2GeSegT3jy1wEAiNXz9qXr+PHj1bFjR1WsWFFZsmTRhx9+qJs3bzq6LLsshmEYji4itbl586a8vb0VHh6u9OnTO7ocAACeO/GOyLu/Ge88AvzzxNvnULtDCa4JAGDfvXv3dPLkSfn7+8vd3d3R5ZhaXNvyaXIoV60HAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBEXBxdAAAAAADAhIZ5p/DywlN2eakYI/IAAAAAgOeKxWKJ8zFs2LBnmveKFSuSrNbEYEQeAAAAAPBcuXjxovX/Cxcu1JAhQ3T06FFrW9q0aR1RVpJhRB4AAAAA8Fzx9fW1Pry9vWWxWGzaFixYoCJFisjd3V2FCxfWlClTrK+NiIhQ165dlT17drm7uytv3rwaOXKkJMnPz0+S1KhRI1ksFuvzlMaIPAAAAADghfHdd99pyJAhmjRpkl5++WXt27dPnTp1kpeXl9q1a6cvvvhCK1eu1KJFi5QnTx6dPXtWZ8+elST9/vvvypYtm2bNmqXg4GA5Ozs7ZB0I8gAAAACAF8bQoUM1btw4NW7cWJLk7++vP//8U1999ZXatWunM2fOqGDBgqpcubIsFovy5s1rfW3WrFklSRkyZJCvr69D6pcI8gAAAACAF8SdO3
d04sQJhYSEqFOnTtb2hw8fytv70VX427dvr1q1aqlQoUIKDg5WvXr1VLt2bUeVbBdBHgAAAADwQrh9+7Yk6b///a/Kly9vMy36MPnSpUvr5MmTWr16tdavX6/mzZurZs2aWrJkSYrXGxuCPAAAAADgheDj46McOXLon3/+UevWrWPtlz59erVo0UItWrRQ06ZNFRwcrGvXrilTpkxKkyaNIiMjU7DqmAjyAAAAAIAXxkcffaTu3bvL29tbwcHBun//vnbv3q3r16+rV69eGj9+vLJnz66XX35ZTk5OWrx4sXx9fZUhQwZJj65cv2HDBlWqVElubm7KmDFjiq8DQR4AAAAA8PSGhTu6gkR5++235enpqbFjx6pv377y8vJSQECAevbsKUlKly6dxowZo+PHj8vZ2VmvvPKKfvrpJzk5Pbp7+7hx49SrVy/997//Vc6cOXXq1KkUXweLYRhGii81lbt586a8vb0VHh6u9OnTO7ocAACeO379V8U5/ZT7m/HOI8A/T7x9DrU7lOCaAAD23bt3TydPnpS/v7/c3d0dXY6pxbUtnyaHOiVnkQAAAAAAIGkR5AEAAAAAMBGCPAAAAAAAJkKQBwAAAADARAjyAAAAAACYCEEeAAAAAAATIcgDAAAAAGAiBHkAAAAAAEyEIA8AAAAAgIm4OLoAAAAAAID5BHwTkKLLO9TuUIour1q1aipVqpQmTJiQostNCEbkAQAAAADPlfr16ys4ONjutK1bt8pisejgwYMpXFXSIcgDAAAAAJ4rISEhWrdunc6dOxdj2qxZs1S2bFmVKFHCAZUlDYI8AAAAAOC5Uq9ePWXNmlWzZ8+2ab99+7YWL16shg0bqlWrVsqZM6c8PT0VEBCg+fPnO6bYRCDIAwAAAACeKy4uLmrbtq1mz54twzCs7YsXL1ZkZKTatGmjMmXKaNWqVTp8+LDeeecdvfXWW9q1a5cDq044gjwAAAAA4LnTsWNHnThxQlu2bLG2zZo1S02aNFHevHnVp08flSpVSvny5VO3bt0UHBysRYsWObDihCPIAwAAAACeO4ULF1bFihU1c+ZMSdLff/+trVu3KiQkRJGRkRo+fLgCAgKUKVMmpU2bVmvXrtWZM2ccXHXCEOQBAAAAAM+lkJAQLV26VLdu3dKsWbOUP39+BQYGauzYsZo4caI+/PBDbdq0Sfv371dQUJAiIiIcXXKCEOQBAAAAAM+l5s2by8nJSfPmzdOcOXPUsWNHWSwWbdu2TQ0aNFCbNm1UsmRJ5cuXT8eOHXN0uQlGkAcAAAAAPJfSpk2rFi1aaMCAAbp48aLat28vSSpYsKDWrVun7du368iRI+rcubMuXbrk2GKfgoujCwAAAAAAmM+hdoccXUKChISEaMaMGapTp45y5MghSRo0aJD++ecfBQUFydPTU++8844aNmyo8PBwB1ebMAR5AAAAAMBzq0KFCja3oJOkTJkyacWKFXG+bvPmzclX1DMiyJucX/9V8fY5NapuClQCAAAAAEgJBPkXwTDveLsE+OeJc7pZDpsBAAAAgOcdF7sDAAAAAMBECPIAAAAAAJgIQR4AAAAAEK8nLxiHp5dU25AgDwAAAACIlbOzsyQpIiLCwZWY3927dyVJadKkeab5cLE7AAAAAECsXFxc5OnpqcuXLytNmjRycmI8+GkZhqG7d+8qLCxMGTJksH45klgEeQAAAABArCwWi7Jnz66TJ0/q9OnTji7H1DJkyCBfX99nng9BHgAAAAAQJ1dXVxUsWJDD659BmjRpnnkkPhpBHgAAAAAQLycnJ7m7uzu6DIiL3QEAAAAAYCoEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAA
AAYCIEeQAAAAAATIQgDwAAAACAiTg8yE+ePFl+fn5yd3dX+fLltWvXrjj7L168WIULF5a7u7sCAgL0008/2Uy/ffu2unbtqly5csnDw0NFixbVtGnTknMVAAAAAABIMQ4N8gsXLlSvXr00dOhQ7d27VyVLllRQUJDCwsLs9t++fbtatWqlkJAQ7du3Tw0bNlTDhg11+PBha59evXppzZo1+vbbb3XkyBH17NlTXbt21cqVK1NqtQAAAAAASDYODfLjx49Xp06d1KFDB+vIuaenp2bOnGm3/8SJExUcHKy+ffuqSJEiGj58uEqXLq1JkyZZ+2zfvl3t2rVTtWrV5Ofnp3feeUclS5aMd6QfAAAAAAAzcFiQj4iI0J49e1SzZs3/FePkpJo1a2rHjh12X7Njxw6b/pIUFBRk079ixYpauXKlzp8/L8MwtGnTJh07dky1a9eOtZb79+/r5s2bNg8AAAAAAFIjhwX5K1euKDIyUj4+PjbtPj4+Cg0Ntfua0NDQePt/+eWXKlq0qHLlyiVXV1cFBwdr8uTJqlq1aqy1jBw5Ut7e3tZH7ty5n2HNAAAAAABIPg6/2F1S+/LLL/Xbb79p5cqV2rNnj8aNG6cuXbpo/fr1sb5mwIABCg8Ptz7Onj2bghUDAAAAAJBwLo5acJYsWeTs7KxLly7ZtF+6dEm+vr52X+Pr6xtn/3///VcDBw7U8uXLVbduXUlSiRIltH//fn322WcxDsuP5ubmJjc3t2ddJQAAAAAAkp3DRuRdXV1VpkwZbdiwwdoWFRWlDRs2qEKFCnZfU6FCBZv+krRu3Tpr/wcPHujBgwdycrJdLWdnZ0VFRSXxGgAAAAAAkPIcNiIvPbpVXLt27VS2bFmVK1dOEyZM0J07d9ShQwdJUtu2bZUzZ06NHDlSktSjRw8FBgZq3Lhxqlu3rhYsWKDdu3dr+vTpkqT06dMrMDBQffv2lYeHh/LmzastW7Zozpw5Gj9+vMPWEwAAAACApOLQIN+iRQtdvnxZQ4YMUWhoqEqVKqU1a9ZYL2h35swZm9H1ihUrat68eRo0aJAGDhyoggULasWKFSpevLi1z4IFCzRgwAC1bt1a165dU968efXpp5/q3XffTfH1AwAAAAAgqVkMwzAcXURqc/PmTXl7eys8PFzp06d3dDlx8uu/Kt4+p9zfjLdPgH+eOKcfancowTUBABCf+P5+JcXfLom/XwAA83iaHPrcXbUeAAAAAIDnGUEeAAAAAAATIcgDAAAAAGAiBHkAAAAAAEyEIA8AAAAAgIkQ5AEAAAAAMBGCPAAAAAAAJkKQBwAAAADARAjyAAAAAACYCEEeAAAAAAATIcgDAAAAAGAiBHkAAAAAAEyEIA8AAAAAgIkQ5AEAAAAAMBGCPAAAAAAAJkKQBwAAAADARAjyAAAAAACYCEEeAAAAAAATIcgDAAAAAGAiBHkAAAAAAEyEIA8AAAAAgIkQ5AEAAAAAMBEXRxcAAAAAIHUJ+CYgzumH2h1KoUoA2MOIPAAAAAAAJsKIPAAAAPCc8Ou/Kt4+p9zfjH9G/nmSoBoAyYUReQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATMTF0QUAAF4sfv1XxdvnlPub8c9oWHgSVAMAAGA+BHkAgCkFfBMQ5/RD7Q6lUCUAAAApi0PrAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABNxcXQBAAAAAAAkVsA3AfH2Od
TuUApUknIYkQcAAAAAwEQYkQcAAAAAOIRf/1Xx9jnl/mbcHfzzJFE15sGIPAAAAAAAJkKQBwAAAADARAjyAAAAAACYCEEeAAAAAAATIcgDAAAAAGAiBHkAAAAAAEyEIA8AAAAAgIkQ5AEAAAAAMBGCPAAAAAAAJkKQBwAAAADARAjyAAAAAACYCEEeAAAAAAATIcgDAAAAAGAiBHkAAAAAAEyEIA8AAAAAgIkQ5AEAAAAAMBGCPAAAAAAAJuLi6AIAwNSGecczPTxl6gAAAMALgxF5AAAAAABMxOFBfvLkyfLz85O7u7vKly+vXbt2xdl/8eLFKly4sNzd3RUQEKCffvopRp8jR47ojTfekLe3t7y8vPTKK6/ozJkzybUKAAAAAACkGIceWr9w4UL16tVL06ZNU/ny5TVhwgQFBQXp6NGjypYtW4z+27dvV6tWrTRy5EjVq1dP8+bNU8OGDbV3714VL15cknTixAlVrlxZISEh+uijj5Q+fXr98ccfcnd3T+nVQ2LEd5iyxKHKSDF+/VfF2+cUby0AAABIYQ4N8uPHj1enTp3UoUMHSdK0adO0atUqzZw5U/3794/Rf+LEiQoODlbfvn0lScOHD9e6des0adIkTZs2TZL0n//8R3Xq1NGYMWOsr8ufP38KrA3ik1ShKOCbgDinH2p3KKElAQAAAIDpOCzIR0REaM+ePRowYIC1zcnJSTVr1tSOHTvsvmbHjh3q1auXTVtQUJBWrFghSYqKitKqVavUr18/BQUFad++ffL399eAAQPUsGHDWGu5f/++7t+/b31+8+bNxK8YADwmvi+eJL58AgAAwNNx2DnyV65cUWRkpHx8fGzafXx8FBoaavc1oaGhcfYPCwvT7du3NWrUKAUHB+vnn39Wo0aN1LhxY23ZsiXWWkaOHClvb2/rI3fu3M+4dgAAAAAAJA+HX+wuKUVFRUmSGjRooA8++EClSpVS//79Va9ePeuh9/YMGDBA4eHh1sfZs2dTqmQAAAAAAJ6Kww6tz5Ili5ydnXXp0iWb9kuXLsnX19fua3x9fePsnyVLFrm4uKho0aI2fYoUKaJff/011lrc3Nzk5uaWmNUAAAAAACBFOWxE3tXVVWXKlNGGDRusbVFRUdqwYYMqVKhg9zUVKlSw6S9J69ats/Z3dXXVK6+8oqNHj9r0OXbsmPLmzZvEawAAAAAAQMpz6FXre/XqpXbt2qls2bIqV66cJkyYoDt37livYt+2bVvlzJlTI0eOlCT16NFDgYGBGjdunOrWrasFCxZo9+7dmj59unWeffv2VYsWLVS1alVVr15da9as0Q8//KDNmzc7YhUBAAAAAEhSDg3yLVq00OXLlzVkyBCFhoaqVKlSWrNmjfWCdmfOnJGT0/8OGqhYsaLmzZunQYMGaeDAgSpYsKBWrFhhvYe8JDVq1EjTpk3TyJEj1b17dxUqVEhLly5V5cqVU3z9AAAAAABIag4N8pLUtWtXde3a1e40e6PozZo1U7NmzeKcZ8eOHdWxY8ekKA8AAAAAgFTlubpqPQAAAAAAz7tEBfl//vknqesAAAAAAAAJkKggX6BAAVWvXl3ffvut7t27l9Q1AQAAAACAWCQqyO/du1clSpRQr1695Ovrq86dO2vXrl1JXRsAAAAAAHhCooJ8qVKlNHHiRF24cEEzZ87UxYsXVblyZRUvXlzjx4/X5cuXk7pOAAAAAACgZ7zYnYuLixo3bqzFixdr9OjR+vvvv9WnTx/lzp1bbdu21cWLF5OqTgAAAAAAoGcM8rt379b777+v7Nmza/z48erTp49OnDihdevW6cKFC2rQoEFS1QkAAAAAAJTI+8iPHz9es2bN0tGjR1WnTh3NmTNHderUkZPTo+8F/P39NXv2bPn5+SVlrQAAAAAAvPASFeSnTp2qjh07qn379sqePbvdPtmyZdOMGTOeqTgAAAAAiEvANwFxTj/U7lAKVQKknEQF+ePHj8fbx9XVVe3atUvM7A
EAAAA85/z6r4q3zyn3N+OfkX+eJKgGMJdEnSM/a9YsLV68OEb74sWL9c033zxzUQAAAAAAwL5EBfmRI0cqS5YsMdqzZcumESNGPHNRAAAAAADAvkQF+TNnzsjf3z9Ge968eXXmzJlnLgoAAAAAANiXqCCfLVs2HTx4MEb7gQMHlDlz5mcuCgAAAAAA2JeoIN+qVSt1795dmzZtUmRkpCIjI7Vx40b16NFDLVu2TOoaAQAAAADA/0vUVeuHDx+uU6dOqUaNGnJxeTSLqKgotW3blnPkAQBwkPiuAH1qVN0UqgQAACSnRAV5V1dXLVy4UMOHD9eBAwfk4eGhgIAA5c2bN6nrAwAAKYj7MQMAkPolKshHe+mll/TSSy8lVS0AACA5DfOOvw/3YwYAINVLVJCPjIzU7NmztWHDBoWFhSkqKspm+saNG5OkOABJJCEf3oeFJ38dAAAAAJ5ZooJ8jx49NHv2bNWtW1fFixeXxWJJ6roAAAAAAIAdiQryCxYs0KJFi1SnTp2krgdAIsR7gSv3FCoEAAAAQLJL9MXuChQokNS1AHCg+C5wJXGRKwAAACA1SNR95Hv37q2JEyfKMIykrgcAAAAAAMQhUSPyv/76qzZt2qTVq1erWLFiSpMmjc30ZcuWJUlxAAAAAADAVqKCfIYMGdSoUaOkrgUAAAAAAMQjUUF+1qxZSV0HAAAAAABIgESdIy9JDx8+1Pr16/XVV1/p1q1bkqQLFy7o9u3bSVYcAAAAAACwlagR+dOnTys4OFhnzpzR/fv3VatWLaVLl06jR4/W/fv3NW3atKSuEwAAAAAAKJEj8j169FDZsmV1/fp1eXh4WNsbNWqkDRs2JFlxAAAAAADAVqJG5Ldu3art27fL1dXVpt3Pz0/nz59PksIAAAAAAEBMiRqRj4qKUmRkZIz2c+fOKV26dM9cFAAAAAAAsC9RQb527dqaMGGC9bnFYtHt27c1dOhQ1alTJ6lqAwAAAAAAT0jUofXjxo1TUFCQihYtqnv37unNN9/U8ePHlSVLFs2fPz+pawQAAAAAAP8vUUE+V65cOnDggBYsWKCDBw/q9u3bCgkJUevWrW0ufgcAAABACvgmIN4+h9odSoFKADwPEhXkJcnFxUVt2rRJyloAAAAAAEA8EhXk58yZE+f0tm3bJqoYAAAAwJSGecc93T9PytQB4IWQqCDfo0cPm+cPHjzQ3bt35erqKk9PT4I8AAB4ccQX4CRpWHjy14Fk49d/Vbx9TrmnQCEA8P8SFeSvX78eo+348eN677331Ldv32cuCgAAILWIL8QlJMBxfjQAICkl6vZz9hQsWFCjRo2KMVoPAAAAAACSTpIFeenRBfAuXLiQlLMEAAAAAACPSdSh9StXrrR5bhiGLl68qEmTJqlSpUpJUhgAAAAAAIgpUUG+YcOGNs8tFouyZs2q1157TePGjUuKugAAAAAAgB2JCvJRUVFJXQcAAAAAAEiAJD1HHgAAAAAAJK9Ejcj36tUrwX3Hjx+fmEUAAAAAAAA7EhXk9+3bp3379unBgwcqVKiQJOnYsWNydnZW6dKlrf0sFkvSVAkAAAAAACQlMsjXr19f6dKl0zfffKOMGTNKkq5fv64OHTqoSpUq6t27d5IWCQAAAAAAHknUOfLjxo3TyJEjrSFekjJmzKhPPvmEq9YDAAAAAJCMEhXkb968qcuXL8dov3z5sm7duvXMRQEAAAAAAPsSFeQbNWqkDh06aNmyZTp37pzOnTunpUuXKiQkRI0bN07qGgEAAAAAwP9L1Dny06ZNU58+ffTmm2/qwYMHj2bk4qKQkBCNHTs2SQsEAAAAAAD/k6gg7+npqSlTpmjs2LE6ceKEJCl//vzy8vJK0uIAAAAAAICtRB1aH+3ixYu6ePGiChYsKC8vLxmGkVR1AQAAAAAAOxIV5K9evaoaNWropZdeUp06dXTx4kVJUkhICLeeAwAAAAAgGSUqyH/wwQdKkyaNzpw5I09PT2t7ix
YttGbNmiQrDgAAAAAA2ErUOfI///yz1q5dq1y5ctm0FyxYUKdPn06SwgAAAAAAQEyJGpG/c+eOzUh8tGvXrsnNze2ZiwIAAAAAAPYlKshXqVJFc+bMsT63WCyKiorSmDFjVL169SQrDgAAAAAA2ErUofVjxoxRjRo1tHv3bkVERKhfv376448/dO3aNW3bti2pawQAAAAAAP8vUSPyxYsX17Fjx1S5cmU1aNBAd+7cUePGjbVv3z7lz58/qWsEAAAAAAD/76lH5B88eKDg4GBNmzZN//nPf5KjJgAAAAAAEIunHpFPkyaNDh48mBy1AAAAAACAeCTq0Po2bdpoxowZSV0LAAAAAACIR6Iudvfw4UPNnDlT69evV5kyZeTl5WUzffz48UlSHAAAAAAAsPVUQf6ff/6Rn5+fDh8+rNKlS0uSjh07ZtPHYrEkXXUAAAAAAMDGUwX5ggUL6uLFi9q0aZMkqUWLFvriiy/k4+OTLMUBAAAAAABbT3WOvGEYNs9Xr16tO3fuJGlBAAAAAAAgdom62F20J4M9AAAAAABIXk8V5C0WS4xz4DknHgAAAACAlPNU58gbhqH27dvLzc1NknTv3j29++67Ma5av2zZsqSrEAAAAAAAWD1VkG/Xrp3N8zZt2iRpMQAAAAAAIG5PFeRnzZqVXHUAAAAAAIAEeKaL3QEAAAAAgJRFkAcAAAAAwEQI8gAAAAAAmAhBHgAAAAAAEyHIAwAAAABgIgR5AAAAAABMhCAPAAAAAICJEOQBAAAAADCRVBHkJ0+eLD8/P7m7u6t8+fLatWtXnP0XL16swoULy93dXQEBAfrpp59i7fvuu+/KYrFowoQJSVw1AAAAAAApz+FBfuHCherVq5eGDh2qvXv3qmTJkgoKClJYWJjd/tu3b1erVq0UEhKiffv2qWHDhmrYsKEOHz4co+/y5cv122+/KUeOHMm9GgAAAAAApAiHB/nx48erU6dO6tChg4oWLapp06bJ09NTM2fOtNt/4sSJCg4OVt++fVWkSBENHz5cpUuX1qRJk2z6nT9/Xt26ddN3332nNGnSpMSqAAAAAACQ7FwcufCIiAjt2bNHAwYMsLY5OTmpZs2a2rFjh93X7NixQ7169bJpCwoK0ooVK6zPo6Ki9NZbb6lv374qVqxYvHXcv39f9+/ftz6/efPmU64Jnkd+/VfFOf3UqLrxziPgm4A4px9qd+ipagIAAAAAhwb5K1euKDIyUj4+PjbtPj4++uuvv+y+JjQ01G7/0NBQ6/PRo0fLxcVF3bt3T1AdI0eO1EcfffSU1eOFN8w7/j7+eZK/DgAAAAAvFIcfWp/U9uzZo4kTJ2r27NmyWCwJes2AAQMUHh5ufZw9ezaZqwQAAAAAIHEcOiKfJUsWOTs769KlSzbtly5dkq+vr93X+Pr6xtl/69atCgsLU548/xsJjYyMVO/evTVhwgSdOnUqxjzd3Nzk5ub2jGsDAACAhIrvFDYp/tPY4juFTeI0NgDPJ4eOyLu6uqpMmTLasGGDtS0qKkobNmxQhQoV7L6mQoUKNv0lad26ddb+b731lg4ePKj9+/dbHzly5FDfvn21du3a5FsZAAAAAABSgENH5CWpV69eateuncqWLaty5cppwoQJunPnjjp06CBJatu2rXLmzKmRI0dKknr06KHAwECNGzdOdevW1YIFC7R7925Nnz5dkpQ5c2ZlzpzZZhlp0qSRr6+vChUqlLIrBwAAgMSL73o0XIsGwAvK4UG+RYsWunz5soYMGaLQ0FCVKlVKa9assV7Q7syZM3Jy+t+BAxUrVtS8efM0aNAgDRw4UAULFtSKFStUvHhxR60CAAAAAAApxuFBXpK6du2qrl272p22efPmGG3NmjVTs2bNEjx/e+fFAwAAAABgRs/dVesBAAAAAHiepYoReQBACojvXNNh4SlTBwAAAJ4JI/IAAAAAAJgII/IA8BxI0P2Y3eOezv2YAQAAzIEReQAAAAAATIQgDw
AAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwERdHFwAAAAAASF0CvgmIt8+hdodSoBLYQ5AHAAAAgOeIX/9VcU4/5f5m/DPxz5NE1SA5cGg9AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAibg4ugAAAACkHgHfBMTb51C7QylQCQAgNgR5AABi4dd/Vbx9Trm/GW+fAP88cU4nFCGpJGifHVU3BSoBACQngjwAAMCLZJh33NPj+eIJAOB4nCMPAAAAAICJEOQBAAAAADARgjwAAAAAACZCkAcAAAAAwERSRZCfPHmy/Pz85O7urvLly2vXrl1x9l+8eLEKFy4sd3d3BQQE6KeffrJOe/DggT788EMFBATIy8tLOXLkUNu2bXXhwoXkXg0AAAAAAJKdw4P8woUL1atXLw0dOlR79+5VyZIlFRQUpLCwMLv9t2/frlatWikkJET79u1Tw4YN1bBhQx0+fFiSdPfuXe3du1eDBw/W3r17tWzZMh09elRvvPFGSq4WAAAAAADJwuFBfvz48erUqZM6dOigokWLatq0afL09NTMmTPt9p84caKCg4PVt29fFSlSRMOHD1fp0qU1adIkSZK3t7fWrVun5s2bq1ChQnr11Vc1adIk7dmzR2fOnEnJVQMAAAAAIMk5NMhHRERoz549qlmzprXNyclJNWvW1I4dO+y+ZseOHTb9JSkoKCjW/pIUHh4ui8WiDBky2J1+//593bx50+YBAAAAAEBq5NAgf+XKFUVGRsrHx8em3cfHR6GhoXZfExoa+lT97927pw8//FCtWrVS+vTp7fYZOXKkvL29rY/cuXMnYm0AAAAAAEh+Dj+0Pjk9ePBAzZs3l2EYmjp1aqz9BgwYoPDwcOvj7NmzKVglAAAAAAAJ5+LIhWfJkkXOzs66dOmSTfulS5fk6+tr9zW+vr4J6h8d4k+fPq2NGzfGOhovSW5ubnJzc0vkWgAAAAAAkHIcOiLv6uqqMmXKaMOGDda2qKgobdiwQRUqVLD7mgoVKtj0l6R169bZ9I8O8cePH9f69euVOXPm5FkBAAAAAABSmENH5CWpV69eateuncqWLaty5cppwoQJunPnjjp06CBJatu2rXLmzKmRI0dKknr06KHAwECNGzdOdevW1YIFC7R7925Nnz5d0qMQ37RpU+3du1c//vijIiMjrefPZ8qUSa6uro5ZUQAAAAAAkoDDg3yLFi10+fJlDRkyRKGhoSpVqpTWrFljvaDdmTNn5OT0vwMHKlasqHnz5mnQoEEaOHCgChYsqBUrVqh48eKSpPPnz2vlypWSpFKlStksa9OmTapWrVqKrB
cAAAAAAMnB4UFekrp27aquXbvanbZ58+YYbc2aNVOzZs3s9vfz85NhGElZHgAAAAAAqcZzfdV6AAAAAACeNwR5AAAAAABMhCAPAAAAAICJEOQBAAAAADARgjwAAAAAACZCkAcAAAAAwEQI8gAAAAAAmAhBHgAAAAAAEyHIAwAAAABgIgR5AAAAAABMhCAPAAAAAICJEOQBAAAAADARgjwAAAAAACZCkAcAAAAAwEQI8gAAAAAAmAhBHgAAAAAAEyHIAwAAAABgIgR5AAAAAABMxMXRBQB4sfj1XxXn9FOj6qZQJQAAAIA5EeQBmE7ANwHx9jnU7lAKVAIAAACkPII8gNRlmHf8ffzzJH8dAAAAQCrFOfIAAAAAAJgII/IAAAAAEI94r/Pj/ma88wiI56hCTg1EQjEiDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiaSKID958mT5+fnJ3d1d5cuX165du+Lsv3jxYhUuXFju7u4KCAjQTz/9ZDPdMAwNGTJE2bNnl4eHh2rWrKnjx48n5yoAAAAAAJAiHB7kFy5cqF69emno0KHau3evSpYsqaCgIIWFhdntv337drVq1UohISHat2+fGjZsqIYNG+rw4cPWPmPGjNEXX3yhadOmaefOnfLy8lJQUJDu3buXUqsFAAAAAECycHiQHz9+vDp16qQOHTqoaNGimjZtmjw9PTVz5ky7/SdOnKjg4GD17dtXRYoU0fDhw1W6dGlNmjRJ0qPR+AkTJmjQoEFq0KCBSpQooTlz5ujChQtasWJFCq4ZAAAAAABJz8WRC4+IiNCePXs0YMAAa5uTk5Nq1qypHTt22H3Njh071KtXL5u2oKAga0g/efKkQkNDVbNmTet0b29vlS9fXjt27FDLli1jzPP+/fu6f/++9Xl4eLgk6ebNm4let5QSdf9uvH1uWox4+0T+Gxn3PJJgW5ipVin+el+0WqWU2Q/MVKsUf73UGpOZ3gvMVKvE+5bd+fC+FXM5z9F7AbXGZKb3LTPVKvEea3c+qWQ/SKlak1t0jYYR/89HhgOdP3/ekGRs377dpr1v375GuXLl7L4mTZo0xrx582zaJk+ebGTLls0wDMPYtm2bIcm4cOGCTZ9mzZoZzZs3tzvPoUOHGpJ48ODBgwcPHjx48ODBgwcPhz7Onj0bb5Z26Ih8ajFgwACbUf6oqChdu3ZNmTNnlsVicWBlSe/mzZvKnTu3zp49q/Tp0zu6nDiZqVbJXPVSa/Kg1uRBrcnDTLVK5qqXWpMHtSYPak0+ZqqXWlMHwzB069Yt5ciRI96+Dg3yWbJkkbOzsy5dumTTfunSJfn6+tp9ja+vb5z9o/+9dOmSsmfPbtOnVKlSdufp5uYmNzc3m7YMGT
I8zaqYTvr06U2z45upVslc9VJr8qDW5EGtycNMtUrmqpdakwe1Jg9qTT5mqpdaHc/b2ztB/Rx6sTtXV1eVKVNGGzZssLZFRUVpw4YNqlChgt3XVKhQwaa/JK1bt87a39/fX76+vjZ9bt68qZ07d8Y6TwAAAAAAzMLhh9b36tVL7dq1U9myZVWuXDlNmDBBd+7cUYcOHSRJbdu2Vc6cOTVy5EhJUo8ePRQYGKhx48apbt26WrBggXbv3q3p06dLkiwWi3r27KlPPvlEBQsWlL+/vwYPHqwcOXKoYcOGjlpNAAAAAACShMODfIsWLXT58mUNGTJEoaGhKlWqlNasWSMfHx9J0pkzZ+Tk9L8DBypWrKh58+Zp0KBBGjhwoAoWLKgVK1aoePHi1j79+vXTnTt39M477+jGjRuqXLmy1qxZI3d39xRfv9TGzc1NQ4cOjXEqQWpkplolc9VLrcmDWpMHtSYPM9Uqmateak0e1Jo8qDX5mKleajUfi2Ek5Nr2AAAAAAAgNXDoOfIAAAAAAODpEOQBAAAAADARgjwAAAAAACZCkAcAAAAAwEQI8i+YHTt2yNnZWXXr1nV0KbFq3769LBaLLBaL0qRJI39/f/Xr10/37t1zdGkxRNc6atQom/YVK1bIYrE4qCr7ntyuPj4+qlWrlmbOnKmoqChHl2dXaGiounXrpnz58snNzU25c+dW/fr1tWHDBkeXZuPxbfv4Izg42NGlmV5oaKh69OihAgUKyN3dXT4+PqpUqZKmTp2qu3fvOro8q/bt29u9xenmzZtlsVh048aNFK8pIWKrOzUxQ43R7NW6ZMkSubu7a9y4cY4p6gnR71fvvvtujGldunSRxWJR+/btU76wOMT2Hvv33387urQYHq/V1dVVBQoU0Mcff6yHDx86ujS7Ll++rPfee0958uSRm5ubfH19FRQUpG3btjm6NCt7P/vHH8OGDXN0iZo2bZrSpUtn83O+ffu20qRJo2rVqtn0jf67cOLEiRSu0pZhGKpZs6aCgoJiTJsyZYoyZMigc+fOOaCymOrXrx/rZ6qtW7fKYrHo4MGDKVyV4xHkXzAzZsxQt27d9Msvv+jChQuOLidWwcHBunjxov755x99/vnn+uqrrzR06FBHl2WXu7u7Ro8erevXrzu6lHhFb9dTp05p9erVql69unr06KF69eqlug8Zp06dUpkyZbRx40aNHTtWhw4d0po1a1S9enV16dLF0eXFEL1tH3/Mnz/f0WXFcPbsWXXs2FE5cuSQq6ur8ubNqx49eujq1auOLi2Gf/75Ry+//LJ+/vlnjRgxQvv27dOOHTvUr18//fjjj1q/fr2jSwTi9PXXX6t169aaOnWqevfu7ehyrHLnzq0FCxbo33//tbbdu3dP8+bNU548eRxYWezsvcf6+/s7uiy7oms9fvy4evfurWHDhmns2LGOLsuuJk2aaN++ffrmm2907NgxrVy5UtWqVUtVfxMe/5lPmDBB6dOnt2nr06ePo0tU9erVdfv2be3evdvatnXrVvn6+mrnzp02g1GbNm1Snjx5lD9/fkeUamWxWDRr1izt3LlTX331lbX95MmT6tevn7788kvlypXLgRX+T0hIiNatW2f3i4VZs2apbNmyKlGihAMqcyyH30ceKef27dtauHChdu/erdDQUM2ePVsDBw50dFl2RX8rLD36wFGzZk2tW7dOo0ePdnBlMdWsWVN///23Ro4cqTFjxji6nDg9vl1z5syp0qVL69VXX1WNGjU0e/Zsvf322w6u8H/ef/99WSwW7dq1S15eXtb2YsWKqWPHjg6szL7Ht21q9c8//6hChQp66aWXNH/+fPn7++uPP/5Q3759tXr1av3222/KlCmTo8u0ev/99+Xi4qLdu3fb7AP58uVTgwYNxN1TkZqNGTNGQ4cO1YIFC9SoUSNHl2OjdOnSOnHihJYtW6bWrVtLkpYtW6Y8efKk2nBshvfYaI/X+t5772n58uVauXKlBg
wY4ODKbN24cUNbt27V5s2bFRgYKEnKmzevypUr5+DKbD3+c/f29pbFYkl1+0KhQoWUPXt2bd68Wa+++qqkRyPvDRo00MaNG/Xbb79ZR+Y3b96s6tWrO7Da/8mdO7cmTpyorl27qnbt2vLz81NISIhq166tt956y9HlWdWrV09Zs2bV7NmzNWjQIGv77du3tXjx4lT7RVlyY0T+BbJo0SIVLlxYhQoVUps2bTRz5kxTfBA+fPiwtm/fLldXV0eXYpezs7NGjBihL7/8MtUcgvQ0XnvtNZUsWVLLli1zdClW165d05o1a9SlSxebABctQ4YMKV/Uc6BLly5ydXXVzz//rMDAQOXJk0evv/661q9fr/Pnz+s///mPo0u0unr1qn7++edY9wFJqe70FSDahx9+qOHDh+vHH39MdSE+WseOHTVr1izr85kzZ6pDhw4OrOj55eHhoYiICEeXEUPatGmVNm1arVixQvfv33d0OaZXvXp1bdq0yfp806ZNqlatmgIDA63t//77r3bu3JlqgrwktWvXTjVq1FDHjh01adIkHT582GaEPjVwcXFR27ZtNXv2bJvssnjxYkVGRqpVq1YOrM5xCPIvkBkzZqhNmzaSHh32FR4eri1btji4Kvt+/PFHpU2bVu7u7goICFBYWJj69u3r6LJi1ahRI5UqVSrVHv4fn8KFC+vUqVOOLsPq77//lmEYKly4sKNLSbDoffbxx4gRIxxdltW1a9e0du1avf/++/Lw8LCZ5uvrq9atW2vhwoWp5su96H2gUKFCNu1ZsmSxbt8PP/zQQdXZZ28feP311x1dFlLY6tWrNWbMGH3//feqUaOGo8uJVZs2bfTrr7/q9OnTOn36tLZt22b9jJAaPfn71axZM0eXFC/DMLR+/XqtXbtWr732mqPLicHFxUWzZ8/WN998owwZMqhSpUoaOHDgC3mucVKoXr26tm3bpocPH+rWrVvat2+fAgMDVbVqVW3evFnSo2tV3b9/P1UFeUmaPn26Dh8+rJ49e2r69OnKmjWro0uKoWPHjjpx4oRNdpk1a5aaNGkib29vB1bmOBxa/4I4evSodu3apeXLl0t69ObdokULzZgxI8ZFOFKD6tWra+rUqbpz544+//xzubi4qEmTJo4uK06jR4/Wa6+9lirO1XpahmGkqtHN1BImn0b0Pvu41HSY+vHjx2UYhooUKWJ3epEiRXT9+nVdvnxZ2bJlS+HqEm7Xrl2KiopS69atU90Ikr19YOfOnak6HCHplShRQleuXNHQoUNVrlw5pU2b1tEl2ZU1a1bVrVvXOsJVt25dZcmSxdFlxerJ36/YjtRJDaK/dHjw4IGioqL05ptvpooLstnTpEkT1a1bV1u3btVvv/1m/SLq66+/TnUXPUztqlWrpjt37uj333/X9evX9dJLLylr1qwKDAxUhw4ddO/ePW3evFn58uVLddeiyJYtmzp37qwVK1ak2ouLFi5cWBUrVtTMmTNVrVo1/f3339q6das+/vhjR5fmMAT5F8SMGTP08OFD5ciRw9pmGIbc3Nw0adKkVPdNlpeXlwoUKCDp0eF+JUuW1IwZMxQSEuLgymJXtWpVBQUFacCAAab743fkyJFUdV5kwYIFZbFY9Ndffzm6lAR7fJ9NzeL7kiS1nMJSoEABWSwWHT161KY9X758khTjqILUwN4+YMbTbfBscubMqSVLlqh69eoKDg7W6tWrlS5dOkeXZVfHjh3VtWtXSdLkyZMdXE3czPIeK/3vSwdXV1flyJFDLi6p++O2u7u7atWqpVq1amnw4MF6++23NXToUNN9lnG0AgUKKFeuXNq0aZOuX79uve5Ajhw5lDt3bm3fvl2bNm1KlUdnSI8G+VL7vhoSEqJu3bpp8uTJmjVrlvLnz2/dzi8iDq1/ATx8+FBz5szRuHHjtH//fuvjwIEDypEjR6q8svbjnJycNHDgQA0aNMjmCrup0ahRo/TDDz9ox44dji4lwTZu3KhDhw
6lqiMeMmXKpKCgIE2ePFl37tyJMT213sorNYsOxkeOHLE7/ciRI8qaNWuquf5A5syZVatWLU2aNMnuPgCkZnnz5tWWLVsUGhqq4OBg3bp1y9El2RUcHKyIiAg9ePDA7i2okDjRXzrkyZMn1Qcje4oWLcr7biJVr15dmzdv1ubNm22OeK1atapWr16tXbt2pbrD6s2kefPmcnJy0rx58zRnzhx17NgxVR1RmtII8i+AH3/8UdevX1dISIiKFy9u82jSpIlmzJjh6BLj1axZMzk7O6f6EYOAgAC1bt1aX3zxhaNLsev+/fsKDQ3V+fPntXfvXo0YMUINGjRQvXr11LZtW0eXZ2Py5MmKjIxUuXLltHTpUh0/flxHjhzRF198oQoVKji6vBiit+3jjytXrji6LKvoYDxlypQYX4iFhobqu+++S3WjL1OmTNHDhw9VtmxZLVy4UEeOHNHRo0f17bff6q+//pKzs7OjS0QKCQ8Pt/kiev/+/Tp79qyjy4pT7ty5tXnzZoWFhSkoKEg3b950dEkxODs768iRI/rzzz/5fXoBXb16Va+99pq+/fZbHTx4UCdPntTixYs1ZswYNWjQwNHlmVL16tX166+/av/+/TYjxYGBgfrqq68UERFBkH8GadOmVYsWLTRgwABdvHgx1X1uSWkE+RfAjBkzVLNmTbuHzzdp0kS7d+9O9Rc2cXFxUdeuXTVmzJhU/y3xxx9/rKioKEeXYdeaNWuUPXt2+fn5KTg4WJs2bdIXX3yh77//PtV9iMuXL5/27t2r6tWrq3fv3ipevLhq1aqlDRs2xDgPOTWI3raPPypXruzosmxMmjRJ9+/fV1BQkH755RedPXtWa9asUa1atfTSSy9pyJAhji7RRv78+bVv3z7VrFlTAwYMUMmSJVW2bFl9+eWX6tOnj4YPH+7oEpFCNm/erJdfftnm8dFHHzm6rHjlypVLmzdv1pUrV1JtmE+fPr3Sp0/v6DLgAGnTplX58uX1+eefq2rVqipevLgGDx6sTp06adKkSY4uz5SqV6+uf//9VwUKFJCPj4+1PTAwULdu3bLepg6JFxISouvXrysoKMjmlOEXkcUw41WlAACJcurUKQ0bNkxr1qxRWFiYDMNQ48aNNXfuXHl6ejq6PAAAACQAQR4AXmBDhw7V+PHjtW7dOr366quOLgcAAAAJQJAHgBfcrFmzFB4eru7du8vJiTOuAAAAUjuCPAAAAAAAJsLQCwAAAAAAJkKQBwAAAADARAjyAAAAAACYCEEeAAAAAAATIcgDAAAAAGAiBHkAABBDtWrV1LNnT0eXEa/Zs2crQ4YMcfYZNmyYSpUqlSL1AACQEgjyAACY3I4dO+Ts7Ky6desm2TyXLVum4cOHJ9n84hMUFCRnZ2f9/vvvT/W6Fi1a6NixY8lUFQAAqRNBHgAAk5sxY4a6deumX375RRcuXEiSeWbKlEnp0qVLknnF58yZM9q+fbu6du2qmTNnPtVrPTw8lC1btmSqDACA1IkgDwCAid2+fVsLFy7Ue++9p7p162r27Nk20zdv3iyLxaK1a9fq5ZdfloeHh1577TWFhYVp9erVKlKkiNKnT68333xTd+/etb7uyUPr/fz8NGLECHXs2FHp0qVTnjx5NH36dJtlHTp0SK+99po8PDyUOXNmvfPOO7p9+3a86zBr1izVq1dP7733nubPn69///3XZvqNGzfUuXNn+fj4yN3dXcWLF9ePP/4oyf6h9aNGjZKPj4/SpUunkJAQ3bt3LwFbEgAA8yDIAwBgYosWLVLhwoVVqFAhtWnTRjNnzpRhGDH6DRs2TJMmTdL27dt19uxZNW/eXBMmTNC8efO0atUq/fzzz/ryyy/jXNa4ceNUtmxZ7du3T++//77ee+89HT16VJJ0584dBQUFKWPGjPr999+1ePFirV+/Xl27do1znoZhaNasWWrTpo0KFy6sAgUKaMmSJdbpUVFRev3117Vt2zZ9++23+vPPPzVq1C
g5OzvHuj2GDRumESNGaPfu3cqePbumTJkS32YEAMBUCPIAAJjYjBkz1KZNG0lScHCwwsPDtWXLlhj9PvnkE1WqVEkvv/yyQkJCtGXLFk2dOlUvv/yyqlSpoqZNm2rTpk1xLqtOnTp6//33VaBAAX344YfKkiWL9TXz5s3TvXv3NGfOHBUvXlyvvfaaJk2apLlz5+rSpUuxznP9+vW6e/eugoKCJElt2rTRjBkzbKbv2rVLy5YtU61atZQvXz7Vq1dPr7/+ut35TZgwQSEhIQoJCVGhQoX0ySefqGjRonFvRAAATIYgDwCASR09elS7du1Sq1atJEkuLi5q0aKFTRCOVqJECev/fXx85OnpqXz58tm0hYWFxbm8x+dhsVjk6+trfc2RI0dUsmRJeXl5WftUqlRJUVFR1lF7e2bOnKkWLVrIxcVFktSqVStt27ZNJ06ckCTt379fuXLl0ksvvRRnbdGOHDmi8uXL27RVqFAhQa8FAMAsCPIAAJjUjBkz9PDhQ+XIkUMuLi5ycXHR1KlTtXTpUoWHh9v0TZMmjfX/FovF5nl0W1RUVJzLS8xr4nLt2jUtX75cU6ZMsdafM2dOPXz40HrROw8Pj0TPHwCA5xVBHgAAE3r48KHmzJmjcePGaf/+/dbHgQMHlCNHDs2fPz9F6ylSpIgOHDigO3fuWNu2bdsmJycnFSpUyO5rvvvuO+XKlUsHDhywWYdx48Zp9uzZioyMVIkSJXTu3LkE32KuSJEi2rlzp03bb7/9lvgVAwAgFSLIAwBgQj/++KOuX7+ukJAQFS9e3ObRpEkTu4fXJ6fWrVvL3d1d7dq10+HDh7Vp0yZ169ZNb731lnx8fOy+ZsaMGWratGmM+kNCQnTlyhWtWbNGgYGBqlq1qpo0aaJ169bp5MmTWr16tdasWWN3nj169NDMmTM1a9YsHTt2TEOHDtUff/yRnKsOAECKI8gDAGBCM2bMUM2aNeXt7R1jWpMmTbR7924dPHgwxerx9PTU2rVrde3aNb3yyitq2rSpatSooUmTJtntv2fPHh04cEBNmjSJMc3b21s1atSwfhmxdOlSvfLKK2rVqpWKFi2qfv36KTIy0u58W7RoocGDB6tfv34qU6aMTp8+rffeey/pVhQAgFTAYti7Rw0AAAAAAEiVGJEHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABP5P0lnIpmHJQCXAAAAAElFTkSuQmCC",
|
| 64 |
+
"text/plain": [
|
| 65 |
+
"<Figure size 1200x600 with 1 Axes>"
|
| 66 |
+
]
|
| 67 |
+
},
|
| 68 |
+
"metadata": {},
|
| 69 |
+
"output_type": "display_data"
|
| 70 |
+
}
|
| 71 |
+
],
|
| 72 |
+
"source": [
|
| 73 |
+
"amino_acid_frequencies = {}\n",
|
| 74 |
+
"\n",
|
| 75 |
+
"datasets = {'Train': train, 'Test': test, 'Val': val}\n",
|
| 76 |
+
"\n",
|
| 77 |
+
"\n",
|
| 78 |
+
"for name, df in datasets.items():\n",
|
| 79 |
+
" # Per-residue frequency: summed residue columns divided by the summed 'Length' column\n",
|
| 80 |
+
" amino_acid_frequencies[name] = df[valid_residues].sum() / df['Length'].sum()\n",
|
| 81 |
+
"\n",
|
| 82 |
+
"# Convert frequencies to a dataframe for easier manipulation\n",
|
| 83 |
+
"freq_df = pd.DataFrame(amino_acid_frequencies)\n",
|
| 84 |
+
"\n",
|
| 85 |
+
"# Plot a grouped bar chart comparing amino acid frequencies across the splits\n",
|
| 86 |
+
"plt.figure(figsize=(12, 6))\n",
|
| 87 |
+
"\n",
|
| 88 |
+
"# Set bar width\n",
|
| 89 |
+
"bar_width = 0.2\n",
|
| 90 |
+
"\n",
|
| 91 |
+
"# Generate positions for the bars\n",
|
| 92 |
+
"amino_acids = list(valid_residues)\n",
|
| 93 |
+
"x = np.arange(len(amino_acids)) # positions for the amino acids\n",
|
| 94 |
+
"\n",
|
| 95 |
+
"# Plot the bars for each dataset with an offset\n",
|
| 96 |
+
"plt.bar(x - bar_width, freq_df['Train'], width=bar_width, label='Train', align='center')\n",
|
| 97 |
+
"plt.bar(x, freq_df['Test'], width=bar_width, label='Test', align='center')\n",
|
| 98 |
+
"plt.bar(x + bar_width, freq_df['Val'], width=bar_width, label='Val', align='center')\n",
|
| 99 |
+
"\n",
|
| 100 |
+
"plt.xticks(x, amino_acids)\n",
|
| 101 |
+
"\n",
|
| 102 |
+
"plt.title('UniRef50 (100k Sequences) - Train, Test, and Val Residue Frequencies')\n",
|
| 103 |
+
"plt.xlabel('Amino Acid')\n",
|
| 104 |
+
"plt.ylabel('Frequency')\n",
|
| 105 |
+
"\n",
|
| 106 |
+
"plt.legend()\n",
|
| 107 |
+
"plt.show()"
|
| 108 |
+
]
|
| 109 |
+
},
|
| 110 |
+
{
|
| 111 |
+
"cell_type": "code",
|
| 112 |
+
"execution_count": 30,
|
| 113 |
+
"metadata": {},
|
| 114 |
+
"outputs": [
|
| 115 |
+
{
|
| 116 |
+
"data": {
|
| 117 |
+
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA/8AAAIjCAYAAABViau2AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABn/0lEQVR4nO3deZyN9f//8ecZs+/WWawTyk7WxposI5K1qCkmUyjKEqIkUoQSJaRECxUl+RASsmfLTqgmsowRZsYQw8z790ffOT/HDGbGmDMuj/vtdm4357re53q/rjPXHPM87/d1XTZjjBEAAAAAALAsF2cXAAAAAAAAbi3CPwAAAAAAFkf4BwAAAADA4gj/AAAAAABYHOEfAAAAAACLI/wDAAAAAGBxhH8AAAAAACyO8A8AAAAAgMUR/gEAAAAAsDjCP4A8IyoqSqVKlbrl/Vy+fFmDBg1S8eLF5eLiorZt297yPq+WmpqqSpUq6c0338z1vq+lVKlSeuihh5xdBq5j7969cnV11e7du51dSrYMHz5cNpvN2WXcse6//37df//9ud5vbn22w5psNpt69+7t7DIASyD8A8iStD/e//nnnwzXV6pUKUf/uLTZbA4Pf39/NWrUSIsWLcr2Nj/55BONGzdOHTt21Keffqp+/fpJ+i/8Xt2fzWZTz549020jPj5e3bt3V+HCheXj46PGjRvr119/zXQNX375pf7++2+HP2iSkpL02muvqUWLFipQoIBsNptmzpx5zW3s27dPLVq0kK+vrwoUKKAnn3xSJ0+eTNcuNTVVY8eOVVhYmDw9PVWlShV9+eWXma41M1JTU/XZZ5+pTp06KlCggPz8/HT33XerS5cu+uWXX3K0rztZhQoV1KpVKw0bNixHt3utY//qx/WOx7xg5syZmdqPnAqi69ev1/DhwxUfH58j23O2X3/9VTabTUOHDr1mm4MHD8pms6l///453j+fI7dOVFSUfH19nV3GNVntdwnIq1ydXQAApPnoo4+UmpqabnmzZs3UpUsXGWN06NAhTZkyRa1bt9bixYsVERGR5X5WrFihokWL6t133023rlq1anrxxRcdlt19990Oz1NTU9WqVSvt2LFDAwcOVKFChTR58mTdf//92rp1q8qWLXvDGsaNG6fOnTsrICDAvuyff/7R66+/rhIlSqhq1ar6+eefr/n6I0eOqGHDhgoICNCoUaOUlJSkt99+W7t27dKmTZvk7u5ub/vKK6/orbfe0jPPPKNatWrp+++/1+OPPy6bzabOnTvfsNbMeOGFF/TBBx+oTZs2ioyMlKurq/bv36/Fixfrrrvu0n333Zcj/UDq2bOnWrZsqT/++EOlS5fOkW1OmDBBSUlJ9uc//PCDvvzyS7377rsqVKiQfXndunVvqp+hQ4dq8ODBN7WN62nYsKE+//xzh2VPP/20ateure7du9uX5VQIWr9+vUaMGKGoqCgFBgbmyDadqXr16ipXrpy+/PJLvfHGGxm2mT17tiTpiSeeyPH++Ry5c1ntdwnIswwAZMFrr71mJJmTJ09muL5ixYqmUaNGOdafJNOrVy+HZXv37jWSzIMPPpitbTZu3NhUrFgx3fKSJUuaVq1a3fD1X3/9tZFk5s6da18WFxdnAgMDzWOPPXbD1//6669Gkvnpp58cll+4cMEcP37cGGPM5s2bjSQzY8aMDLfx7LPPGi8vL3Po0CH7smXLlhlJ5sMPP7QvO3LkiHFzc3N4D1NTU02DBg1MsWLFzOXLl+3LM7v/V4uNjTU2m80888wz6dalpqaaEydOZHmbuLbk5GSTP39+8+qrr96yPsaNG2ckmZiYmOu2S0pKumU15BQfHx/TtWvXW7LtzL5PeUWjRo1u+Pk8cuRII8ls2LAhw/X33HOPKVeuXJb67dq1qylZsuR12/A5cmt17drV+Pj4OLuMa7re71JGfwcAyB6m/QO4pX7++WfZbDbNmTNHb775pooVKyZPT0
81adJEv//+u0PbzJ4XWr58eRUqVEh//PGHw/KLFy/qtddeU5kyZeTh4aHixYtr0KBBunjxoiTpr7/+ks1m08qVK7Vnzx77FOCrR9iTk5N17ty5a/b/zTffKCgoSO3bt7cvK1y4sB599FF9//339v6uZf78+XJ3d1fDhg0dlnt4eCg4OPiG+y9J3377rR566CGVKFHCvqxp06a6++67NWfOHPuy77//XpcuXdJzzz1nX2az2fTss8/qyJEj2rBhw3X7+fTTT+Xq6qqBAwdes01MTIyMMapXr166dTabTUWKFHFYFh8fr759+6p48eLy8PBQmTJlNGbMmHSzPuLj4xUVFaWAgAAFBgaqa9eu2r59e7rp59c6jzmj4yk1NVUTJkxQxYoV5enpqaCgIPXo0UNnzpxxaJd2/YO1a9eqdu3a8vT01F133aXPPvssXT/x8fHq16+fSpUqJQ8PDxUrVkxdunRxODXmRsdmmmXLlql+/foKDAyUr6+v7rnnHr388ssObdzc3HT//ffr+++/T1fLrZQ2bfiPP/5Qy5Yt5efnp8jISEnSmjVr9Mgjj6hEiRL2/evXr5/+/fdfh21kdM5/2vm88+fPV6VKleTh4aGKFStqyZIlt2xfjh49qm7duikoKMje3yeffJKu3fvvv6+KFSvK29tb+fPnV82aNe0j38OHD7f/XoSFhdk/T/76668s1XL69GkNGDBAlStXlq+vr/z9/fXggw9qx44dDu2y8lkqSdOmTVPp0qXl5eWl2rVra82aNZmqJ+1nmrafV9q6dav2799vb/P999+rVatWCg0NlYeHh0qXLq2RI0cqJSUlS++BxOdITn6O3IyNGzeqRYsWCggIkLe3txo1aqR169Y5tEn7Pf7999/tI/UBAQF66qmndP78eYe2//77r1544QUVKlRIfn5+evjhh3X06FHZbDYNHz7cvr3M/C7d6DPi7Nmz6tu3r/09LFKkiJo1a5alU/IAq2PaP4Bc8dZbb8nFxUUDBgxQQkKCxo4dq8jISG3cuDHL20pISNCZM2ccpjynpqbq4Ycf1tq1a9W9e3eVL19eu3bt0rvvvqsDBw5o/vz5Kly4sD7//HO9+eabSkpK0ujRoyX992VCmhUrVsjb21spKSkqWbKk+vXrpz59+jj0v23bNlWvXl0uLo7fn9auXVvTpk3TgQMHVLly5WvWv379elWqVElubm5Z3nfpv+ASFxenmjVrpltXu3Zt/fDDDw61+vj4OOxjWru09fXr18+wn2nTpqlnz556+eWXrzkFWJJKliwpSZo7d64eeeQReXt7X7Pt+fPn1ahRIx09elQ9evRQiRIltH79eg0ZMkTHjx/XhAkTJEnGGLVp00Zr165Vz549Vb58eX333Xfq2rXrNbedGT169NDMmTP11FNP6YUXXlBMTIwmTZqkbdu2ad26dQ4/k99//10dO3ZUdHS0unbtqk8++URRUVGqUaOGKlasKOm/6zQ0aNBA+/btU7du3VS9enX9888/WrBggY4cOaJChQpl6tiUpD179uihhx5SlSpV9Prrr8vDw0O///57uj+8JalGjRr6/vvvlZiYKH9//5t6T7Li8uXLioiIUP369fX222/bf9Zz587V+fPn9eyzz6pgwYLatGmT3n//fR05ckRz58694XbXrl2refPm6bnnnpOfn5/ee+89dejQQYcPH1bBggVzdB9OnDih++67z/6lQ+HChbV48WJFR0crMTFRffv2lfTfaUgvvPCCOnbsqD59+ujChQvauXOnNm7cqMcff1zt27fXgQMH0p0eUbhw4SzV8+eff2r+/Pl65JFHFBYWphMnTujDDz9Uo0aNtHfvXoWGhjq0z8xn6fTp09WjRw/VrVtXffv21Z9//qmHH35YBQoUUPHixa9bT1hYmOrWras5c+bo3XffVb58+ezr0r4QePzxxyX9d40FX19f9e/fX76+vlqxYoWGDRumxMREjRs3LkvvA58jOfM5cjNWrF
ihBx98UDVq1NBrr70mFxcXzZgxQw888IDWrFlj/38jzaOPPqqwsDCNHj1av/76qz7++GMVKVJEY8aMsbeJiorSnDlz9OSTT+q+++7TqlWr1KpVK4ftZOZ3KTOfET179tQ333yj3r17q0KFCjp16pTWrl2rffv2qXr16jf9/gCW4NyJBwBuN1md9r9y5UojyZQvX95cvHjRvnzixIlGktm1a5d9WUZTQyWZ6Ohoc/LkSRMXF2e2bNliWrRoYSSZcePG2dt9/vnnxsXFxaxZs8bh9VOnTjWSzLp16+zLGjVqlOG0/9atW5sxY8aY+fPnm+nTp5sGDRoYSWbQoEEO7Xx8fEy3bt3SvX7RokVGklmyZEmG702aYsWKmQ4dOly3zfWm/aet++yzz9KtGzhwoJFkLly4YIwxplWrVuauu+5K1+7cuXNGkhk8eLB92ZXT/idOnGhsNpsZOXLkdetM06VLFyPJ5M+f37Rr1868/fbbZt++fenajRw50vj4+JgDBw44LB88eLDJly+fOXz4sDHGmPnz5xtJZuzYsfY2ly9ftv9MrnxfrjWV+erjac2aNUaSmTVrlkO7JUuWpFtesmRJI8msXr3aviwuLs54eHiYF1980b5s2LBhRpKZN29euv5TU1ONMZk/Nt99993r/m5dafbs2UaS2bhx4w3bZkdGU3C7du2a7phJc/78+XTLRo8ebWw2m8OpKWmfH1eSZNzd3c3vv/9uX7Zjxw4jybz//vs3vS9XT/uPjo42ISEh5p9//nFo17lzZxMQEGDflzZt2mT4OXGlnJj2f+HCBZOSkuKwLCYmxnh4eJjXX3/dviyzn6XJycmmSJEiplq1ag7tpk2bZiRl6rSsDz74wEgyS5cutS9LSUkxRYsWNeHh4fZlGf3ce/ToYby9ve2fQcZkbtq/MXyO5MTnyLXcaNp/amqqKVu2rImIiLD3acx/P+OwsDDTrFkz+7K03+Or/x9s166dKViwoP351q1bjSTTt29fh3ZRUVFGknnttdfsy2407T8znxEBAQGcHgDcANP+AeSKp556yuEidA0aNJD036jXjUyfPl2FCxdWkSJFVLNmTS1fvlyDBg1yuNr03LlzVb58eZUrV07//POP/fHAAw9IklauXHnDfhYsWKBBgwapTZs26tatm1atWqWIiAiNHz9eR44csbf7999/5eHhke71np6e9vXXc+rUKeXPn/+G9VxL2vYzU0N2ah07dqz69OmjMWPGXPeq31eaMWOGJk2apLCwMH333XcaMGCAypcvryZNmujo0aP2dnPnzlWDBg2UP39+h59T06ZNlZKSotWrV0v674Jzrq6uevbZZ+2vzZcvn55//vlM1ZORuXPnKiAgQM2aNXPou0aNGvL19U13jFSoUMF+nEr/jULdc889Dsfst99+q6pVq6pdu3bp+kub3p7ZYzPtIlfff/99hhe+vFLa8XOtu27cSlf+TNJ4eXnZ/33u3Dn9888/qlu3rowx2rZt2w232bRpU4eZPFWqVJG/v3+mPh+ywhijb7/9Vq1bt5YxxuHnERERoYSEBPsU4cDAQB05ckSbN2/O0Rqu5uHhYZ9FlJKSolOnTtlP+chouvKNPku3bNmiuLg49ezZ06Fd2tT3zOjUqZPc3Nwcpv6vWrVKR48etU/5lxx/7mfPntU///yjBg0a6Pz58/rtt98y1deV+By5+c+R7Nq+fbsOHjyoxx9/XKdOnbJv/9y5c2rSpIlWr16d7nPp6jvhNGjQQKdOnVJiYqIk2aflX3namaRsvf+Z+YwIDAzUxo0bdezYsSxvH7hTMO0fQI7L6D7eV56bLv3/8HL1OZIZadOmjXr37q3k5GRt3rxZo0aN0vnz5x2m3R88eFD79u275pTbuLi4rOyCpP/2o1+/flq6dKl+/vln+9Wtvby8MjzH8sKFC/b1N2KMyXI9adK2n5kaslrrqlWrtGjRIr300kvXPc//ai4uLurVq5d69eqlU6dOad26dZ
o6daoWL16szp072883PnjwoHbu3HnDn9OhQ4cUEhKS7qrs99xzT6ZrutrBgweVkJCQ7tzhq/tOc/UxK/133F55zP7xxx/q0KHDDfvNzLHZqVMnffzxx3r66ac1ePBgNWnSRO3bt1fHjh3TnWKSdvxk9LuWJjk5WadPn3ZYVrhwYYdp3Fnl6uqqYsWKpVt++PBhDRs2TAsWLEj3O52QkHDD7Wbmvc4JJ0+eVHx8vKZNm6Zp06Zl2Cbt5/HSSy/pp59+Uu3atVWmTBk1b95cjz/+eIbnpN+M1NRUTZw4UZMnT1ZMTIzD+fIZnfJwo8/SQ4cOSVK6u464ubnprrvuylRNBQsWVEREhL777jtNnTpVnp6emj17tlxdXfXoo4/a2+3Zs0dDhw7VihUr7IEvTWZ+7lfjc+T6/eb0/3FXb1/SdU+JSEhIcPji+nrHor+/vw4dOiQXFxeFhYU5tCtTpkyW68vM+zh27Fh17dpVxYsXV40aNdSyZUt16dIl08c9cCcg/APIkhuNbp8/f97e5krXChyZCcHFihVT06ZNJUktW7ZUoUKF1Lt3bzVu3Nh+0b3U1FRVrlxZ48ePz3AbNzrP9VrSXndliAoJCdHx48fTtU1bdvU5ulcrWLDgTYWakJAQh/6urqFAgQL20f6QkBCtXLlSxhiHoHitWitWrKj4+Hh9/vnn6tGjR7o/2jKjYMGCevjhh/Xwww/r/vvv16pVq3To0CGVLFlSqampatasmQYNGpTha6++rWJm2Gy2DI+jqy86lpqaqiJFimjWrFkZbufqP6pv5pi9ut/MHJteXl5avXq1Vq5cqUWLFmnJkiX6+uuv9cADD+jHH390qCft+LnyNnxXW79+vRo3buywLCYm5qbucX/lKHWalJQUNWvWTKdPn9ZLL72kcuXKycfHR0ePHlVUVNQNZzFIOfde30haLU888cQ1Q06VKlUk/XctkP3792vhwoVasmSJvv32W02ePFnDhg3TiBEjcqymUaNG6dVXX1W3bt00cuRIFShQQC4uLurbt2+G711uvVdPPPGEFi5cqIULF+rhhx/Wt99+q+bNm9t/T+Lj49WoUSP5+/vr9ddfV+nSpeXp6alff/1VL730UqZ+7tfD54ijW/V/3JXbl/67DW21atUybHP1lyi5dSxmtq9HH31UDRo00Hfffacff/xR48aN05gxYzRv3jw9+OCDOV4TcDsi/APIkrSLMu3fvz/dHxvnz5/X33//rebNm9/SGnr06KF3331XQ4cOVbt27WSz2VS6dGnt2LFDTZo0ue5oaFalTSm88g+6atWqac2aNUpNTXUIQhs3bpS3t/cN//AsV66cYmJisl1T0aJFVbhwYW3ZsiXduk2bNjn84VatWjV9/PHH2rdvnypUqOBQa9r6KxUqVEjffPON6tevryZNmmjt2rU3/DLjemrWrKlVq1bp+PHjKlmypEqXLq2kpCT7lznXUrJkSS1fvlxJSUkOf3Du378/Xdv8+fNnOD08bQQ0TenSpfXTTz+pXr16mZqdkRmlS5fW7t27b9gms8emi4uLmjRpoiZNmmj8+PEaNWqUXnnlFa1cudLhPYuJiZGLi8t1j7WqVatq2bJlDssyezeJrNi1a5cOHDigTz/9VF26dLEvv7rvvKBw4cLy8/NTSkrKDY9BSfLx8VGnTp3UqVMnJScnq3379nrzzTc1ZMgQeXp65shnzTfffKPGjRtr+vTpDsvj4+Ov++XOtaR9Rh88eNA+JVySLl26pJiYGFWtWjVT23n44Yfl5+en2bNny83NTWfOnHGY8v/zzz/r1KlTmjdvnsOdS27ms+1a+BzJ2udIdmuQJH9//0z9bmRG2hc1MTExDjNRMro7RU7tU0hIiJ577jk999xziouLU/Xq1fXmm28S/oH/wzn/ALKkSZMmcnd315QpU9KN7EybNk2XL1++5f/Jurq66sUXX9S+ffvstzt79NFHdfToUX300Ufp2v/777/XvXWf9N/I/t
UjPJcuXdJbb70ld3d3hxHUjh076sSJE5o3b5592T///KO5c+eqdevWGZ5jf6Xw8HDt3r37pm7P1KFDBy1cuFB///23fdny5ct14MABPfLII/Zlbdq0kZubmyZPnmxfZozR1KlTVbRoUdWtWzfdtosVK6affvpJ//77r5o1a6ZTp05dt5bY2Fjt3bs33fLk5GQtX75cLi4u9mmejz76qDZs2KClS5emax8fH6/Lly9L+m+Gx+XLlzVlyhT7+pSUFL3//vvpXle6dGn99ttvOnnypH3Zjh070l0l/9FHH1VKSopGjhyZbhuXL19WfHz8dfczIx06dNCOHTv03XffpVuXNiKV2WPz6in60v//cubqY2Xr1q2qWLHidc/hzp8/v5o2berwyGhWzs1KG5G7cgTOGKOJEyfmeF83K1++fOrQoYO+/fbbDMPWlcfQ1ce9u7u7KlSoIGOMLl26JOm/LwckZevYubKmq0dK586d63COe1bUrFlThQsX1tSpU5WcnGxfPnPmzCzV6eXlpXbt2umHH37QlClT5OPjozZt2jjULTn+3JOTkx0+a7KCz5Gc+RzJrho1aqh06dJ6++23lZSUlG79le9LZkVEREhSumMio/f/Zn+XUlJS0p1qUqRIEYWGhuborRCB2x0j/wCypEiRIho2bJiGDh2qhg0b6uGHH5a3t7fWr1+vL7/8Us2bN1fr1q1veR1RUVEaNmyYxowZo7Zt2+rJJ5/UnDlz1LNnT61cuVL16tVTSkqKfvvtN82ZM0dLly7N8NZ4aRYsWKA33nhDHTt2VFhYmE6fPq3Zs2dr9+7dGjVqlMOIaceOHXXffffpqaee0t69e1WoUCFNnjxZKSkpmZoO3KZNG40cOVKrVq1KN0ti0qRJio+Pt1+w6H//+5/9YoPPP/+8Pey9/PLLmjt3rho3bqw+ffooKSlJ48aNU+XKlfXUU0/Zt1esWDH17dtX48aN06VLl1SrVi3Nnz9fa9as0axZs645lbJMmTL68ccfdf/99ysiIkIrVqy45i3ljhw5otq1a+uBBx5QkyZNFBwcrLi4OH355ZfasWOH+vbtax/BHDhwoBYsWKCHHnrIfrurc+fOadeuXfrmm2/0119/qVChQmrdurXq1aunwYMH66+//lKFChU0b968DM8j7tatm8aPH6+IiAhFR0crLi5OU6dOVcWKFR3OQ27UqJF69Oih0aNHa/v27WrevLnc3Nx08OBBzZ07VxMnTlTHjh1v+PO70sCBA/XNN9/okUceUbdu3VSjRg2dPn1aCxYs0NSpU1W1atVMH5uvv/66Vq9erVatWqlkyZKKi4vT5MmTVaxYMYfbMV66dEmrVq1KdxEtZylXrpxKly6tAQMG6OjRo/L399e3336b4+frS/+NDjZq1Eg///xztrfx1ltvaeXKlapTp46eeeYZVahQQadPn9avv/6qn376yf4lTPPmzRUcHKx69eopKChI+/bt06RJk9SqVSv5+flJ+i8wSdIrr7yizp07y83NTa1bt5aPj4+GDx+uESNGaOXKlRnePz7NQw89pNdff11PPfWU6tatq127dmnWrFnZPk/Zzc1Nb7zxhnr06KEHHnhAnTp1UkxMjGbMmJHlbT7xxBP67LPPtHTpUkVGRtoDmiTVrVtX+fPnV9euXfXCCy/IZrPp888/z/aUbz5HcuZz5HouXbqU4W1bCxQooOeee04ff/yxHnzwQVWsWFFPPfWUihYtqqNHj2rlypXy9/fX//73vyztV40aNdShQwdNmDBBp06dst/q78CBA5IcR/uv97uUGWfPnlWxYsXUsWNHVa1aVb6+vvrpp5+0efNmvfPOO1mqG7C03LuxAAAr+eKLL8x9991nfHx8jIeHhylXrpwZMWKEw+2djPn/t6eaO3euw/KYmJh0t1q61q3+rnXrnuHDhxtJZuXKlcaY/25xNWbMGFOxYkXj4eFh8ufPb2rUqGFGjBhhEhIS7K/L6FZ/W7ZsMa1btz
ZFixY17u7uxtfX19SvX9/MmTMnw75Pnz5toqOjTcGCBY23t7dp1KiR2bx58/XeMgdVqlQx0dHR6Zan3Roqo8fVt0DavXu3ad68ufH29jaBgYEmMjLSxMbGpttmSkqKGTVqlClZsqRxd3c3FStWNF988UWGfafd6i/Nxo0bjZ+fn2nYsGGGt/UyxpjExEQzceJEExERYYoVK2bc3NyMn5+fCQ8PNx999JHDbaOMMebs2bNmyJAhpkyZMsbd3d0UKlTI1K1b17z99tsmOTnZ3u7UqVPmySefNP7+/iYgIMA8+eSTZtu2bRneAvGLL74wd911l3F3dzfVqlUzS5cuvebtxaZNm2Zq1KhhvLy8jJ+fn6lcubIZNGiQOXbs2HXfC2Myvh3YqVOnTO/eve3HTrFixUzXrl0dbiWXmWNz+fLlpk2bNiY0NNS4u7ub0NBQ89hjj6W7ndnixYuNJHPw4MEMfx454Vq3+rvWrcL27t1rmjZtanx9fU2hQoXMM888Y78V15U/q2vd6i+j3/GSJUs63KLv7NmzRpLp3Llzlvbl6lv9GWPMiRMnTK9evUzx4sWNm5ubCQ4ONk2aNDHTpk2zt/nwww9Nw4YNTcGCBY2Hh4cpXbq0GThwoMNniTH/3XauaNGixsXFxeE9e/HFF43NZsvwVnVXunDhgnnxxRdNSEiI8fLyMvXq1TMbNmxId6xl5bPUGGMmT55swsLCjIeHh6lZs6ZZvXr1NW9ndy2XL182ISEhRpL54Ycf0q1ft26due+++4yXl5cJDQ01gwYNMkuXLnX4XDYmc7f643MkZz5HriXtVp0ZPUqXLm1vt23bNtO+fXv7cV+yZEnz6KOPmuXLl9vbXOuWvzNmzEj3uXHu3DnTq1cvU6BAAePr62vatm1r9u/fbySZt956y+H11/pdysxnxMWLF83AgQNN1apVjZ+fn/Hx8TFVq1Y1kydPvu77AtxpbMbcgqtyAACu6/PPP1evXr10+PBh+y3ecGN//fWXwsLCNGPGDEVFRTm7HKdo27atbDZbhlOEreyHH37QQw89pB07dqhy5crOLueGateurZIlS2ru3LnOLgVX4XPEubZv3657771XX3zxhcN1JADcepzzDwBOEBkZqRIlSuiDDz5wdim4jezbt08LFy7M8Hxjq1u5cqU6d+58WwT/xMRE7dixQ6+//rqzSwGcKqM7A02YMEEuLi4OF4oEkDs45x8AnMDFxeWGV3cGrla+fHn7xczuNOPGjXN2CZnm7+/PRcYASWPHjtXWrVvVuHFjubq6avHixVq8eLG6d+9+07cnBJB1hH8AAAAAOa5u3bpatmyZRo4cqaSkJJUoUULDhw/XK6+84uzSgDsS5/wDAAAAAGBxnPMPAAAAAIDFEf4BAAAAALA4zvnPIampqTp27Jj8/Pxks9mcXQ4AAAAAwOKMMTp79qxCQ0Pl4nL9sX3Cfw45duwYVy0FAAAAAOS6v//+W8WKFbtuG8J/DvHz85P035vu7+/v5GoAAAAAAFaXmJio4sWL2/Po9RD+c0jaVH9/f3/CPwAAAAAg12Tm1HMu+AcAAAAAgMUR/gEAAAAAsDjCPwAAAAAAFsc5/wAAAACAWyIlJUWXLl1ydhm3rXz58snV1TVHbidP+AcAAAAA5LikpCQdOXJExhhnl3Jb8/b2VkhIiNzd3W9qO4R/AAAAAECOSklJ0ZEjR+Tt7a3ChQvnyMj1ncYYo+TkZJ08eVIxMTEqW7asXFyyf+Y+4R8AAAAAkKMuXbokY4wKFy4sLy8vZ5dz2/Ly8pKbm5sOHTqk5ORkeXp6ZntbXPAPAAAAAHBLMOJ/825mtN9hOzmyFQAAAAAAkGcR/gEAAAAAsDjO+QcAAAAA5IromZtztb/pUbVytb+MlCpVSn379lXfvn2dWgcj/wAAAACAO57NZrvuY/jw4dna7ubNm9W9e/ecLTYbGPkHAAAAANzxjh8/bv/3119/rWHDhmn//v32Zb6+vvZ/G2
OUkpIiV9cbR+rChQvnbKHZxMg/AAAAAOCOFxwcbH8EBATIZrPZn//222/y8/PT4sWLVaNGDXl4eGjt2rX6448/1KZNGwUFBcnX11e1atXSTz/95LDdUqVKacKECfbnNptNH3/8sdq1aydvb2+VLVtWCxYsuOX7R/gHAAAAACATBg8erLfeekv79u1TlSpVlJSUpJYtW2r58uXatm2bWrRoodatW+vw4cPX3c6IESP06KOPaufOnWrZsqUiIyN1+vTpW1o74R8AAAAAgEx4/fXX1axZM5UuXVoFChRQ1apV1aNHD1WqVElly5bVyJEjVbp06RuO5EdFRemxxx5TmTJlNGrUKCUlJWnTpk23tHbCPwAAAAAAmVCzZk2H50lJSRowYIDKly+vwMBA+fr6at++fTcc+a9SpYr93z4+PvL391dcXNwtqTkNF/wDAAAAACATfHx8HJ4PGDBAy5Yt09tvv60yZcrIy8tLHTt2VHJy8nW34+bm5vDcZrMpNTU1x+u9EuEfAAAAAIBsWLdunaKiotSuXTtJ/80E+Ouvv5xb1DUQ/mFJ0TM352p/06Nq5Wp/AAAAAJyvbNmymjdvnlq3bi2bzaZXX331lo/gZxfhHwAAAACQK6w2aDZ+/Hh169ZNdevWVaFChfTSSy8pMTHR2WVlyGaMMc4uwgoSExMVEBCghIQE+fv7O7ucOx4j/wAAAIDzXLhwQTExMQoLC5Onp6ezy7mtXe+9zEoO5Wr/AAAAAABYHOEfAAAAAACLI/wDAAAAAGBxhH8AAAAAACyO8A8AAAAAgMUR/gEAAAAAsDjCPwAAAAAAFkf4BwAAAADA4gj/AAAAAABYnKuzCwAAAAAA3CFmd8rd/h7/Onf7y8MY+QcAAAAA3PFsNtt1H8OHD7+pbc+fPz/Has0ORv4BAAAAAHe848eP2//99ddfa9iwYdq/f799ma+vrzPKyjGM/AMAAAAA7njBwcH2R0BAgGw2m8Oyr776SuXLl5enp6fKlSunyZMn21+bnJys3r17KyQkRJ6enipZsqRGjx4tSSpVqpQkqV27drLZbPbnuY2RfwAAAAAArmPWrFkaNmyYJk2apHvvvVfbtm3TM888Ix8fH3Xt2lXvvfeeFixYoDlz5qhEiRL6+++/9ffff0uSNm/erCJFimjGjBlq0aKF8uXL55R9IPwDAAAAAHAdr732mt555x21b99ekhQWFqa9e/fqww8/VNeuXXX48GGVLVtW9evXl81mU8mSJe2vLVy4sCQpMDBQwcHBTqlfIvwDAAAAAHBN586d0x9//KHo6Gg988wz9uWXL19WQECAJCkqKkrNmjXTPffcoxYtWuihhx5S8+bNnVVyhpx6zv/q1avVunVrhYaGZnj1Q2OMhg0bppCQEHl5ealp06Y6ePCgQ5vTp08rMjJS/v7+CgwMVHR0tJKSkhza7Ny5Uw0aNJCnp6eKFy+usWPHpqtl7ty5KleunDw9PVW5cmX98MMPOb6/AAAAAIDbS1q+/Oijj7R9+3b7Y/fu3frll18kSdWrV1dMTIxGjhypf//9V48++qg6duzozLLTcWr4P3funKpWraoPPvggw/Vjx47Ve++9p6lTp2rjxo3y8fFRRESELly4YG8TGRmpPXv2aNmyZVq4cKFWr16t7t2729cnJiaqefPmKlmypLZu3apx48Zp+PDhmjZtmr3N+vXr9dhjjyk6Olrbtm1T27Zt1bZtW+3evfvW7TwAAAAAIM8LCgpSaGio/vzzT5UpU8bhERYWZm/n7++vTp066aOPPtLXX3+tb7/9VqdPn5Ykubm5KSUlxVm7IMnJ0/4ffPBBPfjggxmuM8ZowoQJGjp0qNq0aSNJ+uyzzxQUFKT58+erc+fO2rdvn5YsWaLNmzerZs2akqT3339fLVu21Ntvv63Q0FDNmjVLycnJ+uSTT+Tu7q6KFStq+/btGj9+vP1LgokTJ6pFixYaOHCgJGnkyJFatmyZJk2apKlTp+bCOw
EAAAAAyKtGjBihF154QQEBAWrRooUuXryoLVu26MyZM+rfv7/Gjx+vkJAQ3XvvvXJxcdHcuXMVHByswMBASf9d8X/58uWqV6+ePDw8lD9//lzfhzx7zn9MTIxiY2PVtGlT+7KAgADVqVNHGzZsUOfOnbVhwwYFBgbag78kNW3aVC4uLtq4caPatWunDRs2qGHDhnJ3d7e3iYiI0JgxY3TmzBnlz59fGzZsUP/+/R36j4iISHcawpUuXryoixcv2p8nJibmwF4DAAAAgIU9/rWzK8iWp59+Wt7e3ho3bpwGDhwoHx8fVa5cWX379pUk+fn5aezYsTp48KDy5cunWrVq6YcffpCLy3+T7d955x31799fH330kYoWLaq//vor1/chz4b/2NhYSf9NsbhSUFCQfV1sbKyKFCnisN7V1VUFChRwaHPlVIwrtxkbG6v8+fMrNjb2uv1kZPTo0RoxYkQ29gwAAAAAkJdFRUUpKirKYdnjjz+uxx9/PMP2zzzzjMPFAK/WunVrtW7dOidLzDKnnvN/OxsyZIgSEhLsj7R7OAIAAAAAkNfk2fCfdv/DEydOOCw/ceKEfV1wcLDi4uIc1l++fFmnT592aJPRNq7s41ptrncPRg8PD/n7+zs8AAAAAADIi/Js+A8LC1NwcLCWL19uX5aYmKiNGzcqPDxckhQeHq74+Hht3brV3mbFihVKTU1VnTp17G1Wr16tS5cu2dssW7ZM99xzj/0iC+Hh4Q79pLVJ6wcAAAAAgNuZU8N/UlKS/R6J0n8X+du+fbsOHz4sm82mvn376o033tCCBQu0a9cudenSRaGhoWrbtq0kqXz58mrRooWeeeYZbdq0SevWrVPv3r3VuXNnhYaGSvrvvAx3d3dFR0drz549+vrrrzVx4kSHC/z16dNHS5Ys0TvvvKPffvtNw4cP15YtW9S7d+/cfksAAAAAAMhxTr3g35YtW9S4cWP787RA3rVrV82cOVODBg3SuXPn1L17d8XHx6t+/fpasmSJPD097a+ZNWuWevfurSZNmsjFxUUdOnTQe++9Z18fEBCgH3/8Ub169VKNGjVUqFAhDRs2zH6bP0mqW7euZs+eraFDh+rll19W2bJlNX/+fFWqVCkX3gUAAAAAAG4tmzHGOLsIK0hMTFRAQIASEhI4/z8PiJ65OVf7mx5VK1f7AwAAAPKyCxcuKCYmRmFhYQ6Dt8i6672XWcmhefacfwAAAAAAkDMI/wAAAAAAWBzhHwAAAAAAi3PqBf8AAAAAAHeO3stz945qk5pMytX+7r//flWrVk0TJkzI1X4zg5F/AAAAAMAdr3Xr1mrRokWG69asWSObzaadO3fmclU5h/APAAAAALjjRUdHa9myZTpy5Ei6dTNmzFDNmjVVpUoVJ1SWMwj/AAAAAIA73kMPPaTChQtr5syZDsuTkpI0d+5ctW3bVo899piKFi0qb29vVa5cWV9++aVzis0Gwj8AAAAA4I7n6uqqLl26aObMmTLG2JfPnTtXKSkpeuKJJ1SjRg0tWrRIu3fvVvfu3fXkk09q06ZNTqw68wj/AAAAAABI6tatm/744w+tWrXKvmzGjBnq0KGDSpYsqQEDBqhatWq666679Pzzz6tFixaaM2eOEyvOPMI/AAAAAACSypUrp7p16+qTTz6RJP3+++9as2aNoqOjlZKSopEjR6py5coqUKCAfH19tXTpUh0+fNjJVWcO4R8AAAAAgP8THR2tb7/9VmfPntWMGTNUunRpNWrUSOPGjdPEiRP10ksvaeXKldq+fbsiIiKUnJzs7JIzxdXZBeDOED1zs7NLAAAAAIAbevTRR9WnTx/Nnj1bn332mZ599lnZbDatW7dObdq00RNPPCFJSk1N1YEDB1ShQgUnV5w5jPwDAAAAAPB/fH191alTJw0ZMkTHjx9XVFSUJKls2bJatmyZ1q9fr3379qlHjx46ceKEc4vNAkb+AQAAAAC5YlKTSc4uIVOio6M1ffp0tW
zZUqGhoZKkoUOH6s8//1RERIS8vb3VvXt3tW3bVgkJCU6uNnMI/wAAAAAAXCE8PNzhdn+SVKBAAc2fP/+6r/v5559vXVE3iWn/AAAAAABYHOEfAAAAAACLI/wDAAAAAGBxhH8AAAAAACyO8A8AAAAAuCWuvmgesi6n3kPCPwAAAAAgR+XLl0+SlJyc7ORKbn/nz5+XJLm5ud3UdrjVHwAAAAAgR7m6usrb21snT56Um5ubXFwYd84qY4zOnz+vuLg4BQYG2r9QyS7CPwAAAAAgR9lsNoWEhCgmJkaHDh1ydjm3tcDAQAUHB9/0dgj/AAAAAIAc5+7urrJlyzL1/ya4ubnd9Ih/GsI/AAAAAOCWcHFxkaenp7PLgLjgHwAAAAAAlkf4BwAAAADA4gj/AAAAAABYHOEfAAAAAACLI/wDAAAAAGBxhH8AAAAAACyO8A8AAAAAgMUR/gEAAAAAsDjCPwAAAAAAFkf4BwAAAADA4gj/AAAAAABYHOEfAAAAAACLI/wDAAAAAGBxhH8AAAAAACyO8A8AAAAAgMUR/gEAAAAAsDhXZxcAWEH0zM252t/0qFq52h8AAACA2xsj/wAAAAAAWBzhHwAAAAAAiyP8AwAAAABgcYR/AAAAAAAsjvAPAAAAAIDFEf4BAAAAALA4wj8AAAAAABZH+AcAAAAAwOII/wAAAAAAWBzhHwAAAAAAiyP8AwAAAABgcYR/AAAAAAAsjvAPAAAAAIDFEf4BAAAAALA4wj8AAAAAABZH+AcAAAAAwOII/wAAAAAAWBzhHwAAAAAAiyP8AwAAAABgcYR/AAAAAAAsjvAPAAAAAIDFEf4BAAAAALA4wj8AAAAAABZH+AcAAAAAwOII/wAAAAAAWBzhHwAAAAAAiyP8AwAAAABgcYR/AAAAAAAsjvAPAAAAAIDFEf4BAAAAALA4wj8AAAAAABaXp8N/SkqKXn31VYWFhcnLy0ulS5fWyJEjZYyxtzHGaNiwYQoJCZGXl5eaNm2qgwcPOmzn9OnTioyMlL+/vwIDAxUdHa2kpCSHNjt37lSDBg3k6emp4sWLa+zYsbmyjwAAAAAA3Gp5OvyPGTNGU6ZM0aRJk7Rv3z6NGTNGY8eO1fvvv29vM3bsWL333nuaOnWqNm7cKB8fH0VEROjChQv2NpGRkdqzZ4+WLVumhQsXavXq1erevbt9fWJiopo3b66SJUtq69atGjdunIYPH65p06bl6v4CAAAAAHAr2MyVw+h5zEMPPaSgoCBNnz7dvqxDhw7y8vLSF198IWOMQkND9eKLL2rAgAGSpISEBAUFBWnmzJnq3Lmz9u3bpwoVKmjz5s2qWbOmJGnJkiVq2bKljhw5otDQUE2ZMkWvvPKKYmNj5e7uLkkaPHiw5s+fr99++y1TtSYmJiogIEAJCQny9/fP4Xfi9hc9c7OzS7CU6VG1nF0CAAAAACfLSg7N0yP/devW1fLly3XgwAFJ0o4dO7R27Vo9+OCDkqSYmBjFxsaqadOm9tcEBASoTp062rBhgyRpw4YNCgwMtAd/SWratKlcXFy0ceNGe5uGDRvag78kRUREaP/+/Tpz5kyGtV28eFGJiYkODwAAAAAA8iJXZxdwPYMHD1ZiYqLKlSunfPnyKSUlRW+++aYiIyMlSbGxsZKkoKAgh9cFBQXZ18XGxqpIkSIO611dXVWgQAGHNmFhYem2kbYuf/786WobPXq0RowYkQN7CQAAAADArZWnR/7nzJmjWbNmafbs2fr111/16aef6u2339ann37q7NI0ZMgQJSQk2B9///23s0sCAAAAACBDeXrkf+DAgRo8eLA6d+4sSapcubIOHTqk0aNHq2vXrgoODpYknThxQiEhIfbXnThxQtWqVZMkBQcHKy4uzmG7ly9f1unTp+2vDw4O1okTJxzapD1Pa3M1Dw8PeXh43PxOAgAAAABwi+Xpkf/z58/LxcWxxHz58ik1NVWSFBYWpuDgYC1fvty+PjExURs3bl
R4eLgkKTw8XPHx8dq6dau9zYoVK5Samqo6derY26xevVqXLl2yt1m2bJnuueeeDKf8AwAAAABwO8nT4b9169Z68803tWjRIv3111/67rvvNH78eLVr106SZLPZ1LdvX73xxhtasGCBdu3apS5duig0NFRt27aVJJUvX14tWrTQM888o02bNmndunXq3bu3OnfurNDQUEnS448/Lnd3d0VHR2vPnj36+uuvNXHiRPXv399Zuw4AAAAAQI7J09P+33//fb366qt67rnnFBcXp9DQUPXo0UPDhg2ztxk0aJDOnTun7t27Kz4+XvXr19eSJUvk6elpbzNr1iz17t1bTZo0kYuLizp06KD33nvPvj4gIEA//vijevXqpRo1aqhQoUIaNmyYunfvnqv7CwAAAADArWAzxhhnF2EFWbm/4p0oeuZmZ5dgKdOjajm7BAAAAABOlpUcmqen/QMAAAAAgJtH+AcAAAAAwOII/wAAAAAAWBzhHwAAAAAAi8vTV/sHkLHcvoAiFxgEAAAAbm+M/AMAAAAAYHGEfwAAAAAALI7wDwAAAACAxRH+AQAAAACwOMI/AAAAAAAWR/gHAAAAAMDiCP8AAAAAAFgc4R8AAAAAAIsj/AMAAAAAYHGEfwAAAAAALI7wDwAAAACAxRH+AQAAAACwOMI/AAAAAAAWR/gHAAAAAMDiCP8AAAAAAFgc4R8AAAAAAIsj/AMAAAAAYHGEfwAAAAAALI7wDwAAAACAxRH+AQAAAACwOMI/AAAAAAAWR/gHAAAAAMDiCP8AAAAAAFgc4R8AAAAAAIsj/AMAAAAAYHGEfwAAAAAALI7wDwAAAACAxRH+AQAAAACwOMI/AAAAAAAWR/gHAAAAAMDiCP8AAAAAAFgc4R8AAAAAAIsj/AMAAAAAYHGEfwAAAAAALI7wDwAAAACAxRH+AQAAAACwOMI/AAAAAAAWR/gHAAAAAMDiCP8AAAAAAFgc4R8AAAAAAIsj/AMAAAAAYHGEfwAAAAAALI7wDwAAAACAxRH+AQAAAACwOMI/AAAAAAAWR/gHAAAAAMDiCP8AAAAAAFgc4R8AAAAAAIsj/AMAAAAAYHGEfwAAAAAALI7wDwAAAACAxRH+AQAAAACwOMI/AAAAAAAWR/gHAAAAAMDiCP8AAAAAAFgc4R8AAAAAAIsj/AMAAAAAYHHZCv9//vlnTtcBAAAAAABukWyF/zJlyqhx48b64osvdOHChZyuCQAAAAAA5KBshf9ff/1VVapUUf/+/RUcHKwePXpo06ZNOV0bAAAAAADIAdkK/9WqVdPEiRN17NgxffLJJzp+/Ljq16+vSpUqafz48Tp58mRO1wkAAAAAALLppi745+rqqvbt22vu3LkaM2aMfv/9dw0YMEDFixdXly5ddPz48ZyqEwAAAAAAZJPrzbx4y5Yt+uSTT/TVV1/Jx8dHAwYMUHR0tI4cOaIRI0aoTZs2nA4AWED0zM253uf0qFq53icAAABgVdkK/+PHj9eMGTO0f/9+tWzZUp999platmwpF5f/JhKEhYVp5syZKlWqVE7WCgAAAAAAsiFb4X/KlCnq1q2boqKiFBISkmGbIkWKaPr06TdVHAAAAAAAuHnZCv8HDx68YRt3d3d17do1O5sHAAAAAAA5KFsX/JsxY4bmzp2bbvncuXP16aef3nRRAAAAAAAg52Qr/I8ePVqFChVKt7xIkSIaNWrUTRd1paNHj+qJJ55QwYIF5eXlpcqVK2vLli329cYYDRs2TCEhIfLy8lLTpk3TzUw4ffq0IiMj5e/vr8DAQEVHRyspKcmhzc6dO9WgQQN5enqqePHiGjt2bI7uBwAAAAAAzpKt8H/48GGFhYWlW16yZEkdPnz4potKc+bMGdWrV09ubm5avHix9u7dq3feeUf58+e3txk7dqzee+89TZ06VRs3bpSPj48iIiJ04cIFe5vIyEjt2bNHy5Yt08KFC7V69Wp1797dvj4xMVHNmzdXyZIltXXrVo
0bN07Dhw/XtGnTcmxfAAAAAABwlmyd81+kSBHt3Lkz3dX8d+zYoYIFC+ZEXZKkMWPGqHjx4poxY4Z92ZVfOhhjNGHCBA0dOlRt2rSRJH322WcKCgrS/Pnz1blzZ+3bt09LlizR5s2bVbNmTUnS+++/r5YtW+rtt99WaGioZs2apeTkZH3yySdyd3dXxYoVtX37do0fP97hSwIAAAAAAG5H2Rr5f+yxx/TCCy9o5cqVSklJUUpKilasWKE+ffqoc+fOOVbcggULVLNmTT3yyCMqUqSI7r33Xn300Uf29TExMYqNjVXTpk3tywICAlSnTh1t2LBBkrRhwwYFBgbag78kNW3aVC4uLtq4caO9TcOGDeXu7m5vExERof379+vMmTMZ1nbx4kUlJiY6PAAAAAAAyIuyFf5HjhypOnXqqEmTJvLy8pKXl5eaN2+uBx54IEfP+f/zzz81ZcoUlS1bVkuXLtWzzz6rF154wX5RwdjYWElSUFCQw+uCgoLs62JjY1WkSBGH9a6uripQoIBDm4y2cWUfVxs9erQCAgLsj+LFi9/k3gIAAAAAcGtka9q/u7u7vv76a40cOVI7duywX4ivZMmSOVpcamqqatasaf9C4d5779Xu3bs1depUp99GcMiQIerfv7/9eWJiIl8AAAAAAADypGyF/zR333237r777pyqJZ2QkBBVqFDBYVn58uX17bffSpKCg4MlSSdOnFBISIi9zYkTJ1StWjV7m7i4OIdtXL58WadPn7a/Pjg4WCdOnHBok/Y8rc3VPDw85OHhkc09AwAAAAAg92Qr/KekpGjmzJlavny54uLilJqa6rB+xYoVOVJcvXr1tH//fodlBw4csM8wCAsLU3BwsJYvX24P+4mJidq4caOeffZZSVJ4eLji4+O1detW1ahRw15famqq6tSpY2/zyiuv6NKlS3Jzc5MkLVu2TPfcc4/DnQUAAAAAALgdZeuc/z59+qhPnz5KSUlRpUqVVLVqVYdHTunXr59++eUXjRo1Sr///rtmz56tadOmqVevXpIkm82mvn376o033tCCBQu0a9cudenSRaGhoWrbtq2k/2YKtGjRQs8884w2bdqkdevWqXfv3urcubNCQ0MlSY8//rjc3d0VHR2tPXv26Ouvv9bEiRMdpvUDAAAAAHC7ytbI/1dffaU5c+aoZcuWOV2Pg1q1aum7777TkCFD9PrrryssLEwTJkxQZGSkvc2gQYN07tw5de/eXfHx8apfv76WLFkiT09Pe5tZs2apd+/eatKkiVxcXNShQwe999579vUBAQH68ccf1atXL9WoUUOFChXSsGHDuM0fAAAAAMASbMYYk9UXhYaG6ueff76l5/vfbhITExUQEKCEhAT5+/s7u5w8J3rmZmeXgNvM9Khazi4BAAAAyNOykkOzNe3/xRdf1MSJE5WN7w0AAAAAAEAuy9a0/7Vr12rlypVavHixKlasaL9IXpp58+blSHEAAAAAAODmZSv8BwYGql27djldCwAAAAAAuAWyFf5nzJiR03UAAAAAAIBbJFvn/EvS5cuX9dNPP+nDDz/U2bNnJUnHjh1TUlJSjhUHAAAAAABuXrZG/g8dOqQWLVro8OHDunjxopo1ayY/Pz+NGTNGFy9e1NSpU3O6TgAAAAAAkE3ZGvnv06ePatasqTNnzsjLy8u+vF27dlq+fHmOFQcAAAAAAG5etkb+16xZo/Xr18vd3d1healSpXT06NEcKQwAAAAAAOSMbI38p6amKiUlJd3yI0eOyM/P76aLAgAAAAAAOSdb4b958+aaMGGC/bnNZlNSUpJee+01tWzZMqdqAwAAAAAAOSBb0/7feecdRUREqEKFCrpw4YIef/xxHTx4UIUKFdKXX36Z0zUCAAAAAICbkK3wX6xYMe3YsUNfffWVdu7cqaSkJEVHRysyMtLhAoAAAAAAAMD5shX+JcnV1VVPPPFETtYCAAAAAABugWyF/88+++y667t06ZKtYgAAAA
AAQM7LVvjv06ePw/NLly7p/Pnzcnd3l7e3N+EfAAAAAIA8JFtX+z9z5ozDIykpSfv371f9+vW54B8AAAAAAHlMtsJ/RsqWLau33nor3awAAAAAAADgXDkW/qX/LgJ47NixnNwkAAAAAAC4Sdk653/BggUOz40xOn78uCZNmqR69erlSGEAAAAAACBnZCv8t23b1uG5zWZT4cKF9cADD+idd97JiboAAAAAAEAOyVb4T01Nzek6AAAAAADALZKj5/wDAAAAAIC8J1sj//3798902/Hjx2enCwAAAAAAkEOyFf63bdumbdu26dKlS7rnnnskSQcOHFC+fPlUvXp1ezubzZYzVQIAAAAAgGzLVvhv3bq1/Pz89Omnnyp//vySpDNnzuipp55SgwYN9OKLL+ZokQAAAAAAIPtsxhiT1RcVLVpUP/74oypWrOiwfPfu3WrevLmOHTuWYwXeLhITExUQEKCEhAT5+/s7u5w8J3rmZmeXAFzX9Khazi4BAAAAyJKs5NBsXfAvMTFRJ0+eTLf85MmTOnv2bHY2CQAAAAAAbpFshf927drpqaee0rx583TkyBEdOXJE3377raKjo9W+ffucrhEAAAAAANyEbJ3zP3XqVA0YMECPP/64Ll269N+GXF0VHR2tcePG5WiBAAAAAADg5mQr/Ht7e2vy5MkaN26c/vjjD0lS6dKl5ePjk6PFAQAAAACAm5etaf9pjh8/ruPHj6ts2bLy8fFRNq4dCAAAAAAAbrFshf9Tp06pSZMmuvvuu9WyZUsdP35ckhQdHc1t/gAAAAAAyGOyFf779esnNzc3HT58WN7e3vblnTp10pIlS3KsOAAAAAAAcPOydc7/jz/+qKVLl6pYsWIOy8uWLatDhw7lSGEAAAAAACBnZGvk/9y5cw4j/mlOnz4tDw+Pmy4KAAAAAADknGyF/wYNGuizzz6zP7fZbEpNTdXYsWPVuHHjHCsOAAAAAADcvGxN+x87dqyaNGmiLVu2KDk5WYMGDdKePXt0+vRprVu3LqdrBAAAAAAANyFbI/+VKlXSgQMHVL9+fbVp00bnzp1T+/bttW3bNpUuXTqnawQAAAAAADchyyP/ly5dUosWLTR16lS98sort6ImAAAAAACQg7I88u/m5qadO3feiloAAAAAAMAtkK1p/0888YSmT5+e07UAAAAAAIBbIFsX/Lt8+bI++eQT/fTTT6pRo4Z8fHwc1o8fPz5HigMAAAAAADcvS+H/zz//VKlSpbR7925Vr15dknTgwAGHNjabLeeqAwAAAAAANy1L4b9s2bI6fvy4Vq5cKUnq1KmT3nvvPQUFBd2S4gAAAAAAwM3L0jn/xhiH54sXL9a5c+dytCAAAAAAAJCzsnXBvzRXfxkAAAAAAADyniyFf5vNlu6cfs7xBwAAAAAgb8vSOf/GGEVFRcnDw0OSdOHCBfXs2TPd1f7nzZuXcxUCAAAAAICbkqXw37VrV4fnTzzxRI4WAwAAAAAAcl6Wwv+MGTNuVR0AAAAAAOAWuakL/gEAAAAAgLyP8A8AAAAAgMUR/gEAAAAAsLgsnfMPAFYVPXNzrvY3PapWrvYHAACAOxsj/wAAAAAAWBzhHwAAAAAAiyP8AwAAAABgcYR/AAAAAAAsjvAPAAAAAIDFEf4BAAAAALA4wj8AAAAAABZH+AcAAAAAwOII/wAAAAAAWBzhHwAAAAAAiyP8AwAAAABgcYR/AAAAAAAsjvAPAAAAAIDFEf4BAAAAALA4wj8AAAAAABZH+AcAAAAAwOII/wAAAAAAWBzhHwAAAAAAi7utwv9bb70lm82mvn372pdduHBBvXr1UsGCBeXr66sOHTroxIkTDq87fPiwWrVqJW9vbxUpUkQDBw7U5cuXHdr8/PPPql69ujw8PFSmTBnNnDkzF/YIAAAAAIBb77YJ/5s3b9aHH36oKlWqOCzv16+f/ve//2nu3LlatWqVjh07pvbt29vXp6SkqFWrVkpOTtb69ev16a
efaubMmRo2bJi9TUxMjFq1aqXGjRtr+/bt6tu3r55++mktXbo01/YPAAAAAIBb5bYI/0lJSYqMjNRHH32k/Pnz25cnJCRo+vTpGj9+vB544AHVqFFDM2bM0Pr16/XLL79Ikn788Uft3btXX3zxhapVq6YHH3xQI0eO1AcffKDk5GRJ0tSpUxUWFqZ33nlH5cuXV+/evdWxY0e9++67TtlfAAAAAABy0m0R/nv16qVWrVqpadOmDsu3bt2qS5cuOSwvV66cSpQooQ0bNkiSNmzYoMqVKysoKMjeJiIiQomJidqzZ4+9zdXbjoiIsG8jIxcvXlRiYqLDAwAAAACAvMjV2QXcyFdffaVff/1VmzdvTrcuNjZW7u7uCgwMdFgeFBSk2NhYe5srg3/a+rR112uTmJiof//9V15eXun6Hj16tEaMGJHt/QIAAAAAILfk6ZH/v//+W3369NGsWbPk6enp7HIcDBkyRAkJCfbH33//7eySAAAAAADIUJ4e+d+6davi4uJUvXp1+7KUlBStXr1akyZN0tKlS5WcnKz4+HiH0f8TJ04oODhYkhQcHKxNmzY5bDftbgBXtrn6DgEnTpyQv79/hqP+kuTh4SEPD4+b3kcAd6bomelnM91K06Nq5Wp/AAAAyFvy9Mh/kyZNtGvXLm3fvt3+qFmzpiIjI+3/dnNz0/Lly+2v2b9/vw4fPqzw8HBJUnh4uHbt2qW4uDh7m2XLlsnf318VKlSwt7lyG2lt0rYBAAAAAMDtLE+P/Pv5+alSpUoOy3x8fFSwYEH78ujoaPXv318FChSQv7+/nn/+eYWHh+u+++6TJDVv3lwVKlTQk08+qbFjxyo2NlZDhw5Vr1697CP3PXv21KRJkzRo0CB169ZNK1as0Jw5c7Ro0aLc3WEAAAAAAG6BPB3+M+Pdd9+Vi4uLOnTooIsXLyoiIkKTJ0+2r8+XL58WLlyoZ599VuHh4fLx8VHXrl31+uuv29uEhYVp0aJF6tevnyZOnKhixYrp448/VkREhDN2CQAAAACAHGUzxhhnF2EFiYmJCggIUEJCgvz9/Z1dTp6T2+c3A3DEOf8AAADWk5UcmqfP+QcAAAAAADeP8A8AAAAAgMUR/gEAAAAAsDjCPwAAAAAAFkf4BwAAAADA4gj/AAAAAABYHOEfAAAAAACLI/wDAAAAAGBxhH8AAAAAACyO8A8AAAAAgMUR/gEAAAAAsDjCPwAAAAAAFkf4BwAAAADA4gj/AAAAAABYHOEfAAAAAACLI/wDAAAAAGBxhH8AAAAAACyO8A8AAAAAgMW5OrsAAMCtFz1zc672Nz2qVq72BwAAgOtj5B8AAAAAAIsj/AMAAAAAYHGEfwAAAAAALI7wDwAAAACAxRH+AQAAAACwOMI/AAAAAAAWR/gHAAAAAMDiCP8AAAAAAFgc4R8AAAAAAIsj/AMAAAAAYHGEfwAAAAAALI7wDwAAAACAxRH+AQAAAACwOMI/AAAAAAAWR/gHAAAAAMDiCP8AAAAAAFgc4R8AAAAAAIsj/AMAAAAAYHGEfwAAAAAALM7V2QUAAKwneubmXO1velStXO0PAADgdsPIPwAAAAAAFkf4BwAAAADA4gj/AAAAAABYHOEfAAAAAACLI/wDAAAAAGBxhH8AAAAAACyO8A8AAAAAgMUR/gEAAAAAsDjCPwAAAAAAFkf4BwAAAADA4gj/AAAAAABYHOEfAAAAAACLI/wDAAAAAGBxhH8AAAAAACyO8A8AAAAAgMUR/gEAAAAAsDjCPwAAAAAAFkf4BwAAAADA4gj/AAAAAABYHOEfAAAAAACLc3V2AQAA3KzomZtzvc/pUbVyvU8AAIDsYuQfAAAAAACLI/wDAAAAAGBxhH8AAAAAACyO8A8AAAAAgMUR/gEAAAAAsDjCPwAAAAAAFkf4BwAAAADA4gj/AAAAAABYHOEfAAAAAACLI/wDAAAAAGBxhH8AAAAAACyO8A8AAAAAgMUR/gEAAAAAsLg8Hf
5Hjx6tWrVqyc/PT0WKFFHbtm21f/9+hzYXLlxQr169VLBgQfn6+qpDhw46ceKEQ5vDhw+rVatW8vb2VpEiRTRw4EBdvnzZoc3PP/+s6tWry8PDQ2XKlNHMmTNv9e4BAAAAAJArXJ1dwPWsWrVKvXr1Uq1atXT58mW9/PLLat68ufbu3SsfHx9JUr9+/bRo0SLNnTtXAQEB6t27t9q3b69169ZJklJSUtSqVSsFBwdr/fr1On78uLp06SI3NzeNGjVKkhQTE6NWrVqpZ8+emjVrlpYvX66nn35aISEhioiIcNr+AwDyruiZm3O1v+lRtXK1PwAAYC02Y4xxdhGZdfLkSRUpUkSrVq1Sw4YNlZCQoMKFC2v27Nnq2LGjJOm3335T+fLltWHDBt13331avHixHnroIR07dkxBQUGSpKlTp+qll17SyZMn5e7urpdeekmLFi3S7t277X117txZ8fHxWrJkSYa1XLx4URcvXrQ/T0xMVPHixZWQkCB/f/9b+C7cnnL7j2QAsBrCPwAAuFpiYqICAgIylUPz9LT/qyUkJEiSChQoIEnaunWrLl26pKZNm9rblCtXTiVKlNCGDRskSRs2bFDlypXtwV+SIiIilJiYqD179tjbXLmNtDZp28jI6NGjFRAQYH8UL148Z3YSAAAAAIAcdtuE/9TUVPXt21f16tVTpUqVJEmxsbFyd3dXYGCgQ9ugoCDFxsba21wZ/NPWp627XpvExET9+++/GdYzZMgQJSQk2B9///33Te8jAAAAAAC3Qp4+5/9KvXr10u7du7V27VpnlyJJ8vDwkIeHh7PLAAAAAADghm6Lkf/evXtr4cKFWrlypYoVK2ZfHhwcrOTkZMXHxzu0P3HihIKDg+1trr76f9rzG7Xx9/eXl5dXTu8OAAAAAAC5Kk+Hf2OMevfure+++04rVqxQWFiYw/oaNWrIzc1Ny5cvty/bv3+/Dh8+rPDwcElSeHi4du3apbi4OHubZcuWyd/fXxUqVLC3uXIbaW3StgEAAAAAwO0sT0/779Wrl2bPnq3vv/9efn5+9nP0AwIC5OXlpYCAAEVHR6t///4qUKCA/P399fzzzys8PFz33XefJKl58+aqUKGCnnzySY0dO1axsbEaOnSoevXqZZ+237NnT02aNEmDBg1St27dtGLFCs2ZM0eLFi1y2r4DAAAAAJBT8vTI/5QpU5SQkKD7779fISEh9sfXX39tb/Puu+/qoYceUocOHdSwYUMFBwdr3rx59vX58uXTwoULlS9fPoWHh+uJJ55Qly5d9Prrr9vbhIWFadGiRVq2bJmqVq2qd955Rx9//LEiIiJydX8BAAAAALgVbMYY4+wirCAr91e8E0XP3OzsEgDgtjY9qpazSwAAAHlMVnJonh75BwAAAAAAN4/wDwAAAACAxRH+AQAAAACwOMI/AAAAAAAWR/gHAAAAAMDiCP8AAAAAAFgc4R8AAAAAAIsj/AMAAAAAYHGuzi4AuS965mZnlwAAyKLc/uyeHlUrV/sDAAC3FiP/AAAAAABYHOEfAAAAAACLI/wDAAAAAGBxhH8AAAAAACyO8A8AAAAAgMUR/gEAAAAAsDjCPwAAAAAAFkf4BwAAAADA4lydXQAAAMh7omduztX+pkfVytX+AAC40zDyDwAAAACAxRH+AQAAAACwOMI/AAAAAAAWxzn/sKTki8/man/uHlNytT8AAAAAyApG/gEAAAAAsDhG/gEAgNNxdwEAAG4tRv4BAAAAALA4Rv6RKw5oYq72VypXewMAAACAvI2RfwAAAAAALI7wDwAAAACAxTHtH7mi1MXfnF0CAAAAANyxGPkHAAAAAMDiCP8AAAAAAFgc0/6BHJB88dlc7c/dY0qu9gcAAADg9sbIPwAAAAAAFsfIPwAAuONEz9yc631Oj6qV630CAJCGkX8AAAAAACyO8A8AAAAAgMUx7R+4DT1/Ymiu9vd+0Bu52h8AAACAnEX4B25D7wSeytX+3HO1Nw
AAAAA5jWn/AAAAAABYHOEfAAAAAACLY9o/AABALsjt2wtya0EAwJUY+QcAAAAAwOIY+QdwQ9xdAAAAALi9Ef4B3FBu313gL03U3eqTq30CAAAAVsa0fwAAAAAALI6RfwAAAAviAoMAgCsx8g8AAAAAgMUx8g8gTzqgibnaH9cYAAAAgJUR/gHkOaUu/pb7nXrkfpcAYCWcZgAAeRvT/gEAAAAAsDhG/gEAAHDbYaYBAGQNI/8AAAAAAFgcI/8AIC4wCAAAAGsj/AOAnHCRQS4wCAAAgFzEtH8AAAAAACyOkX8AAADgBrjAIIDbHeEfAJwg+eKzudqfu8eUXO0PAAAAeQvT/gEAAAAAsDhG/gHgDsDdDAAAN8KpDYC1Ef4B4A7A3QwA4PaS20EcgPUR/gEAAADkOmYaALmL8A8AyHHPnxiaq/29H/RGrvYHAABwuyH8AwBy3DuBp3K1vxf5sgEAAOC6CP8AAAAALI/TDHCnI/wDAG57uT3T4C9N5I4GAADgtkL4BwAgi0pd/E3Px+feqQacZgAAt5874Y4NzG64vRD+AQDIhtycbcBMAwBAXsSpFLcXwv8d6IAm5nqfpXK9RwCwltz+7ObLBgAArIXwfwcqdfE3Z5cAAMgCZ3xuH/DgywYAQN7CTIObQ/gHAADp5PYXDnzZAADArUX4BwAATpfrsxs8crc7AACcjfAPAADuOMkXn3V2Cbfci/EFc60v7kgBAHkf4f8qH3zwgcaNG6fY2FhVrVpV77//vmrXru3ssgAAALIkN+9IoYvP5uqXDRJfOABAVhH+r/D111+rf//+mjp1qurUqaMJEyYoIiJC+/fvV5EiRZxdHgAAQJ6Vq182SJLFZ2/85VEu1/vkWhiAtdmMMcbZReQVderUUa1atTRp0iRJUmpqqooXL67nn39egwcPvu5rExMTFRAQoISEBPn7++dGudn25Ic1nV0CAAAAkKty+1SY3L5F6/snTuZqf1Luf+mX2z/D2+Fq/1nJoYz8/5/k5GRt3bpVQ4YMsS9zcXFR06ZNtWHDhnTtL168qIsXL9qfJyQkSPrvzc/rkv9NcXYJAAAAQK4a7RGXe53Fd1do7vUmSRrtjAuZ/pu73eXmz/Dwv28rMfGjXOsvu9LyZ2bG9An//+eff/5RSkqKgoKCHJYHBQXpt9/SX4F49OjRGjFiRLrlxYsXv2U1AgAAAAByw3YFPDfH2UVk2tmzZxUQEHDdNoT/bBoyZIj69+9vf56amqrTp0+rYMGCstlsTqkpMTFRxYsX199//53nTz3AnYVjE3kVxybyKo5N5GUcn8ir7sRj0xijs2fPKjT0xnNNCP//p1ChQsqXL59OnDjhsPzEiRMKDg5O197Dw0MeHo5zawIDA29liZnm7+9/xxzsuL1wbCKv4thEXsWxibyM4xN51Z12bN5oxD+Nyy2u47bh7u6uGjVqaPny5fZlqampWr58ucLDw51YGQAAAAAAN4eR/yv0799fXbt2Vc2aNVW7dm1NmDBB586d01NPPeXs0gAAAAAAyDbC/xU6deqkkydPatiwYYqNjVW1atW0ZMmSdBcBzKs8PDz02muvpTsdAXA2jk3kVRybyKs4NpGXcXwir+LYvD6bycw9AQAAAAAAwG2Lc/4BAAAAALA4wj8AAAAAABZH+AcAAAAAwOII/wAAAAAAWBzh30I++OADlSpVSp6enqpTp442bdrk7JJgYaNHj1atWrXk5+enIkWKqG3bttq/f79DmwsXLqhXr14qWLCgfH191aFDB504ccKhzeHDh9WqVSt5e3urSJEiGjhwoC5fvpybuwKLe+utt2Sz2dS3b1/7Mo5NOMvRo0f1xBNPqGDBgvLy8lLlypW1ZcsW+3pjjIYNG6aQkBB5eXmpadOmOnjwoMM2Tp8+rcjISPn7+yswMFDR0dFKSkrK7V2BhaSkpOjVV19VWFiYvLy8VLp0aY0cOVJXXhecYxO5ZfXq1WrdurVCQ0Nls9k0f/58h/
U5dSzu3LlTDRo0kKenp4oXL66xY8fe6l1zOsK/RXz99dfq37+/XnvtNf3666+qWrWqIiIiFBcX5+zSYFGrVq1Sr1699Msvv2jZsmW6dOmSmjdvrnPnztnb9OvXT//73/80d+5crVq1SseOHVP79u3t61NSUtSqVSslJydr/fr1+vTTTzVz5kwNGzbMGbsEC9q8ebM+/PBDValSxWE5xyac4cyZM6pXr57c3Ny0ePFi7d27V++8847y589vbzN27Fi99957mjp1qjZu3CgfHx9FRETowoUL9jaRkZHas2ePli1bpoULF2r16tXq3r27M3YJFjFmzBhNmTJFkyZN0r59+zRmzBiNHTtW77//vr0NxyZyy7lz51S1alV98MEHGa7PiWMxMTFRzZs3V8mSJbV161aNGzdOw4cP17Rp0275/jmVgSXUrl3b9OrVy/48JSXFhIaGmtGjRzuxKtxJ4uLijCSzatUqY4wx8fHxxs3NzcydO9feZt++fUaS2bBhgzHGmB9++MG4uLiY2NhYe5spU6YYf39/c/HixdzdAVjO2bNnTdmyZc2yZctMo0aNTJ8+fYwxHJtwnpdeesnUr1//mutTU1NNcHCwGTdunH1ZfHy88fDwMF9++aUxxpi9e/caSWbz5s32NosXLzY2m80cPXr01hUPS2vVqpXp1q2bw7L27dubyMhIYwzHJpxHkvnuu+/sz3PqWJw8ebLJnz+/w//pL730krnnnntu8R45FyP/FpCcnKytW7eqadOm9mUuLi5q2rSpNmzY4MTKcCdJSEiQJBUoUECStHXrVl26dMnhuCxXrpxKlChhPy43bNigypUrKygoyN4mIiJCiYmJ2rNnTy5WDyvq1auXWrVq5XAMShybcJ4FCxaoZs2aeuSRR1SkSBHde++9+uijj+zrY2JiFBsb63BsBgQEqE6dOg7HZmBgoGrWrGlv07RpU7m4uGjjxo25tzOwlLp162r58uU6cOCAJGnHjh1au3atHnzwQUkcm8g7cupY3LBhgxo2bCh3d3d7m4iICO3fv19nzpzJpb3Jfa7OLgA3759//lFKSorDH6mSFBQUpN9++81JVeFOkpqaqr59+6pevXqqVKmSJCk2Nlbu7u4KDAx0aBsUFKTY2Fh7m4yO27R1QHZ99dVX+vXXX7V58+Z06zg24Sx//vmnpkyZov79++vll1/W5s2b9cILL8jd3V1du3a1H1sZHXtXHptFihRxWO/q6qoCBQpwbCLbBg8erMTERJUrV0758uVTSkqK3nzzTUVGRkoSxybyjJw6FmNjYxUWFpZuG2nrrjwdy0oI/wBuWq9evbR7926tXbvW2aUA+vvvv9WnTx8tW7ZMnp6ezi4HsEtNTVXNmjU1atQoSdK9996r3bt3a+rUqeratauTq8OdbM6cOZo1a5Zmz56tihUravv27erbt69CQ0M5NgELYdq/BRQqVEj58uVLd6XqEydOKDg42ElV4U7Ru3dvLVy4UCtXrlSxYsXsy4ODg5WcnKz4+HiH9lcel8HBwRket2nrgOzYunWr4uLiVL16dbm6usrV1VWrVq3Se++9J1dXVwUFBXFswilCQkJUoUIFh2Xly5fX4cOHJf3/Y+t6/58HBwenu5jv5cuXdfr0aY5NZNvAgQM1ePBgde7cWZUrV9aTTz6pfv36afTo0ZI4NpF35NSxeKf+P0/4twB3d3fVqFFDy5cvty9LTU3V8uXLFR4e7sTKYGXGGPXu3VvfffedVqxYkW7qVI0aNeTm5uZwXO7fv1+HDx+2H5fh4eHatWuXwwf0smXL5O/vn+4PZCCzmjRpol27dmn79u32R82aNRUZGWn/N8cmnKFevXrpbol64MABlSxZUpIUFham4OBgh2MzMTFRGzdudDg24+PjtXXrVnubFStWKDU1VXXq1MmFvYAVnT9/Xi4ujrEgX758Sk1NlcSxibwjp47F8PBwrV69WpcuXbK3WbZsme655x7LTvmXxNX+reKrr7
4yHh4eZubMmWbv3r2me/fuJjAw0OFK1UBOevbZZ01AQID5+eefzfHjx+2P8+fP29v07NnTlChRwqxYscJs2bLFhIeHm/DwcPv6y5cvm0qVKpnmzZub7du3myVLlpjChQubIUOGOGOXYGFXXu3fGI5NOMemTZuMq6urefPNN83BgwfNrFmzjLe3t/niiy/sbd566y0TGBhovv/+e7Nz507Tpk0bExYWZv799197mxYtWph7773XbNy40axdu9aULVvWPPbYY87YJVhE165dTdGiRc3ChQtNTEyMmTdvnilUqJAZNGiQvQ3HJnLL2bNnzbZt28y2bduMJDN+/Hizbds2c+jQIWNMzhyL8fHxJigoyDz55JNm9+7d5quvvjLe3t7mww8/zPX9zU2Efwt5//33TYkSJYy7u7upXbu2+eWXX5xdEixMUoaPGTNm2Nv8+++/5rnnnjP58+c33t7epl27dub48eMO2/nrr7/Mgw8+aLy8vEyhQoXMiy++aC5dupTLewOruzr8c2zCWf73v/+ZSpUqGQ8PD1OuXDkzbdo0h/Wpqanm1VdfNUFBQcbDw8M0adLE7N+/36HNqVOnzGOPPWZ8fX2Nv7+/eeqpp8zZs2dzczdgMYmJiaZPnz6mRIkSxtPT09x1113mlVdecbgNGscmcsvKlSsz/Buza9euxpicOxZ37Nhh6tevbzw8PEzRokXNW2+9lVu76DQ2Y4xxzpwDAAAAAACQGzjnHwAAAAAAiyP8AwAAAABgcYR/AAAAAAAsjvAPAAAAAIDFEf4BAAAAALA4wj8AAAAAABZH+AcAAAAAwOII/wAAAAAAWBzhHwAA4Baw2WyaP3++s8sAAEAS4R8AgDzr5MmTevbZZ1WiRAl5eHgoODhYERERWrdunbNLyzPyQsAePny4qlWr5tQaAAC4EVdnFwAAADLWoUMHJScn69NPP9Vdd92lEydOaPny5Tp16pSzSwMAALcZRv4BAMiD4uPjtWbNGo0ZM0aNGzdWyZIlVbt2bQ0ZMkQPP/ywQ7unn35ahQsXlr+/vx544AHt2LHDYVtvvfWWgoKC5Ofnp+joaA0ePNhhpPr+++9X3759HV7Ttm1bRUVF2Z9fvHhRAwYMUNGiReXj46M6dero559/tq+fOXOmAgMDtXTpUpUvX16+vr5q0aKFjh8/7rDdTz75RBUrVpSHh4dCQkLUu3fvLO1LVn388ccqX768PD09Va5cOU2ePNm+7q+//pLNZtO8efPUuHFjeXt7q2rVqtqwYYPDNj766CMVL15c3t7eateuncaPH6/AwED7fo8YMUI7duyQzWaTzWbTzJkz7a/9559/1K5dO3l7e6ts2bJasGDBTe0PAADZRfgHACAP8vX1la+vr+bPn6+LFy9es90jjzyiuLg4LV68WFu3blX16tXVpEkTnT59WpI0Z84cDR8+XKNGjdKWLVsUEhLiEIAzq3fv3tqwYYO++uor7dy5U4888ohatGihgwcP2tucP39eb7/9tj7//HOtXr1ahw8f1oABA+zrp0yZol69eql79+7atWuXFixYoDJlymR6X7Jq1qxZGjZsmN58803t27dPo0aN0quvvqpPP/3Uod0rr7yiAQMGaPv27br77rv12GOP6fLly5KkdevWqWfPnurTp4+2b9+uZs2a6c0337S/tlOnTnrxxRdVsWJFHT9+XMePH1enTp3s60eMGKFHH31UO3fuVMuWLRUZGZnt/QEA4KYYAACQJ33zzTcmf/78xtPT09StW9cMGTLE7Nixw75+zZo1xt/f31y4cMHhdaVLlzYffvihMcaY8PBw89xzzzmsr1Onjqlatar9eaNGjUyfPn0c2rRp08Z07drVGGPMoUOHTL58+czRo0cd2jRp0sQMGTLEGGPMjBkzjCTz+++/29d/8MEHJigoyP48NDTUvPLKKxnua2b2JSOSzHfffZfhutKlS5vZs2c7LBs5cqQJDw83xhgTExNjJJmPP/7Yvn7Pnj
1Gktm3b58xxphOnTqZVq1aOWwjMjLSBAQE2J+/9tprDu/nlbUNHTrU/jwpKclIMosXL77m/gAAcKsw8g8AQB7VoUMHHTt2TAsWLFCLFi30888/q3r16vZp5Tt27FBSUpIKFixonyng6+urmJgY/fHHH5Kkffv2qU6dOg7bDQ8Pz1Idu3btUkpKiu6++26HflatWmXvR5K8vb1VunRp+/OQkBDFxcVJkuLi4nTs2DE1adIkwz4ysy9Zce7cOf3xxx+Kjo522N4bb7yRbntVqlRxqDmtXknav3+/ateu7dD+6ufXc+W2fXx85O/vb982AAC5iQv+AQCQh3l6eqpZs2Zq1qyZXn31VT399NN67bXXFBUVpaSkJIWEhDice58m7Zz0zHBxcZExxmHZpUuX7P9OSkpSvnz5tHXrVuXLl8+hna+vr/3fbm5uDutsNpt9u15eXtetIaf25crtSf+dr3/1lx9X78OVddtsNklSampqlvvMSEbvSU5tGwCArCD8AwBwG6lQoYL91nbVq1dXbGysXF1dVapUqQzbly9fXhs3blSXLl3sy3755ReHNoULF3a4MF9KSop2796txo0bS5LuvfdepaSkKC4uTg0aNMhW3X5+fipVqpSWL19u3+6VMrMvWREUFKTQ0FD9+eefioyMzPZ27rnnHm3evNlh2dXP3d3dlZKSku0+AADIDYR/AADyoFOnTumRRx5Rt27dVKVKFfn5+WnLli0aO3as2rRpI0lq2rSpwsPD1bZtW40dO1Z33323jh07pkWLFqldu3aqWbOm+vTpo6ioKNWsWVP16tXTrFmztGfPHt111132vh544AH1799fixYtUunSpTV+/HjFx8fb1999992KjIxUly5d9M477+jee+/VyZMntXz5clWpUkWtWrXK1D4NHz5cPXv2VJEiRfTggw/q7NmzWrdunZ5//vlM7cu1xMTEaPv27Q7LypYtqxEjRuiFF15QQECAWrRooYsXL2rLli06c+aM+vfvn6man3/+eTVs2FDjx49X69attWLFCi1evNg+Q0CSSpUqZa+hWLFi8vPzk4eHR6a2DwBAbiH8AwCQB/n6+qpOnTp699139ccff+jSpUsqXry4nnnmGb388suS/ptC/sMPP+iVV17RU089pZMnTyo4OFgNGzZUUFCQpP+uRv/HH39o0KBBunDhgjp06KBnn31WS5cutffVrVs37dixQ126dJGrq6v69euXbnR+xowZeuONN/Tiiy/q6NGjKlSokO677z499NBDmd6nrl276sKFC3r33Xc1YMAAFSpUSB07dsz0vlxLRkF+zZo1evrpp+Xt7a1x48Zp4MCB8vHxUeXKldPd1vB66tWrp6lTp2rEiBEaOnSoIiIi1K9fP02aNMnepkOHDvbbBcbHx2vGjBkOt0kEACAvsJmrT/IDAACWNnz4cM2fPz/daDky55lnntFvv/2mNWvWOLsUAAAyjZF/AACA63j77bfVrFkz+fj4aPHixfr00081efJkZ5cFAECWEP4BAACuY9OmTRo7dqzOnj2ru+66S++9956efvppZ5cFAECWMO0fAAAAAACLc3F2AQAAAAAA4NYi/AMAAAAAYHGEfwAAAAAALI7wDwAAAACAxRH+AQAAAACwOMI/AAAAAAAWR/gHAAAAAMDiCP8AAAAAAFjc/wPbw9VywKTWYgAAAABJRU5ErkJggg==",
|
| 118 |
+
"text/plain": [
|
| 119 |
+
"<Figure size 1200x600 with 1 Axes>"
|
| 120 |
+
]
|
| 121 |
+
},
|
| 122 |
+
"metadata": {},
|
| 123 |
+
"output_type": "display_data"
|
| 124 |
+
}
|
| 125 |
+
],
|
| 126 |
+
"source": [
|
| 127 |
+
"# 2. Sequence lengths histogram\n",
|
| 128 |
+
"plt.figure(figsize=(12, 6))\n",
|
| 129 |
+
"for name, df in datasets.items():\n",
|
| 130 |
+
" plt.hist(df['Length'], bins=30, alpha=0.7, label=name, density=True)\n",
|
| 131 |
+
"\n",
|
| 132 |
+
"plt.title('UniRef50 (100k Sequences) - Train, Test, and Val Sequence Lengths')\n",
|
| 133 |
+
"plt.xlabel('Sequence Length')\n",
|
| 134 |
+
"plt.ylabel('Frequency')\n",
|
| 135 |
+
"plt.legend()\n",
|
| 136 |
+
"plt.show()"
|
| 137 |
+
]
|
| 138 |
+
},
|
| 139 |
+
{
|
| 140 |
+
"cell_type": "code",
|
| 141 |
+
"execution_count": null,
|
| 142 |
+
"metadata": {},
|
| 143 |
+
"outputs": [],
|
| 144 |
+
"source": []
|
| 145 |
+
}
|
| 146 |
+
],
|
| 147 |
+
"metadata": {
|
| 148 |
+
"kernelspec": {
|
| 149 |
+
"display_name": "Python 3",
|
| 150 |
+
"language": "python",
|
| 151 |
+
"name": "python3"
|
| 152 |
+
},
|
| 153 |
+
"language_info": {
|
| 154 |
+
"codemirror_mode": {
|
| 155 |
+
"name": "ipython",
|
| 156 |
+
"version": 3
|
| 157 |
+
},
|
| 158 |
+
"file_extension": ".py",
|
| 159 |
+
"mimetype": "text/x-python",
|
| 160 |
+
"name": "python",
|
| 161 |
+
"nbconvert_exporter": "python",
|
| 162 |
+
"pygments_lexer": "ipython3",
|
| 163 |
+
"version": "3.10.12"
|
| 164 |
+
}
|
| 165 |
+
},
|
| 166 |
+
"nbformat": 4,
|
| 167 |
+
"nbformat_minor": 2
|
| 168 |
+
}
|
data/uniref/100k_seqs/test.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/uniref/100k_seqs/train.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:baa07e6863c4d4b3fdc707b539d9520d66fc8d52be68c9d1c444fa96abc3b77f
|
| 3 |
+
size 20059182
|
data/uniref/100k_seqs/val.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/uniref/200k_seqs/check_data.ipynb
ADDED
|
@@ -0,0 +1,168 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 1,
|
| 6 |
+
"metadata": {},
|
| 7 |
+
"outputs": [],
|
| 8 |
+
"source": [
|
| 9 |
+
"import pandas as pd\n",
|
| 10 |
+
"import seaborn as sns\n",
|
| 11 |
+
"import matplotlib.pyplot as plt\n",
|
| 12 |
+
"import numpy as np"
|
| 13 |
+
]
|
| 14 |
+
},
|
| 15 |
+
{
|
| 16 |
+
"cell_type": "code",
|
| 17 |
+
"execution_count": 2,
|
| 18 |
+
"metadata": {},
|
| 19 |
+
"outputs": [],
|
| 20 |
+
"source": [
|
| 21 |
+
"path = \"/home/sg666/MDpLM/data/uniref50/200k_seqs\""
|
| 22 |
+
]
|
| 23 |
+
},
|
| 24 |
+
{
|
| 25 |
+
"cell_type": "code",
|
| 26 |
+
"execution_count": 3,
|
| 27 |
+
"metadata": {},
|
| 28 |
+
"outputs": [],
|
| 29 |
+
"source": [
|
| 30 |
+
"train = pd.read_csv(path + \"/train.csv\")\n",
|
| 31 |
+
"test = pd.read_csv(path + \"/test.csv\")\n",
|
| 32 |
+
"val = pd.read_csv(path + '/val.csv')"
|
| 33 |
+
]
|
| 34 |
+
},
|
| 35 |
+
{
|
| 36 |
+
"cell_type": "code",
|
| 37 |
+
"execution_count": 4,
|
| 38 |
+
"metadata": {},
|
| 39 |
+
"outputs": [],
|
| 40 |
+
"source": [
|
| 41 |
+
"valid_residues = ['A','R','N','D','C','E','Q','G','H','I','L','K','M','F','P','S','T','W','Y','V']\n",
|
| 42 |
+
"\n",
|
| 43 |
+
"for df in [train, test, val]:\n",
|
| 44 |
+
" df['Length'] = df['Sequence'].str.len()\n",
|
| 45 |
+
"\n",
|
| 46 |
+
" for residue in valid_residues:\n",
|
| 47 |
+
" df[residue] = 0\n",
|
| 48 |
+
"\n",
|
| 49 |
+
" for idx, row in df.iterrows():\n",
|
| 50 |
+
" sequence = row['Sequence']\n",
|
| 51 |
+
"\n",
|
| 52 |
+
" for residue in valid_residues:\n",
|
| 53 |
+
" df.at[idx, residue] = sequence.count(residue)"
|
| 54 |
+
]
|
| 55 |
+
},
|
| 56 |
+
{
|
| 57 |
+
"cell_type": "code",
|
| 58 |
+
"execution_count": 5,
|
| 59 |
+
"metadata": {},
|
| 60 |
+
"outputs": [
|
| 61 |
+
{
|
| 62 |
+
"data": {
|
| 63 |
+
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA/IAAAIjCAYAAACgdyAGAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABru0lEQVR4nO3deZxO9f//8ec1+z62YewzhqyDEFkyxjaTIWRLZDCViiIhfGRJWSOy5pMlldJE8iWEkFCyZCkh2RlLGCGGmfP7w2+uj8tcsxgzc83hcb/drlvN+7yvc17nXGeOeV7nfc6xGIZhCAAAAAAAmIKTowsAAAAAAAAZR5AHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHcM+6du2qoKCgbF/OrVu3NGDAABUvXlxOTk5q1apVti/zbklJSapUqZLefffdHF92aoKCgtS8eXNHl4E0/P7773JxcdHevXsdXUqmDB8+XBaLxdFlPLQaNGigBg0a5Phyc+rYnllBQUHq2rVruv3mzZsni8WiI0eOZHtNuD8Z/UwBpESQBx5QyX+Inz9/3u70SpUqZekfihaLxebl5+ensLAwLV++PNPznDNnjsaPH6+2bdvq448/1uuvvy7p9j/8dy/PYrHopZdeSjGPS5cu6cUXX1RAQIC8vb0VHh6uHTt2ZLiGzz//XMePH1evXr2sbb/88ot69eqlihUrytvbWyVKlFD79u114MABu/PYt2+fIiMj5ePjo3z58um5557TuXPnUvRLSkrSuHHjFBwcLA8PD1WuXFmff/55hmvNiKSkJM2fP1+1atVSvnz55Ovrq0ceeURdunTRTz/9lKXLephVqFBBUVFRGjp0aJbON7V9/+7XvHnzsnS5WS05aKX3yqpQuXnzZg0fPlyXLl3Kkvk52o4dO2SxWDRkyJBU+xw8eFAWi0V9+/bN8uU3aNDA5nPy9PRU5cqVNWnSJCUlJWX58szk7m1z5+uPP/5wdHkAHiAuji4AgPn897//tfvHWpMmTdSlSxcZhqGjR49qxowZatGihVasWKGIiIh7Xs7333+vokWL6v33308xrWrVqnrjjTds2h555BGbn5OSkhQVFaVdu3apf//+KlCggKZPn64GDRpo+/btKlOmTLo1jB8/Xs8884z8/f2tbWPHjtWmTZvUrl07Va5cWXFxcZo6daqqVaumn376SZUqVbL2PXHihOrXry9/f3+NGjVKV65c0Xvvvac9e/Zo69atcnNzs/b9z3/+ozFjxuiFF17QY489pm+++UbPPvusLBaLnnnmmQxvt7S89tprmjZtmlq2bKlOnTrJxcVF+/fv14oVK1SqVCk9/vjjWbIcSC+99JKaNWumQ4cOKSQkJEvmOWnSJF25csX687fffqvPP/9c77//vgoUKGBtr1Onzn0tZ8iQIRo4cOB9zSMt9evX1yeffGLT9vzzz6tmzZp68cUXrW0+Pj5ZsrzNmzdrxIgR6tq1q/LkyZMl83SkatWqqVy5cvr888/1zjvv2O2zYMECSVLnzp2zpYZixYpp9OjRkqTz589rwYIFev3113Xu3LlsG8G0f/9+OTnl/nNQd26bOxUpUsQB1eRuZvlMgVzJAPBAGjZsmCHJOHfunN3pFStWNMLCwrJseZKMnj172rT9/vvvhiTjySefzNQ8w8PDjYoVK6ZoL1mypBEVFZXu+xcuXGhIMmJjY61tZ8+eNfLkyWN07Ngx3ffv2LHDkGSsWbPGpn3Tpk3GjRs3bNoOHDhguLu7G506dbJpf/nllw1PT0/j6NGj1rbVq1cbkowPP/zQ2nbixAnD1dXVZhsmJSUZTzzxhFGsWDHj1q1b1vaMrv/d4uLiDIvFYrzwwgsppiUlJRlnzpy553kidQkJCUbevHmNt956K9uWMX78eEOScfjw4TT7XblyJdtqyC
re3t5GdHR0tsw7o9sptwgLC0v3+Dxy5EhDkrFlyxa708uWLWuUK1funpYbHR1tlCxZMkP13X1s/vfff42SJUsavr6+NscrR5g7d67DPm972yY9Zvj9BJD78BUYAEnS+vXrZbFY9OWXX+rdd99VsWLF5OHhoUaNGunPP/+06ZvR6yjLly+vAgUK6NChQzbtN27c0LBhw1S6dGm5u7urePHiGjBggG7cuCFJOnLkiCwWi9atW6fffvvNOixx/fr1NvNJSEjQ1atXU13+V199pUKFCunpp5+2tgUEBKh9+/b65ptvrMtLzZIlS+Tm5qb69evbtNepU8fmTLoklSlTRhUrVtS+ffts2hctWqTmzZurRIkS1rbGjRvrkUce0Zdffmlt++abb3Tz5k298sor1jaLxaKXX35ZJ06c0JYtW9Ks9eOPP5aLi4v69++fap/Dhw/LMAzVrVs3xTSLxaKCBQvatF26dEl9+vRR8eLF5e7urtKlS2vs2LEpRmNcunRJXbt2lb+/v/LkyaPo6Gj9+uuvKYZ4p3bdr739KSkpSZMmTVLFihXl4eGhQoUKqUePHrp48aJNv+T7Bfz444+qWbOmPDw8VKpUKc2fPz/Fci5duqTXX39dQUFBcnd3V7FixdSlSxeby0/S2zeTrV69WvXq1VOePHnk4+OjsmXLavDgwTZ9XF1d1aBBA33zzTcpaslOXbt2lY+Pjw4dOqRmzZrJ19dXnTp1kiRt3LhR7dq1U4kSJazr9/rrr+vff/+1mYe9a+QtFot69eqlJUuWqFKlSnJ3d1fFihW1cuXKbFuXkydPqnv37ipUqJB1eXPmzEnRb8qUKapYsaK8vLyUN29e1ahRw3pGevjw4dbfi+DgYOvx5F6vn75w4YL69eun0NBQ+fj4yM/PT08++aR27dpl0+9ejqWSNGvWLIWEhMjT01M1a9bUxo0bM1RP8meavJ532r59u/bv32/t88033ygqKkpFihSRu7u7QkJCNHLkSCUmJt7TNkiLh4eHHnvsMf3zzz86e/aszbRPP/1U1atXl6enp/Lly6dnnnlGx48ft+lz8OBBtWnTRoGBgfLw8FCxYsX0zDPPKD4+3trH3vXUv/32mxo2bChPT08VK1ZM77zzjt0RYxaLRcOHD0/Rbm+eGT32ZUZav58ZPe4ZhqF33nlHxYoVk5eXl8LDw/Xbb7+lWJfU7nWR2j0EVqxYoSeeeELe3t7y9fVVVFSUfvvtN7v1nzx5Uq1atZKPj48CAgLUr1+/FPtTUlKSJk+erNDQUHl4eCggIECRkZHatm2btc/9bP8vvvhC1atXl6+vr/z8/BQaGqrJkyenuf2BBwlD6wHYGDNmjJycnNSvXz/Fx8dr3Lhx6tSpk37++ed7nld8fLwuXrxoM6w4KSlJTz31lH788Ue9+OKLKl++vPbs2aP3339fBw4c0JIlSxQQEKBPPvlE7777rq5cuWIdoli+fHnrfL7//nt5eXkpMTFRJUuW1Ouvv67evXvbLH/nzp2qVq1aimF7NWvW1KxZs3TgwAGFhoamWv/mzZtVqVIlubq6pruuhmHozJkzqlixorXt5MmTOnv2rGrUqJGif82aNfXtt9/a1Ort7W2zjsn9kqfXq1fP7rJnzZqll156SYMHD051mK0klSxZUpIUGxurdu3aycvLK9W+165dU1hYmE6ePKkePXqoRIkS2rx5swYNGqTTp09r0qRJ1vVu2bKlfvzxR7300ksqX768vv76a0VHR6c674zo0aOH5s2bp27duum1117T4cOHNXXqVO3cuVObNm2y+Uz+/PNPtW3bVjExMYqOjtacOXPUtWtXVa9e3fp5XLlyRU888YT27dun7t27q1q1ajp//ryWLl2qEydOqECBAhnaN6XbwaF58+aqXLmy3n77bbm7u+vPP//Upk2bUqxH9erV9c033+jy5cvy8/O7r21yL27duqWIiAjVq1dP7733nvWzjo2N1bVr1/Tyyy8rf/782r
p1q6ZMmaITJ04oNjY23fn++OOPWrx4sV555RX5+vrqgw8+UJs2bXTs2DHlz58/S9fhzJkzevzxx61fIAQEBGjFihWKiYnR5cuX1adPH0m3L/V57bXX1LZtW/Xu3VvXr1/X7t279fPPP+vZZ5/V008/rQMHDqS4BCEgIOCe6vnrr7+0ZMkStWvXTsHBwTpz5ow+/PBDhYWF6ffff08xbDojx9LZs2erR48eqlOnjvr06aO//vpLTz31lPLly6fixYunWU9wcLDq1KmjL7/8Uu+//76cnZ2t05LD/bPPPivpdnDz8fFR37595ePjo++//15Dhw7V5cuXNX78+HvaDmlJ/hL2zssX3n33Xb311ltq3769nn/+eZ07d05TpkxR/fr1tXPnTuXJk0cJCQmKiIjQjRs39OqrryowMFAnT57UsmXLdOnSJZtLm+4UFxen8PBw3bp1SwMHDpS3t7dmzZolT0/PTK9DRo99aUlMTExxfxoPDw/rpSKp/X5m9Lg3dOhQvfPOO2rWrJmaNWumHTt2qGnTpkpISMj0en/yySeKjo5WRESExo4dq2vXrmnGjBmqV6+edu7cafNla2JioiIiIlSrVi299957WrNmjSZMmKCQkBC9/PLL1n4xMTGaN2+ennzyST3//PO6deuWNm7cqJ9++snuv4tSxrf/6tWr1bFjRzVq1Ehjx46VdPt+NJs2bUrxtwDwwHLsgAAA2eVeh9avW7fOkGSUL1/eZtj45MmTDUnGnj17rG32hl9KMmJiYoxz584ZZ8+eNbZt22ZERkYakozx48db+33yySeGk5OTsXHjRpv3z5w505BkbNq0ydqW2hDFFi1aGGPHjjWWLFlizJ4923jiiScMScaAAQNs+nl7exvdu3dP8f7ly5cbkoyVK1fa3TbJihUrZrRp0ybNPneulyRj9uzZ1rZffvnFkGTMnz8/Rf/+/fsbkozr168bhmEYUVFRRqlSpVL0u3r1qiHJGDhwoLXtzqH1kydPNiwWizFy5MgM1dmlSxdDkpE3b16jdevWxnvvvWfs27cvRb+RI0ca3t7exoEDB2zaBw4caDg7OxvHjh0zDMMwlixZYkgyxo0bZ+1z69Yt62cyd+5ca3tqw4Xv3p82btxoSDI+++wzm34rV65M0V6yZElDkvHDDz9Y286ePWu4u7sbb7zxhrVt6NChhiRj8eLFKZaflJRkGEbG9833338/zd+tOy1YsMCQZPz888/p9s0Me0PGo6OjU+wzya5du5aibfTo0YbFYrG5/CP5+HEnSYabm5vx559/Wtt27dplSDKmTJly3+ty99D6mJgYo3Dhwsb58+dt+j3zzDOGv7+/dV1atmyZ7lDmrBhaf/36dSMxMdGm7fDhw4a7u7vx9ttvW9syeixNSEgwChYsaFStWtWm36xZswxJGbr0adq0aYYkY9WqVda2xMREo2jRokbt2rWtbfY+9x49ehheXl7WY5Bh3NvQ+nLlyhnnzp0zzp07Z/zxxx/WY9qdl/0cOXLEcHZ2Nt59912b9+/Zs8dwcXGxtu/cuTPFZVD2lCxZ0mYf6dOnT4rfr7Nnzxr+/v4pPm9JxrBhw9KdZ0aPfakJCwszJKV4JS8jtd/PjB73zp49a7i5uRlRUVHWY5dhGMbgwYNtlmMY9n+PDSPlpQf//POPkSdPnhSXXcXFxRn+/v427cn137nPG4ZhPProo0b16tWtP3///feGJOO1115Lsfw7687s9u/du7fh5+fn8Ms4AEdiaD0AG926dbMZNv7EE09Iun02Kj2zZ89WQECAChYsqBo1amjt2rUaMGCAzV2TY2NjVb58eZUrV07nz5+3vho2bChJWrduXbrLWbp0qQYMGKCWLVuqe/fu2rBhgyIiIjRx4kSdOHHC2u/ff/+Vu7t7ivd7eHhYp6fl77//Vt68edOt548//lDPnj1Vu3ZtmzPRyfPPSA2ZqXXcuHHq3bu3xo4dm+bdq+80d+5cTZ06VcHBwfr666
/Vr18/lS9fXo0aNdLJkyet/WJjY/XEE08ob968Np9T48aNlZiYqB9++EHS7Zutubi42JyFcXZ21quvvpqheuyJjY2Vv7+/mjRpYrPs6tWry8fHJ8U+UqFCBet+Kt0+01q2bFmbfXbRokWqUqWKWrdunWJ5yUNPM7pvJp9t/Oabb9Idapu8/6T29IjsdOdnkuzOM5VXr17V+fPnVadOHRmGoZ07d6Y7z8aNG9uMsKlcubL8/PwydHy4F4ZhaNGiRWrRooUMw7D5PCIiIhQfH299+kSePHl04sQJ/fLLL1law93c3d2to3sSExP1999/Wy+rsPckjPSOpdu2bdPZs2f10ksv2fRLvkwlIzp06CBXV1eb4fUbNmzQyZMnrcO1JdvP/Z9//tH58+f1xBNP6Nq1a5m+k/off/yhgIAABQQEqFy5cho/fryeeuopm8tpFi9erKSkJLVv397mMwwMDFSZMmWsv1PJ67tq1Spdu3YtwzV8++23evzxx60jl6Tbv/93rvu9yuixLy1BQUFavXq1zWvAgAE2fe7+/czocW/NmjVKSEjQq6++ajNsPnmESmasXr1aly5dUseOHW2W7ezsrFq1atn9d/nup8Q88cQTKY65FotFw4YNS/HetB5tmdHtnydPHl29elWrV6/O7GoDpsfQeuAhZu8f0zuv5Zb+F0TuvkbPnpYtW6pXr15KSEjQL7/8olGjRunatWs2Q9sPHjyoffv2pTqs9e5rKzPCYrHo9ddf16pVq7R+/XrrXZo9PT3tXgd//fp16/T0GIaR5vS4uDhFRUXJ399fX331lc3w1uT5Z6SGe611w4YNWr58ud588800r4u/m5OTk3r27KmePXvq77//1qZNmzRz5kytWLFCzzzzjPX63IMHD2r37t3pfk5Hjx5V4cKFU9xdvGzZshmu6W4HDx5UfHx8imv27152srv3Wen2fnvnPnvo0CG1adMm3eVmZN/s0KGDPvroIz3//PMaOHCgGjVqpKefflpt27ZNcRlH8v6T1h+uCQkJunDhgk1bQECAzb50r1xcXFSsWLEU7ceOHdPQoUO1dOnSFL/Td16LnJqMbOuscO7cOV26dEmzZs3SrFmz7PZJ/jzefPNNrVmzRjVr1lTp0qXVtGlTPfvss3bvBXE/kq/3nT59ug4fPmxzPbC9ywrSO5YePXpUklI8PcPV1VWlSpXKUE358+dXRESEvv76a82cOVMeHh5asGCBXFxc1L59e2u/3377TUOGDNH333+vy5cv28wjI5+7PUFBQdYnmBw6dEjvvvuuzp07Z/3yUbr9O2UYRqpPCEkeKh4cHKy+fftq4sSJ+uyzz/TEE0/oqaeeUufOndP8UuPo0aOqVatWivb7Pf5k5NiXFm9vbzVu3DjV6fZ+PzN63EttvwkICMjQF8/2HDx4UJKsX1re7e7LgpKvd7+TvWNukSJFlC9fvnuuJSPb/5VXXtGXX36pJ598UkWLFlXTpk3Vvn17RUZG3tPyADMjyAMPqPTOOl+7ds3mD65kqYWH9AKtdPuRO8l/vDRr1kwFChRQr169FB4ebr3hXFJSkkJDQzVx4kS780jvutDUJL/vzkBUuHBhnT59OkXf5Lb0HgWUP3/+NANKfHy8nnzySV26dEkbN25MMb/ChQvbLO/uGvLly2c9C1+4cGGtW7dOhmHYhL7Uaq1YsaIuXbqkTz75RD169FBwcHCa65La+j311FN66qmn1KBBA23YsEFHjx5VyZIllZSUpCZNmqQ4i5Ts7kf9ZYTFYrG7H9m7QVLBggX12Wef2Z3P3X/g3c8+e/dyM7Jvenp66ocfftC6deu0fPlyrVy5UgsXLlTDhg313Xff2dSTvP/c+Wi4u23evFnh4eE2bYcPH76vZ6jfefY4WWJiopo0aaILFy7ozTffVLly5eTt7a2TJ0+qa9euGbqRV1Zt6/Qk19K5c+dU77dQuXJlSbfvnbF//34tW7ZMK1eu1KJFizR9+nQNHTpUI0
aMyLKaRo0apbfeekvdu3fXyJEjlS9fPjk5OalPnz52t11ObavOnTtr2bJlWrZsmZ566iktWrRITZs2tf6eXLp0SWFhYfLz89Pbb7+tkJAQeXh4aMeOHXrzzTczfQO3u8Nq3bp1Va1aNQ0ePFgffPCBpNufo8Vi0YoVK+xujzu/AJwwYYK6du2qb775Rt99951ee+01jR49Wj/99JPdL6Wyir3jT1Yf++5m7/fzXo97GZHaF4j21lm6fZ18YGBgiv4uLrZx4X6+ZExPRrd/wYIF9euvv2rVqlVasWKFVqxYoblz56pLly76+OOPs60+IDchyAMPqOQbm+3fvz9FOL527ZqOHz+upk2bZmsNPXr00Pvvv68hQ4aodevWslgsCgkJ0a5du9SoUaM0z1Leq+QhfXf+sVO1alVt3LhRSUlJNn80/fzzz/Ly8kr3D7Jy5crp8OHDdqddv35dLVq00IEDB7RmzRpVqFAhRZ+iRYsqICDA5g69ybZu3aqqVava1PrRRx9p3759NvNKvjHWnX2l28Hwq6++Ur169dSoUSP9+OOP9/WM4ho1amjDhg06ffq0SpYsqZCQEF25ciXNs0rS7f1s7dq1unLlis0f5fv370/RN2/evHaHYCefYUoWEhKiNWvWqG7duvd106q757l37950+2R033RyclKjRo3UqFEjTZw4UaNGjdJ//vMfrVu3zmabHT58WE5OTmnua1WqVEkxPNTeH9P3a8+ePTpw4IA+/vhjdenSxdqeG4emBgQEyNfXV4mJienug9LtUNmhQwd16NBBCQkJevrpp/Xuu+9q0KBB8vDwyJJjzVdffaXw8HDNnj3bpv3SpUtpflGTmuRj9MGDB23OhN68eVOHDx9WlSpVMjSfp556Sr6+vlqwYIFcXV118eJFm6Hl69ev199//63FixfbPIEjtWNbZlWuXFmdO3fWhx9+qH79+qlEiRIKCQmRYRgKDg7OUAAODQ1VaGiohgwZos2bN6tu3bqaOXNmqjfxLFmypPVs8p1SO/5cunTJpi0hISHFF60ZPfZltYwe9+7cb+4cuXHu3LkUXzwnn6G/dOmSzQ0I7R1zpdvhOKvWOyQkRKtWrdKFCxfu6az8vWx/Nzc3tWjRQi1atFBSUpJeeeUVffjhh3rrrbdUunTp+ykfMAWukQceUI0aNZKbm5tmzJiR4ozLrFmzdOvWLT355JPZWoOLi4veeOMN7du3z/oIrvbt2+vkyZP673//m6L/v//+m+bj5KTbZ9zvPptw8+ZNjRkzRm5ubjZnNtu2baszZ85o8eLF1rbz588rNjZWLVq0sHtN+p1q166tvXv3phjynpiYqA4dOmjLli2KjY1V7dq1U51HmzZttGzZMptHLa1du1YHDhxQu3btrG0tW7aUq6urpk+fbm0zDEMzZ85U0aJFVadOnRTzLlasmNasWaN///1XTZo00d9//53m+sTFxen3339P0Z6QkKC1a9fKycnJ+sdP+/bttWXLFq1atSpF/0uXLunWrVuSbo+8uHXrlmbMmGGdnpiYqClTpqR4X0hIiP744w+dO3fO2rZr164Ud3tv3769EhMTNXLkyBTzuHXrVoo/xjOiTZs22rVrl77++usU05LPkGZ037x7GLz0vy9a7t5Xtm/frooVK6Y5PDhv3rxq3LixzcveaJn7lXwW7c4zwoZh5MrHNTk7O6tNmzZatGiR3S9g7tyH7t7v3dzcVKFCBRmGoZs3b0q6HfQlZWrfubOmu8+mx8bG2txb4l7UqFFDAQEBmjlzps3dxufNm3dPdXp6eqp169b69ttvNWPGDHl7e6tly5Y2dUu2n3tCQoLNsSarDBgwQDdv3rSOann66afl7OysESNGpNh2hmFYP7vLly9bjynJQkND5eTklOZjQps1a6affvpJW7dutbadO3fO7lntkJCQFNe3z5o1K8W/Jxk99mW1jB73GjduLFdXV02ZMsVmm9q7m35yQL9zva9evZrijHVERI
T8/Pw0atQo6+/Mne78fcuoNm3ayDAMu6Ni0hqVktHtf/fvvZOTk3WUTnqPlgUeFJyRBx5QBQsW1NChQzVkyBDVr19fTz31lLy8vLR582Z9/vnnatq0qVq0aJHtdXTt2lVDhw7V2LFj1apVKz333HP68ssv9dJLL2ndunWqW7euEhMT9ccff+jLL7/UqlWrUn0sjXT7RnfvvPOO2rZtq+DgYF24cEELFizQ3r17NWrUKJszmW3bttXjjz+ubt266ffff1eBAgU0ffp0JSYmZmjIbcuWLTVy5Eht2LDBZvTCG2+8oaVLl6pFixa6cOGCPv30U5v3JV+jL0mDBw9WbGyswsPD1bt3b125ckXjx49XaGiounXrZu1XrFgx9enTR+PHj9fNmzf12GOPacmSJdq4caM+++yzVIcyli5dWt99950aNGigiIgIff/996k+5uzEiROqWbOmGjZsqEaNGikwMFBnz57V559/rl27dqlPnz7WM4v9+/fX0qVL1bx5c+uj3K5evao9e/boq6++0pEjR1SgQAG1aNFCdevW1cCBA3XkyBFVqFBBixcvtnvdbffu3TVx4kRFREQoJiZGZ8+e1cyZM1WxYkWb63bDwsLUo0cPjR49Wr/++quaNm0qV1dXHTx4ULGxsZo8ebLatm2b7ud3p/79++urr75Su3bt1L17d1WvXl0XLlzQ0qVLNXPmTFWpUiXD++bbb7+tH374QVFRUSpZsqTOnj2r6dOnq1ixYjaPCLx586Y2bNigV1555Z5qzS7lypVTSEiI+vXrp5MnT8rPz0+LFi3K8uvbpdtDesPCwrR+/fpMz2PMmDFat26datWqpRdeeEEVKlTQhQsXtGPHDq1Zs8b6hUrTpk0VGBiounXrqlChQtq3b5+mTp2qqKgo+fr6Srr9GEBJ+s9//qNnnnlGrq6uatGihby9vTV8+HCNGDFC69atU4MGDVKtp3nz5nr77bfVrVs31alTR3v27NFnn32W4evZ7+bq6qp33nlHPXr0UMOGDdWhQwcdPnxYc+fOved5du7cWfPnz9eqVavUqVMn6xcXklSnTh3lzZtX0dHReu2112SxWPTJJ59k+RB/6faNJ5s1a6aPPvpIb731lkJCQvTOO+9o0KBBOnLkiFq1aiVfX18dPnxYX3/9tV588UX169dP33//vXr16qV27drpkUce0a1bt/TJJ59Yv9BJzYABA/TJJ58oMjJSvXv3tj5+rmTJktq9e7dN3+eff14vvfSS2rRpoyZNmmjXrl1atWpVitEUGT32ZbWMHveSn9k+evRoNW/eXM2aNdPOnTu1YsWKFHU1bdpUJUqUUExMjPr37y9nZ2fNmTNHAQEBOnbsmLWfn5+fZsyYoeeee07VqlXTM888Y+2zfPly1a1bV1OnTr2n9QkPD9dzzz2nDz74QAcPHlRkZKSSkpK0ceNGhYeHq1evXnbfl9Ht//zzz+vChQtq2LChihUrpqNHj2rKlCmqWrVqise4Ag+snLo9PgDH+PTTT43HH3/c8Pb2Ntzd3Y1y5coZI0aMsHnkkGH875FJdz/+5/DhwykeI5ba4+d69uxpt4bhw4cbkox169YZhnH7sUtjx441KlasaLi7uxt58+Y1qlevbowYMcKIj4+3vs/e4+e2bdtmtGjRwihatKjh5uZm+Pj4GPXq1TO+/PJLu8u+cOGCERMTY+TPn9/w8vIywsLCjF9++SWtTWajcuXKRkxMjE1bao8XSn7dbe/evUbTpk0NLy8vI0+ePEanTp2MuLi4FP0SExONUaNGGSVLljTc3NyMihUrGp9++mmKfnc+fi7Zzz//bPj6+hr169e3+6gpwzCMy5cvG5MnTzYiIiKMYsWKGa6uroavr69Ru3Zt47///a/NI4EM4/YjiQYNGmSULl3acHNzMwoUKGDUqVPHeO+994yEhARrv7///tt47rnnDD8/P8Pf39947rnnrI+TunO/MYzb+2OpUqUMNzc3o2rVqsaqVa
tSfeTVrFmzjOrVqxuenp6Gr6+vERoaagwYMMA4depUmtvCMOw/6u7vv/82evXqZd13ihUrZkRHR9s83iwj++batWuNli1bGkWKFDHc3NyMIkWKGB07dkzxuKQVK1YYkoyDBw/a/TyyQmqPn/P29rbb//fffzcaN25s+Pj4GAUKFDBeeOEF6yPk7vysUnv8nL3f8bsfH/XPP/8Ykoxnnnnmntbl7sfPGYZhnDlzxujZs6dRvHhxw9XV1QgMDDQaNWpkzJo1y9rnww8/NOrXr2/kz5/fcHd3N0JCQoz+/fvbHEsM4/ZjrYoWLWo4OTnZbLM33njDsFgsdh/DeKfr168bb7zxhlG4cGHD09PTqFu3rrFly5YU+9q9HEsNwzCmT59uBAcHG+7u7kaNGjWMH374IdVHNabm1q1bRuHChQ1Jxrfffpti+qZNm4zHH3/c8PT0NIoUKWIMGDDAWLVqlc1x2TDu7fFzqT3yb/369Ske9bZo0SKjXr16hre3t+Ht7W2UK1fO6Nmzp7F//37DMAzjr7/+Mrp3726EhIQYHh4eRr58+Yzw8HBjzZo1NvO+e18zDMPYvXu3ERYWZnh4eBhFixY1Ro4cacyePTvF70ViYqLx5ptvGgUKFDC8vLyMiIgI488//7Q7z4we++512xhG2r+fhpGx415iYqIxYsQI677YoEEDY+/evXbXZfv27UatWrUMNzc3o0SJEsbEiRNTPH4u2bp164yIiAjD39/f8PDwMEJCQoyuXbsa27ZtS7d+e8eMW7duGePHjzfKlStnuLm5GQEBAcaTTz5pbN++3dons9v/q6++Mpo2bWoULFjQum49evQwTp8+neq2BR40FsPIhq9kAeAB8cknn6hnz546duyYzTWGSNuRI0cUHBysuXPnqmvXro4uxyFatWoli8Vidzj/g+zbb79V8+bNtWvXLoWGhjq6nHTVrFlTJUuWVGxsrKNLAe5LUFCQGjRoYPMIQAAPLq6RB4A0dOrUSSVKlNC0adMcXQpMZN++fVq2bJnd610fdOvWrdMzzzxjihB/+fJl7dq1S2+//bajSwEA4J5wjTwApMHJySndu50Ddytfvny23RQrtxs/fryjS8gwPz8/bowFADAlzsgDAAAAAGAiXCMPAAAAAICJcEYeAAAAAAATIcgDAAAAAGAi3OzOjqSkJJ06dUq+vr6yWCyOLgcAAAAA8IAzDEP//POPihQpIientM+5E+TtOHXqlIoXL+7oMgAAAAAAD5njx4+rWLFiafYhyNvh6+sr6fYG9PPzc3A1AAAAAIAH3eXLl1W8eHFrHk0LQd6O5OH0fn5+BHkAAAAAQI7JyOXd3OwOAAAAAAATIcgDAAAAAGAiDg/y06ZNU1BQkDw8PFSrVi1t3bo11b6//fab2rRpo6CgIFksFk2aNOm+5wkAAAAAgJk49Br5hQsXqm/fvpo5c6Zq1aqlSZMmKSIiQvv371fBggVT9L927ZpKlSqldu3a6fXXX8+SeQIAAAAA0peYmKibN286ugzTcnZ2louLS5Y84txiGIaRBTVlSq1atfTYY49p6tSpkm4/v7148eJ69dVXNXDgwDTfGxQUpD59+qhPnz5ZNs9kly9flr+/v+Lj47nZHQAAAICH3pUrV3TixAk5MD4+ELy8vFS4cGG5ubmlmHYvOdRhZ+QTEhK0fft2DRo0yNrm5OSkxo0ba8uWLTk6zxs3bujGjRvWny9fvpyp5QMAAADAgyYxMVEnTpyQl5eXAgICsuSM8sPGMAwlJCTo3LlzOnz4sMqUKSMnp8xf6e6wIH/+/HklJiaqUKFCNu2FChXSH3/8kaPzHD16tEaMGJGpZQIAAADAg+zmzZsyDEMBAQHy9PR0dDmm5enpKVdXVx09elQJCQny8PDI9LwcfrO73GDQoEGKj4+3vo4fP+7okgAAAAAgV+FM/P27n7Pwd3LYGfkCBQrI2dlZZ86csWk/c+aMAgMDc3Se7u7ucnd3z9QyAQAAAA
DISQ47I+/m5qbq1atr7dq11rakpCStXbtWtWvXzjXzBAAAAAAgN3Ho4+f69u2r6Oho1ahRQzVr1tSkSZN09epVdevWTZLUpUsXFS1aVKNHj5Z0+2Z2v//+u/X/T548qV9//VU+Pj4qXbp0huYJAAAAALh/QQOX5+jyjoyJytHl2ZPa09NymkODfIcOHXTu3DkNHTpUcXFxqlq1qlauXGm9Wd2xY8dsriE4deqUHn30UevP7733nt577z2FhYVp/fr1GZonAAAAAODBlt71/MOGDdPw4cPveb6//PKLvL29M1lV1nHoc+RzK54jDwAAAAC3Xb9+XYcPH1ZwcLDNndZz8xn5uLg46/8vXLhQQ4cO1f79+61tPj4+8vHxkXT70XCJiYlyccn+89ypbUvp3nIod60HAAAAADxQAgMDrS9/f39ZLBbrz3/88Yd8fX21YsUKVa9eXe7u7vrxxx916NAhtWzZUoUKFZKPj48ee+wxrVmzxma+QUFBmjRpkvVni8Wijz76SK1bt5aXl5fKlCmjpUuXZvv6EeQBAAAAAA+dgQMHasyYMdq3b58qV66sK1euqFmzZlq7dq127typyMhItWjRQseOHUtzPiNGjFD79u21e/duNWvWTJ06ddKFCxeytXaCPAAAAADgofP222+rSZMmCgkJUb58+VSlShX16NFDlSpVUpkyZTRy5EiFhISke4a9a9eu6tixo0qXLq1Ro0bpypUr2rp1a7bWTpAHAAAAADx0atSoYfPzlStX1K9fP5UvX1558uSRj4+P9u3bl+4Z+cqVK1v/39vbW35+fjp79my21JzMoXetBwAAAADAEe6++3y/fv20evVqvffeeypdurQ8PT3Vtm1bJSQkpDkfV1dXm58tFouSkpKyvN47EeQBAAAAAA+9TZs2qWvXrmrdurWk22fojxw54tiiUkGQBwAAOS69RxYd8Xg2/ZkMj8+iagAAkMqUKaPFixerRYsWslgseuutt7L9zHpmEeQBAIAphX4cmm6fPdF7cqASAHg43ctz3c1g4sSJ6t69u+rUqaMCBQrozTff1OXLlx1dll0WwzAMRxeR21y+fFn+/v6Kj4+Xn5+fo8sBAOCBkxVn5EODS6TbhyAPAPfv+vXrOnz4sIKDg+Xh4eHockwtrW15LzmUu9YDAAAAAGAiBHkAAAAAAEyEIA8AAAAAgIkQ5AEAAAAAMBGCPAAAAAAAJkKQBwAAAADARAjyAAAAAACYCEEeAAAAAAATIcgDAAAAAGAiLo4uAAAAAABgQsP9c3h58Tm7vFyMM/IAAAAAgAeKxWJJ8zV8+PD7mveSJUuyrNbM4Iw8AAAAAOCBcvr0aev/L1y4UEOHDtX+/futbT4+Po4oK8twRh4AAAAA8EAJDAy0vvz9/WWxWGzavvjiC5UvX14eHh4qV66cpk+fbn1vQkKCevXqpcKFC8vDw0MlS5bU6NGjJUlBQUGSpNatW8tisVh/zmmckQcAAAAAPDQ+++wzDR06VFOnTtWjjz6qnTt36oUXXpC3t7eio6P1wQcfaOnSpfryyy9VokQJHT9+XMePH5ck/fLLLypYsKDmzp2ryMhIOTs7O2QdCPIAAAAAgIfGsGHDNGHCBD399NOSpODgYP3+++/68MMPFR0drWPHjqlMmTKqV6+eLBaLSpYsaX1vQECAJClPnjwKDAx0SP0SQR4AAAAA8JC4evWqDh06pJiYGL3wwgvW9lu3bsnf//Zd+Lt27aomTZqobNmyioyMVPPmzdW0aVNHlWwXQR4AAAAA8FC4cuWKJOm///2vatWqZTMteZh8tWrVdPjwYa1YsUJr1qxR+/bt1bhxY3311Vc5Xm9qCPIAAAAAgIdCoUKFVKRIEf3111/q1KlTqv38/PzUoUMHdejQQW3btlVkZKQuXLigfPnyydXVVYmJiTlYdUoEeQAAAADAQ2PEiBF67bXX5O/vr8jISN24cUPbtm3TxYsX1bdvX02cOFGFCxfWo48+Ki
cnJ8XGxiowMFB58uSRdPvO9WvXrlXdunXl7u6uvHnz5vg6EOQBAAAAAPdueLyjK8iU559/Xl5eXho/frz69+8vb29vhYaGqk+fPpIkX19fjRs3TgcPHpSzs7Mee+wxffvtt3Jyuv309gkTJqhv377673//q6JFi+rIkSM5vg4WwzCMHF9qLnf58mX5+/srPj5efn5+ji4HAIAHTtDA5WlOP+LxbLrzCA0ukW6fPdF7MlwTAMC+69ev6/DhwwoODpaHh4ejyzG1tLblveRQp+wsEgAAAAAAZC2CPAAAAAAAJkKQBwAAAADARAjyAAAAAACYCEEeAAAAAAATIcgDAAAAAGAiBHkAAAAAAEyEIA8AAAAAgIkQ5AEAAAAAMBEXRxcAAAAAADCf0I9Dc3R5e6L35OjyGjRooKpVq2rSpEk5utyM4Iw8AAAAAOCB0qJFC0VGRtqdtnHjRlksFu3evTuHq8o6BHkAAAAAwAMlJiZGq1ev1okTJ1JMmzt3rmrUqKHKlSs7oLKsQZAHAAAAADxQmjdvroCAAM2bN8+m/cqVK4qNjVWrVq3UsWNHFS1aVF5eXgoNDdXnn3/umGIzgSAPAAAAAHiguLi4qEuXLpo3b54Mw7C2x8bGKjExUZ07d1b16tW1fPly7d27Vy+++KKee+45bd261YFVZxxBHgAAAADwwOnevbsOHTqkDRs2WNvmzp2rNm3aqGTJkurXr5+qVq2qUqVK6dVXX1VkZKS+/PJLB1accQR5AAAAAMADp1y5cqpTp47mzJkjSfrzzz+1ceNGxcTEKDExUSNHjlRoaKjy5csnHx8frVq1SseOHXNw1RlDkAcAAAAAPJBiYmK0aNEi/fPPP5o7d65CQkIUFham8ePHa/LkyXrzzTe1bt06/frrr4qIiFBCQoKjS84QgjwAAAAA4IHUvn17OTk5acGCBZo/f766d+8ui8WiTZs2qWXLlurcubOqVKmiUqVK6cCBA44uN8MI8gAAAACAB5KPj486dOigQYMG6fTp0+rataskqUyZMlq9erU2b96sffv2qUePHjpz5oxji70HLo4uAAAAAABgPnui9zi6hAyJiYnR7Nmz1axZMxUpUkSSNGTIEP3111+KiIiQl5eXXnzxRbVq1Urx8fEOrjZjCPIPg+H+Gehjjh0WAAAAAO5F7dq1bR5BJ0n58uXTkiVL0nzf+vXrs6+o+0SQN7mggcvT7XPEI/35hH4cmuZ0s3zbBgAAAAAPOq6RBwAAAADARAjyAAAAAACYCEEeAAAAAAATIcgDAAAAANJ19w3jcO+yahsS5AEAAAAAqXJ2dpYkJSQkOLgS87t27ZokydXV9b7mw13rAQAAAACpcnFxkZeXl86dOydXV1c5OXE++F4ZhqFr167p7NmzypMnj/XLkcwiyAMAAAAAUmWxWFS4cGEdPnxYR48edXQ5ppYnTx4FBgbe93wI8gAAAACANLm5ualMmTIMr78Prq6u930mPhlBHgAAAACQLicnJ3l4eDi6DIib3QEAAAAAYCoEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATMThQX7atGkKCgqSh4eHatWqpa1bt6bZPzY2VuXKlZOHh4dCQ0P17bff2ky/cuWKevXqpWLFisnT01MVKlTQzJkzs3MVAAAAAADIMQ4N8gsXLlTfvn01bNgw7dixQ1WqVFFERITOnj1rt//mzZvVsWNHxcTEaOfOnWrVqpVatWqlvXv3Wvv07dtXK1eu1Keffqp9+/apT58+6tWrl5YuXZpTqwUAAAAAQLZxaJCfOHGiXnjhBXXr1s165tzLy0tz5syx23/y5MmKjIxU//79Vb
58eY0cOVLVqlXT1KlTrX02b96s6OhoNWjQQEFBQXrxxRdVpUqVdM/0AwAAAABgBg4L8gkJCdq+fbsaN278v2KcnNS4cWNt2bLF7nu2bNli01+SIiIibPrXqVNHS5cu1cmTJ2UYhtatW6cDBw6oadOmqdZy48YNXb582eYFAAAAAEBu5LAgf/78eSUmJqpQoUI27YUKFVJcXJzd98TFxaXbf8qUKapQoYKKFSsmNzc3RUZGatq0aapfv36qtYwePVr+/v7WV/Hixe9jzQAAAAAAyD4Ov9ldVpsyZYp++uknLV26VNu3b9eECRPUs2dPrVmzJtX3DBo0SPHx8dbX8ePHc7BiAAAAAAAyzsVRCy5QoICcnZ115swZm/YzZ84oMDDQ7nsCAwPT7P/vv/9q8ODB+vrrrxUVFSVJqly5sn799Ve99957KYblJ3N3d5e7u/v9rhIAAAAAANnOYWfk3dzcVL16da1du9balpSUpLVr16p27dp231O7dm2b/pK0evVqa/+bN2/q5s2bcnKyXS1nZ2clJSVl8RoAAAAAAJDzHHZGXrr9qLjo6GjVqFFDNWvW1KRJk3T16lV169ZNktSlSxcVLVpUo0ePliT17t1bYWFhmjBhgqKiovTFF19o27ZtmjVrliTJz89PYWFh6t+/vzw9PVWyZElt2LBB8+fP18SJEx22ngAAAAAAZBWHBvkOHTro3LlzGjp0qOLi4lS1alWtXLnSekO7Y8eO2Zxdr1OnjhYsWKAhQ4Zo8ODBKlOmjJYsWaJKlSpZ+3zxxRcaNGiQOnXqpAsXLqhkyZJ699139dJLL+X4+gEAAAAAkNUshmEYji4it7l8+bL8/f0VHx8vPz8/R5eTpqCBy9Ptc8Tj2XT7hAaXSHP6nug9Ga4JAID0pPfvV1b82yXx7xcAwDzuJYc+cHetBwAAAADgQUaQBwAAAADARAjyAAAAAACYCEEeAAAAAAATIcgDAAAAAGAiBHkAAAAAAEyEIA8AAAAAgIkQ5AEAAAAAMBGCPAAAAAAAJkKQBwAAAADARAjyAAAAAACYCEEeAAAAAAATIcgDAAAAAGAiBHkAAAAAAEyEIA8AAAAAgIkQ5AEAAAAAMBGCPAAAAAAAJkKQBwAAAADARAjyAAAAAACYCEEeAAAAAAATIcgDAAAAAGAiBHkAAAAAAEyEIA8AAAAAgIkQ5AEAAAAAMBEXRxcAAAAAIGsEDVyebp8jY6LS7RP6cWia0/dE78lwTQCyHkEeAAAAeJgM90+/T3CJ7K8DQKYxtB4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJiIi6MLAAA8XIIGLk+3z5ExUen2Cf04NM3pe6L3ZLgmAAAAM+GMPAAAAAAAJsIZeQBA7jPcP/0+wSWyvw4AAIBciDPyAAAAAACYCEEeAAAAAAATIcgDAAAAAGAiBHkAAAAAAEyEIA8AAAAAgIkQ5AEAAAAAMBGCPAAAAAAAJkKQBwAAAADARAjyAAAAAACYCEEeAAAAAAATIcgDAAAAAGAiBHkAAAAAAEyEIA8AAAAAgIm4OLoAAAAAAAAyK/Tj0HT77InekwOV5ByCPAAAAADAIYIGLk+3zxGPZ9PuEFwii6oxD4bWAwAAAABgIgR5AAAAAABMhCAPAAAAAICJEOQBAAAAADARgjwAAAAAACZCkAcAAAAAwEQI8gAAAAAAmAhBHgAAAAAAEyHIAwAAAABgIgR5AAAAAABMhCAPAAAAAICJEOQBAAAAADARgjwAAAAAACZCkAcAAAAAwEQI8gAAAAAAmAhBHgAAAAAAEyHIAwAAAABgIgR5AAAAAABMxMXRBQCAqQ33T2d6fM7UAQAAgIcGQR
65S3qhSCIYwVRCPw5Nt8+e6D05UAkAAAAeFA4fWj9t2jQFBQXJw8NDtWrV0tatW9PsHxsbq3LlysnDw0OhoaH69ttvU/TZt2+fnnrqKfn7+8vb21uPPfaYjh07ll2rAAAAAABAjnHoGfmFCxeqb9++mjlzpmrVqqVJkyYpIiJC+/fvV8GCBVP037x5szp27KjRo0erefPmWrBggVq1aqUdO3aoUqVKkqRDhw6pXr16iomJ0YgRI+Tn56fffvtNHh4eOb16uEvQwOXp9jnCx4RchH0WAAAAuZFDg/zEiRP1wgsvqFu3bpKkmTNnavny5ZozZ44GDhyYov/kyZMVGRmp/v37S5JGjhyp1atXa+rUqZo5c6Yk6T//+Y+aNWumcePGWd8XEhKSA2uDnJLeUGWGKQMAAAB4kDlsaH1CQoK2b9+uxo0b/68YJyc1btxYW7ZssfueLVu22PSXpIiICGv/pKQkLV++XI888ogiIiJUsGBB1apVS0uWLEmzlhs3bujy5cs2LwAAAAAAciOHBfnz588rMTFRhQoVsmkvVKiQ4uLi7L4nLi4uzf5nz57VlStXNGbMGEVGRuq7775T69at9fTTT2vDhg2p1jJ69Gj5+/tbX8WLF7/PtQMAAAAAIHs4/GZ3WSkpKUmS1LJlS73++uuqWrWqBg4cqObNm1uH3tszaNAgxcfHW1/Hjx/PqZIBAAAAALgnDrtGvkCBAnJ2dtaZM2ds2s+cOaPAwEC77wkMDEyzf4ECBeTi4qIKFSrY9Clfvrx+/PHHVGtxd3eXu7t7ZlYDAAAAAIAc5bAz8m5ubqpevbrWrl1rbUtKStLatWtVu3Ztu++pXbu2TX9JWr16tbW/m5ubHnvsMe3fv9+mz4EDB1SyZMksXgMAAAAAAHKeQ+9a37dvX0VHR6tGjRqqWbOmJk2apKtXr1rvYt+lSxcVLVpUo0ePliT17t1bYWFhmjBhgqKiovTFF19o27ZtmjVrlnWe/fv3V4cOHVS/fn2Fh4dr5cqV+r//+z+tX7/eEasIAAAAAECWcmiQ79Chg86dO6ehQ4cqLi5OVatW1cqVK603tDt27JicnP43aKBOnTpasGCBhgwZosGDB6tMmTJasmSJ9RnyktS6dWvNnDlTo0eP1muvvaayZctq0aJFqlevXo6vHwAAAAAAWc2hQV6SevXqpV69etmdZu8sert27dSuXbs059m9e3d17949K8oDAAAAACBXeaDuWg8AAAAAwIMuU0H+r7/+yuo6AAAAAABABmQqyJcuXVrh4eH69NNPdf369ayuCQAAAAAApCJTQX7Hjh2qXLmy+vbtq8DAQPXo0UNbt27N6toAAAAAAMBdMhXkq1atqsmTJ+vUqVOaM2eOTp8+rXr16qlSpUqaOHGizp07l9V1AgAAAAAA3efN7lxcXPT0008rNjZWY8eO1Z9//ql+/fqpePHi6tKli06fPp1VdQIAAAAAAN1nkN+2bZteeeUVFS5cWBMnTlS/fv106NAhrV69WqdOnVLLli2zqk4AAAAAAKBMPkd+4sSJmjt3rvbv369mzZpp/vz5atasmZycbn8vEBwcrHnz5ikoKCgrawUAAAAA4KGXqSA/Y8YMde/eXV27dlXhwoXt9ilYsKBmz559X8UBAAAAAABbmQryBw8eTLePm5uboqOjMzN7AAAAAA+4oIHL0+1zZExUun1CPw5Nc/qe6D0Zrgkwi0wF+blz58rHx0ft2rWzaY+NjdW1a9cI8AAAAADu33D/9PsEl8j+OoBcJlM3uxs9erQKFCiQor1gwYIaNWrUfRcFAAAAAADsy1SQP3bsmIKDg1O0lyxZUseOHbvvogAAAAAAgH2ZCvIFCxbU7t27U7Tv2rVL+fPnv++iAAAAAACAfZm6Rr5jx4567bXX5Ovrq/r160uSNmzYoN69e+uZZ57J0gIBAEDO4aZRAADkfpkK8iNHjtSRI0fUqFEjubjcnkVSUpK6dOnCNfIAADhIeneAPu
LxbPoz4aZRAADkepkK8m5ublq4cKFGjhypXbt2ydPTU6GhoSpZsmRW1wcAAAAAAO6QqSCf7JFHHtEjjzySVbUAAAAAAIB0ZCrIJyYmat68eVq7dq3Onj2rpKQkm+nff/99lhQHAAAAAABsZSrI9+7dW/PmzVNUVJQqVaoki8WS1XUBuAdZcV1saAaui+UmVwAAAIDjZSrIf/HFF/ryyy/VrFmzrK4HAAAAAACkIVPPkXdzc1Pp0qWzuhYAAAAAAJCOTAX5N954Q5MnT5ZhGFldDwAAAAAASEOmhtb/+OOPWrdunVasWKGKFSvK1dXVZvrixYuzpDgAAAAAAGArU0E+T548at26dVbXAgAAAAAA0pGpID937tysrgMAAAAAAGRApq6Rl6Rbt25pzZo1+vDDD/XPP/9Ikk6dOqUrV65kWXEAAAAAAMBWps7IHz16VJGRkTp27Jhu3LihJk2ayNfXV2PHjtWNGzc0c+bMrK4TAAAAAAAok2fke/furRo1aujixYvy9PS0trdu3Vpr167NsuIAAAAAAICtTJ2R37hxozZv3iw3Nzeb9qCgIJ08eTJLCgMAAAAAACll6ox8UlKSEhMTU7SfOHFCvr6+910UAAAAAACwL1NBvmnTppo0aZL1Z4vFoitXrmjYsGFq1qxZVtUGAAAAAADukqmh9RMmTFBERIQqVKig69ev69lnn9XBgwdVoEABff7551ldIwAAAAAA+P8yFeSLFSumXbt26YsvvtDu3bt15coVxcTEqFOnTjY3vwMAAAAAAFkrU0FeklxcXNS5c+esrAUAAAAwp+H+aU4ODS6R7iz2RO/JqmoAPOAyFeTnz5+f5vQuXbpkqhgAAAAgtwkauDzdPkc8cqAQAPj/MhXke/fubfPzzZs3de3aNbm5ucnLy4sgDwAAHh7pnImVOBsLAMhamQryFy9eTNF28OBBvfzyy+rfv/99FwUAAJBbpHc2ljOxAICclqnHz9lTpkwZjRkzJsXZegAAAAAAkHWyLMhLt2+Ad+rUqaycJQAAAAAAuEOmhtYvXbrU5mfDMHT69GlNnTpVdevWzZLCAAAAAABASpkK8q1atbL52WKxKCAgQA0bNtSECROyoi4AAAAAAGBHpoJ8UlJSVtcBAAAAAAAyIEuvkQcAAAAAANkrU2fk+/btm+G+EydOzMwiAAAAAACAHZkK8jt37tTOnTt18+ZNlS1bVpJ04MABOTs7q1q1atZ+Fosla6oEAAAAAACSMhnkW7RoIV9fX3388cfKmzevJOnixYvq1q2bnnjiCb3xxhtZWiQAAAAAALgtU9fIT5gwQaNHj7aGeEnKmzev3nnnHe5aDwAAAABANspUkL98+bLOnTuXov3cuXP6559/7rsoAAAAAABgX6aCfOvWrdWtWzctXrxYJ06c0IkTJ7Ro0SLFxMTo6aefzuoaAQAAAADA/5epa+Rnzpypfv366dlnn9XNmzdvz8jFRTExMRo/fnyWFggAAAAAAP4nU0Hey8tL06dP1/jx43Xo0CFJUkhIiLy9vbO0OAAAAAAAYCtTQ+uTnT59WqdPn1aZMmXk7e0twzCyqi4AAAAAAGBHpoL833//rUaNGumRRx5Rs2bNdPr0aUlSTEwMj54DAAAAACAbZSrIv/7663J1ddWxY8fk5eVlbe/QoYNWrlyZZcUBAAAAAABbmbpG/rvvvtOqVatUrFgxm/YyZcro6NGjWVIYAAAAAABIKVNn5K9evWpzJj7ZhQsX5O7uft9FAQAAAAAA+zIV5J944gnNnz/f+rPFYlFSUpLGjRun8PDwLCsOAAAAAADYytTQ+nHjxqlRo0batm2bEhISNGDAAP3222+6cOGCNm3alNU1AgAAAACA/y9TZ+QrVaqkAwcOqF69emrZsqWuXr2qp59+Wjt37lRISEhW1wgAAAAAAP6/ez4jf/PmTUVGRmrmzJn6z3/+kx01AQAAAACAVNzzGXlXV1ft3r07O2oBAAAAAADpyNTQ+s
6dO2v27NlZXQsAAAAAAEhHpm52d+vWLc2ZM0dr1qxR9erV5e3tbTN94sSJWVIcAAAAAACwdU9B/q+//lJQUJD27t2ratWqSZIOHDhg08disWRddQAAAAAAwMY9BfkyZcro9OnTWrdunSSpQ4cO+uCDD1SoUKFsKQ4AAAAAANi6p2vkDcOw+XnFihW6evVqlhYEAAAAAABSl6mb3SW7O9gDAAAAAIDsdU9B3mKxpLgGnmviAQAAAADIOfd0jbxhGOratavc3d0lSdevX9dLL72U4q71ixcvzroKAQAAAACA1T0F+ejoaJufO3funKXFAAAAAACAtN1TkJ87d2521QEAAAAAADLgvm52BwAAAAAAchZBHgAAAAAAEyHIAwAAAABgIgR5AAAAAABMhCAPAAAAAICJEOQBAAAAADARgjwAAAAAACZCkAcAAAAAwERyRZCfNm2agoKC5OHhoVq1amnr1q1p9o+NjVW5cuXk4eGh0NBQffvtt6n2femll2SxWDRp0qQsrhoAAAAAgJzn8CC/cOFC9e3bV8OGDdOOHTtUpUoVRURE6OzZs3b7b968WR07dlRMTIx27typVq1aqVWrVtq7d2+Kvl9//bV++uknFSlSJLtXAwAAAACAHOHi6AImTpyoF154Qd26dZMkzZw5U8uXL9ecOXM0cODAFP0nT56syMhI9e/fX5I0cuRIrV69WlOnTtXMmTOt/U6ePKlXX31Vq1atUlRUVJo13LhxQzdu3LD+fPny5axYNUChH4emOX1P9J4cqgQAAADAg8KhQT4hIUHbt2/XoEGDrG1OTk5q3LixtmzZYvc9W7ZsUd++fW3aIiIitGTJEuvPSUlJeu6559S/f39VrFgx3TpGjx6tESNGZG4l8MAKGrg8zelHxqT9BREAAAAAZAeHDq0/f/68EhMTVahQIZv2QoUKKS4uzu574uLi0u0/duxYubi46LXXXstQHYMGDVJ8fLz1dfz48XtcEwAAAAAAcobDh9Znte3bt2vy5MnasWOHLBZLht7j7u4ud3f3bK4MD5zh/un3CS6R/XUAAAAAeKg4NMgXKFBAzs7OOnPmjE37mTNnFBgYaPc9gYGBafbfuHGjzp49qxIl/hegEhMT9cYbb2jSpEk6cuRI1q4EAAAA7ll6l7BJ0hGPZ9OcHpqBL8y5Hw2AB5FDh9a7ubmpevXqWrt2rbUtKSlJa9euVe3ate2+p3bt2jb9JWn16tXW/s8995x2796tX3/91foqUqSI+vfvr1WrVmXfygAAAAAAkAMcPrS+b9++io6OVo0aNVSzZk1NmjRJV69etd7FvkuXLipatKhGjx4tSerdu7fCwsI0YcIERUVF6YsvvtC2bds0a9YsSVL+/PmVP39+m2W4uroqMDBQZcuWzdmVAwAAAAAgizk8yHfo0EHnzp3T0KFDFRcXp6pVq2rlypXWG9odO3ZMTk7/GzhQp04dLViwQEOGDNHgwYNVpkwZLVmyRJUqVXLUKgAAAAAAkGMcHuQlqVevXurVq5fdaevXr0/R1q5dO7Vr1y7D8+e6eAAAAADAg8Kh18gDAAAAAIB7Q5AHAAAAAMBEcsXQegDA/cmKxzhpeHwWVQMAAIDsRJAHAEiSQj8OTbcPz2MGAABwPIbWAwAAAABgIgR5AAAAAABMhCAPAAAAAICJEOQBAAAAADARgjwAAAAAACZCkAcAAAAAwEQI8gAAAAAAmAhBHgAAAAAAEyHIAwAAAABgIgR5AAAAAABMhCAPAAAAAICJEOQBAAAAADARgjwAAAAAACZCkAcAAAAAwEQI8gAAAAAAmAhBHgAAAAAAEyHIAwAAAABgIgR5AAAAAABMhCAPAAAAAICJEOQBAAAAADARgjwAAAAAACZCkAcAAAAAwEQI8gAAAAAAmAhBHgAAAAAAEyHIAwAAAABgIgR5AAAAAABMhCAPAAAAAICJEOQBAAAAADARgjwAAAAAACZCkAcAAAAAwEQI8gAAAAAAmAhBHg
AAAAAAEyHIAwAAAABgIgR5AAAAAABMhCAPAAAAAICJEOQBAAAAADARF0cXAAAAAADIOkEDl6c5/ciYqHTnEfpxaLp99kTvyXBNyFoEeQAAAAB4mAz3T79PcInsrwOZxtB6AAAAAABMhCAPAAAAAICJEOQBAAAAADARgjwAAAAAACZCkAcAAAAAwEQI8gAAAAAAmAhBHgAAAAAAEyHIAwAAAABgIgR5AAAAAABMhCAPAAAAAICJEOQBAAAAADARgjwAAAAAACZCkAcAAAAAwEQI8gAAAAAAmAhBHgAAAAAAEyHIAwAAAABgIgR5AAAAAABMhCAPAAAAAICJEOQBAAAAADARgjwAAAAAACZCkAcAAAAAwEQI8gAAAAAAmAhBHgAAAAAAEyHIAwAAAABgIgR5AAAAAABMhCAPAAAAAICJEOQBAAAAADARgjwAAAAAACZCkAcAAAAAwEQI8gAAAAAAmIiLowsAAABADhrun870+JypAwCQaQR5AABSETRwebp9jng8m26f0OASaU7fE70nwzUBacnYPpsDhQAAshVBHgAAAFahH4em24cvnwDAsbhGHgAAAAAAEyHIAwAAAABgIgR5AAAAAABMhCAPAAAAAICJ5IogP23aNAUFBcnDw0O1atXS1q1b0+wfGxurcuXKycPDQ6Ghofr222+t027evKk333xToaGh8vb2VpEiRdSlSxedOnUqu1cDAAAAAIBs5/Agv3DhQvXt21fDhg3Tjh07VKVKFUVEROjs2bN2+2/evFkdO3ZUTEyMdu7cqVatWqlVq1bau3evJOnatWvasWOH3nrrLe3YsUOLFy/W/v379dRTT+XkagEAAAAAkC0cHuQnTpyoF154Qd26dVOFChU0c+ZMeXl5ac6cOXb7T548WZGRkerfv7/Kly+vkSNHqlq1apo6daokyd/fX6tXr1b79u1VtmxZPf7445o6daq2b9+uY8eO5eSqAQAAAACQ5Rwa5BMSErR9+3Y1btzY2ubk5KTGjRtry5Ytdt+zZcsWm/6SFBERkWp/SYqPj5fFYlGePHnsTr9x44YuX75s8wIAAAAAIDdyaJA/f/68EhMTVahQIZv2QoUKKS4uzu574uLi7qn/9evX9eabb6pjx47y8/Oz22f06NHy9/e3vooXL56JtQEAAAAAIPs5fGh9drp586bat28vwzA0Y8aMVPsNGjRI8fHx1tfx48dzsEoAAAAAADLOxZELL1CggJydnXXmzBmb9jNnzigwMNDuewIDAzPUPznEHz16VN9//32qZ+Mlyd3dXe7u7plcCwAAAAAAco5Dz8i7ubmpevXqWrt2rbUtKSlJa9euVe3ate2+p3bt2jb9JWn16tU2/ZND/MGDB7VmzRrlz58/e1YAAAAAAIAc5tAz8pLUt29fRUdHq0aNGqpZs6YmTZqkq1evqlu3bpKkLl26qGjRoho9erQkqXfv3goLC9OECRMUFRWlL774Qtu2bdOsWbMk3Q7xbdu21Y4dO7Rs2TIlJiZar5/Ply+f3NzcHLOiAAAAAABkAYcH+Q4dOujcuXMaOnSo4uLiVLVqVa1cudJ6Q7tjx47Jyel/Awfq1KmjBQsWaMiQIRo8eLDKlCmjJUuWqFKlSpKkkydPaunSpZKkqlWr2ixr3bp1atCgQY6sFwAAAAAA2cHhQV6SevXqpV69etmdtn79+hRt7dq1U7t27ez2DwoKkmEYWVkeAAAAAAC5xgN913oAAAAAAB40BHkAAAAAAEyEIA8AAAAAgIkQ5AEAAAAAMBGCPAAAAAAAJkKQBwAAAADARAjyAAAAAACYCEEeAAAAAAATIcgDAAAAAGAiBHkAAAAAAEyEIA8AAAAAgIkQ5AEAAAAAMBGCPAAAAAAAJkKQBwAAAADARAjyAAAAAACYCEEeAAAAAAATIcgDAAAAAGAiBHkAAAAAAEzExdEFAHi4BA1cnub0Ix7Ppj+T4fFZVA0AAABgPgR5AKYT+nFoun32RO/JgUoAAA
CAnMfQegAAAAAATIQgDwAAAACAiTC0HgAAAADSke59fsZEpTuP9C4P5NJAZBRBHgAAAADu13D/9PsEl8j+OvBQYGg9AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwkVwR5KdNm6agoCB5eHioVq1a2rp1a5r9Y2NjVa5cOXl4eCg0NFTffvutzXTDMDR06FAVLlxYnp6eaty4sQ4ePJidqwAAAAAAQI5weJBfuHCh+vbtq2HDhmnHjh2qUqWKIiIidPbsWbv9N2/erI4dOyomJkY7d+5Uq1at1KpVK+3du9faZ9y4cfrggw80c+ZM/fzzz/L29lZERISuX7+eU6sFAAAAAEC2cHiQnzhxol544QV169ZNFSpU0MyZM+Xl5aU5c+bY7T958mRFRkaqf//+Kl++vEaOHKlq1app6tSpkm6fjZ80aZKGDBmili1bqnLlypo/f75OnTqlJUuW5OCaAQAAAACQ9VwcufCEhARt375dgwYNsrY5OTmpcePG2rJli933bNmyRX379rVpi4iIsIb0w4cPKy4uTo0bN7ZO9/f3V61atbRlyxY988wzKeZ548YN3bhxw/pzfHy8JOny5cuZXrecknTjWrp9LluMdPsk/puY9jyyYFuYqVYp/XoftlqlnNkPzFSrlH691JqSmY4FZqpV4rhldz4ct1Iu5wE6FlBrSmY6bpmpVoljrN355JL9IKdqzW7JNRpG+p+PDAc6efKkIcnYvHmzTXv//v2NmjVr2n2Pq6ursWDBApu2adOmGQULFjQMwzA2bdpkSDJOnTpl06ddu3ZG+/bt7c5z2LBhhiRevHjx4sWLFy9evHjx4sXLoa/jx4+nm6UdekY+txg0aJDNWf6kpCRduHBB+fPnl8VicWBlWe/y5csqXry4jh8/Lj8/P0eXkyYz1SqZq15qzR7Umj2oNXuYqVbJXPVSa/ag1uxBrdnHTPVSa+5gGIb++ecfFSlSJN2+Dg3yBQoUkLOzs86cOWPTfubMGQUGBtp9T2BgYJr9k/975swZFS5c2KZP1apV7c7T3d1d7u7uNm158uS5l1UxHT8/P9Ps+GaqVTJXvdSaPag1e1Br9jBTrZK56qXW7EGt2YNas4+Z6qVWx/P3989QP4fe7M7NzU3Vq1fX2rVrrW1JSUlau3atateubfc9tWvXtukvSatXr7b2Dw4OVmBgoE2fy5cv6+eff051ngAAAAAAmIXDh9b37dtX0dHRqlGjhmrWrKlJkybp6tWr6tatmySpS5cuKlq0qEaPHi1J6t27t8LCwjRhwgRFRUXpiy++0LZt2zRr1ixJksViUZ8+ffTOO++oTJkyCg4O1ltvvaUiRYqoVatWjlpNAAAAAACyhMODfIcOHXTu3DkNHTpUcXFxqlq1ql
auXKlChQpJko4dOyYnp/8NHKhTp44WLFigIUOGaPDgwSpTpoyWLFmiSpUqWfsMGDBAV69e1YsvvqhLly6pXr16WrlypTw8PHJ8/XIbd3d3DRs2LMWlBLmRmWqVzFUvtWYPas0e1Jo9zFSrZK56qTV7UGv2oNbsY6Z6qdV8LIaRkXvbAwAAAACA3MCh18gDAAAAAIB7Q5AHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyD9ktmzZImdnZ0VFRTm6lFR17dpVFotFFotFrq6uCg4O1oABA3T9+nVHl5ZCcq1jxoyxaV+yZIksFouDqrLv7u1aqFAhNWnSRHPmzFFSUpKjy7MrLi5Or776qkqVKiV3d3cVL15cLVq00Nq1ax1dmo07t+2dr8jISEeXZnpxcXHq3bu3SpcuLQ8PDxUqVEh169bVjBkzdO3aNUeXZ9W1a1e7jzhdv369LBaLLl26lOM1ZURqdecmZqgxmb1av/rqK3l4eGjChAmOKeouycerl156KcW0nj17ymKxqGvXrjlfWBpSO8b++eefji4thTtrdXNzU+nSpfX222/r1q1bji7NrnPnzunll19WiRIl5O7ursDAQEVERGjTpk2OLs3K3md/52v48OGOLlEzZ86Ur6+vzed85coVubq6qkGDBjZ9k/9dOHToUA5XacswDDVu3FgREREppk2fPl158uTRiRMnHFBZSi1atEj1b6qNGzfKYrFo9+7dOVyV4xHkHzKzZ8/Wq6++qh9++EGnTp1ydDmpioyM1OnTp/XXX3/p/fff14cffqhhw4Y5uiy7PDw8NHbsWF28eNHRpaQrebseOXJEK1asUHh4uHr37q3mzZvnuj8yjhw5ourVq+v777/X+PHjtWfPHq1cuVLh4eHq2bOno8tLIXnb3vn6/PPPHV1WCsePH1f37t1VpEgRubm5qWTJkurdu7f+/vtvR5eWwl9//aVHH31U3333nUaNGqWdO3dqy5YtGjBggJYtW6Y1a9Y4ukQgTR999JE6deqkGTNm6I033nB0OVbFixfXF198oX///dfadv36dS1YsEAlSpRwYGWps3eMDQ4OdnRZdiXXevDgQb3xxhsaPny4xo8f7+iy7GrTpo127typjz/+WAcOHNDSpUvVoEGDXPVvwp2f+aRJk+Tn52fT1q9fP0eXqPDwcF25ckXbtm2ztm3cuFGBgYH6+eefbU5GrVu3TiVKlFBISIgjSrWyWCyaO3eufv75Z3344YfW9sOHD2vAgAGaMmWKihUr5sAK/ycmJkarV6+2+8XC3LlzVaNGDVWuXNkBlTmWw58jj5xz5coVLVy4UNu2bVNcXJzmzZunwYMHO7osu5K/FZZu/8HRuHFjrV69WmPHjnVwZSk1btxYf/75p0aPHq1x48Y5upw03bldixYtqmrVqunxxx9Xo0aNNG/ePD3//PMOrvB/XnnlFVksFm3dulXe3t7W9ooVK6p79+4OrMy+O7dtbvXXX3+pdu3aeuSRR/T5558rODhYv/32m/r3768VK1bop59+Ur58+RxdptUrr7wiFxcXbdu2zWYfKFWqlFq2bCmenorcbNy4cRo2bJi++OILtW7d2tHl2KhWrZoOHTqkxYsXq1OnTpKkxYsXq0SJErk2HJvhGJvszlpffvllff3111q6dKkGDRrk4MpsXbp0SRs3btT69esVFhYmSSpZsqRq1qzp4Mps3fm5+/v7y2Kx5Lp9oWzZsipcuLDWr1+vxx9/XNLtM+8tW7bU999/r59++sl6Zn79+vUKDw93YLX/U7x4cU2ePFm9evVS06ZNFRQUpJiYGDVt2lTPPfeco8uzat68uQICAjRv3jwNGTLE2n7lyhXFxsbm2i/Kshtn5B8iX375pcqVK6eyZcuqc+fOmjNnjin+EN67d682b94sNzc3R5dil7Ozs0aNGqUpU6bkmiFI96Jhw4aqUqWKFi9e7OhSrC5cuKCVK1eqZ8+eNgEuWZ48eXK+qAdAz5495e
bmpu+++05hYWEqUaKEnnzySa1Zs0YnT57Uf/7zH0eXaPX333/ru+++S3UfkJTrLl8Bkr355psaOXKkli1blutCfLLu3btr7ty51p/nzJmjbt26ObCiB5enp6cSEhIcXUYKPj4+8vHx0ZIlS3Tjxg1Hl2N64eHhWrdunfXndevWqUGDBgoLC7O2//vvv/r5559zTZCXpOjoaDVq1Ejdu3fX1KlTtXfvXpsz9LmBi4uLunTponnz5tlkl9jYWCUmJqpjx44OrM5xCPIPkdmzZ6tz586Sbg/7io+P14YNGxxclX3Lli2Tj4+PPDw8FBoaqrNnz6p///6OLitVrVu3VtWqVXPt8P/0lCtXTkeOHHF0GVZ//vmnDMNQuXLlHF1KhiXvs3e+Ro0a5eiyrC5cuKBVq1bplVdekaenp820wMBAderUSQsXLsw1X+4l7wNly5a1aS9QoIB1+7755psOqs4+e/vAk08+6eiykMNWrFihcePG6ZtvvlGjRo0cXU6qOnfurB9//FFHjx7V0aNHtWnTJuvfCLnR3b9f7dq1c3RJ6TIMQ2vWrNGqVavUsGFDR5eTgouLi+bNm6ePP/5YefLkUd26dTV48OCH8lrjrBAeHq5Nmzbp1q1b+ueff7Rz506FhYWpfv36Wr9+vaTb96q6ceNGrgrykjRr1izt3btXffr00axZsxQQEODoklLo3r27Dh06ZJNd5s6dqzZt2sjf39+BlTkOQ+sfEvv379fWrVv19ddfS7p98O7QoYNmz56d4iYcuUF4eLhmzJihq1ev6v3335eLi4vatGnj6LLSNHbsWDVs2DBXXKt1rwzDyFVnN3NLmLwXyfvsnXLTMPWDBw/KMAyVL1/e7vTy5cvr4sWLOnfunAoWLJjD1WXc1q1blZSUpE6dOuW6M0j29oGff/45V4cjZL3KlSvr/PnzGjZsmGrWrCkfHx9Hl2RXQECAoqKirGe4oqKiVKBAAUeXlaq7f79SG6mTGyR/6XDz5k0lJSXp2WefzRU3ZLOnTZs2ioqK0saNG/XTTz9Zv4j66KOPct1ND3O7Bg0a6OrVq/rll1908eJFPfLIIwoICFBYWJi6deum69eva/369SpVqlSuuxdFwYIF1aNHDy1ZsiTX3ly0XLlyqlOnjubMmaMGDRrozz//1MaNG/X22287ujSHIcg/JGbPnq1bt26pSJEi1jbDMOTu7q6pU6fmum+yvL29Vbp0aUm3h/tVqVJFs2fPVkxMjIMrS139+vUVERGhQYMGme4fv3379uWq6yLLlCkji8WiP/74w9GlZNid+2xult6XJLnlEpbSpUvLYrFo//79Nu2lSpWSpBSjCnIDe/uAGS+3wf0pWrSovvrqK4WHhysyMlIrVqyQr6+vo8uyq3v37urVq5ckadq0aQ6uJm1mOcZK//vSwc3NTUWKFJGLS+7+c9vDw0NNmjRRkyZN9NZbb+n555/XsGHDTPe3jKOVLl1axYoV07p163Tx4kXrfQeKFCmi4sWLa/PmzVq3bl2uHJ0h3T7Jl9v31ZiYGL366quaNm2a5s6dq5CQEOt2fhgxtP4hcOvWLc2fP18TJkzQr7/+an3t2rVLRYoUyZV31r6Tk5OTBg8erCFDhtjcYTc3GjNmjP7v//5PW7ZscXQpGfb9999rz549uWrEQ758+RQREaFp06bp6tWrKabn1kd55WbJwXjfvn12p+/bt08BAQG55v4D+fPnV5MmTTR16lS7+wCQm5UsWVIbNmxQXFycIiMj9c8//zi6JLsiIyOVkJCgmzdv2n0EFTIn+UuHEiVK5PpgZE+FChU47mZSeHi41q9fr/Xr19uMeK1fv75WrFihrVu35rph9WbSvn17OTk5acGCBZo/f766d++eq0aU5jSC/ENg2bJlunjxomJiYlSpUiWbV5s2bTR79mxHl5iudu3aydnZOdefMQgNDVWnTp30wQcfOLoUu27cuKG4uDidPHlSO3bs0KhRo9SyZUs1b95cXb
p0cXR5NqZNm6bExETVrFlTixYt0sGDB7Vv3z598MEHql27tqPLSyF52975On/+vKPLskoOxtOnT0/xhVhcXJw+++yzXHf2Zfr06bp165Zq1KihhQsXat++fdq/f78+/fRT/fHHH3J2dnZ0icgh8fHxNl9E//rrrzp+/Lijy0pT8eLFtX79ep09e1YRERG6fPmyo0tKwdnZWfv27dPvv//O79ND6O+//1bDhg316aefavfu3Tp8+LBiY2M1btw4tWzZ0tHlmVJ4eLh+/PFH/frrrzZnisPCwvThhx8qISGBIH8ffHx81KFDBw0aNEinT5/OdX+35DSC/ENg9uzZaty4sd3h823atNG2bdty/Y1NXFxc1KtXL40bNy7Xf0v89ttvKykpydFl2LVy5UoVLlxYQUFBioyM1Lp16/TBBx/om2++yXV/xJUqVUo7duxQeHi43njjDVWqVElNmjTR2rVrU1yHnBskb9s7X/Xq1XN0WTamTp2qGzduKCIiQj/88IOOHz+ulStXqkmTJnrkkUc0dOhQR5doIyQkRDt37lTjxo01aNAgValSRTVq1NCUKVPUr18/jRw50tElIoesX79ejz76qM1rxIgRji4rXcWKFdP69et1/vz5XBvm/fz85Ofn5+gy4AA+Pj6qVauW3n//fdWvX1+VKlXSW2+9pRdeeEFTp051dHmmFB4ern///VelS5dWoUKFrO1hYWH6559/rI+pQ+bFxMTo4sWLioiIsLlk+GFkMcx4VykAQKYcOXJEw4cP18qVK3X27FkZhqGnn35an3zyiby8vBxdHgAAADKAIA8AD7Fhw4Zp4sSJWr16tR5//HFHlwMAAIAMIMgDwENu7ty5io+P12uvvSYnJ664AgAAyO0I8gAAAAAAmAinXgAAAAAAMBGCPAAAAAAAJkKQBwAAAADARAjyAAAAAACYCEEeAAAAAAATIcgDAIAUGjRooD59+ji6jHTNmzdPefLkSbPP8OHDVbVq1RypBwCAnECQBwDA5LZs2SJnZ2dFRUVl2TwXL16skSNHZtn80hMRESFnZ2f98ssv9/S+Dh066MCBA9lUFQAAuRNBHgAAk5s9e7ZeffVV/fDDDzp16lSWzDNfvnzy9fXNknml59ixY9q8ebN69eqlOXPm3NN7PT09VbBgwWyqDACA3IkgDwCAiV25ckULFy7Uyy+/rKioKM2bN89m+vr162WxWLRq1So9+uij8vT0VMOGDXX27FmtWLFC5cuXl5+fn5599lldu3bN+r67h9YHBQVp1KhR6t69u3x9fVWiRAnNmjXLZll79uxRw4YN5enpqfz58+vFF1/UlStX0l2HuXPnqnnz5nr55Zf1+eef699//7WZfunSJfXo0UOFChWSh4eHKlWqpGXLlkmyP7R+zJgxKlSokHx9fRUTE6Pr169nYEsCAGAeBHkAAEzsyy+/VLly5VS2bFl17txZc+bMkWEYKfoNHz5cU6dO1ebNm3X8+HG1b99ekyZN0oIFC7R8+XJ99913mjJlSprLmjBhgmrUqKGdO3fqlVde0csvv6z9+/dLkq5evaqIiAjlzZtXv/zyi2JjY7VmzRr16tUrzXkahqG5c+eqc+fOKleunEqXLq2vvvrKOj0pKUlPPvmkNm3apE8//VS///67xowZI2dn51S3x/DhwzVq1Cht27ZNhQsX1vTp09PbjAAAmApBHgAAE5s9e7Y6d+4sSYqMjFR8fLw2bNiQot8777yjunXr6tFHH1VMTIw2bNigGTNm6NFHH9UTTzyhtm3bat26dWkuq1mzZnrllVdUunRpvfnmmypQoID1PQsWLND169c1f/58VapUSQ0bNtTUqVP1ySef6MyZM6nOc82aNbp27ZoiIiIkSZ07d9bs2bNtpm/dulWLFy9WkyZNVKpUKTVv3lxPPvmk3flNmjRJMTExiomJUdmyZfXOO++oQoUKaW9EAABMhiAPAIBJ7d+/X1u3blXHjh0lSS4uLurQoYNNEE
5WuXJl6/8XKlRIXl5eKlWqlE3b2bNn01zenfOwWCwKDAy0vmffvn2qUqWKvL29rX3q1q2rpKQk61l7e+bMmaMOHTrIxcVFktSxY0dt2rRJhw4dkiT9+uuvKlasmB555JE0a0u2b98+1apVy6atdu3aGXovAABmQZAHAMCkZs+erVu3bqlIkSJycXGRi4uLZsyYoUWLFik+Pt6mr6urq/X/LRaLzc/JbUlJSWkuLzPvScuFCxf09ddfa/r06db6ixYtqlu3bllveufp6Znp+QMA8KAiyAMAYEK3bt3S/PnzNWHCBP3666/W165du1SkSBF9/vnnOVpP+fLltWvXLl29etXatmnTJjk5Oals2bJ23/PZZ5+pWLFi2rVrl806TJgwQfPmzVNiYqIqV66sEydOZPgRc+XLl9fPP/9s0/bTTz9lfsUAAMiFCPIAAJjQsmXLdPHiRcXExKhSpUo2rzZt2tgdXp+dOnXqJA8PD0VHR2vv3r1at26dXn31VT333HMqVKiQ3ffMnj1bbdu2TVF/TEyMzp8/r5UrVyosLEz169dXmzZttHr1ah0+fFgrVqzQypUr7c6zd+/emjNnjubOnasDBw5o2LBh+u2337Jz1QEAyHEEeQAATGj27Nlq3Lix/P39U0xr06aNtm3bpt27d+dYPV5eXlq1apUuXLigxx57TG3btlWjRo00depUu/23b9+uXbt2qU2bNimm+fv7q1GjRtYvIxYtWqTHHntMHTt2VIUKFTRgwAAlJibanW+HDh301ltvacCAAapevbqOHj2ql19+OetWFACAXMBi2HtGDQAAAAAAyJU4Iw8AAAAAgIkQ5AEAAAAAMBGCPAAAAAAAJkKQBwAAAADARAjyAAAAAACYCEEeAAAAAAATIcgDAAAAAGAiBHkAAAAAAEyEIA8AAAAAgIkQ5AEAAAAAMBGCPAAAAAAAJvL/AEDgNCp53Pm5AAAAAElFTkSuQmCC",
|
| 64 |
+
"text/plain": [
|
| 65 |
+
"<Figure size 1200x600 with 1 Axes>"
|
| 66 |
+
]
|
| 67 |
+
},
|
| 68 |
+
"metadata": {},
|
| 69 |
+
"output_type": "display_data"
|
| 70 |
+
}
|
| 71 |
+
],
|
| 72 |
+
"source": [
|
| 73 |
+
"amino_acid_frequencies = {}\n",
|
| 74 |
+
"\n",
|
| 75 |
+
"datasets = {'Train': train, 'Test': test, 'Val': val}\n",
|
| 76 |
+
"\n",
|
| 77 |
+
"\n",
|
| 78 |
+
"for name, df in datasets.items():\n",
|
| 79 |
+
" # Count total occurrences of each amino acid in each dataset\n",
|
| 80 |
+
" amino_acid_frequencies[name] = df[valid_residues].sum() / df['Length'].sum()\n",
|
| 81 |
+
"\n",
|
| 82 |
+
"# Convert frequencies to a dataframe for easier manipulation\n",
|
| 83 |
+
"freq_df = pd.DataFrame(amino_acid_frequencies)\n",
|
| 84 |
+
"\n",
|
| 85 |
+
"# Plot the histogram comparing amino acid frequencies\n",
|
| 86 |
+
"plt.figure(figsize=(12, 6))\n",
|
| 87 |
+
"\n",
|
| 88 |
+
"# Set bar width\n",
|
| 89 |
+
"bar_width = 0.2\n",
|
| 90 |
+
"\n",
|
| 91 |
+
"# Generate positions for the bars\n",
|
| 92 |
+
"amino_acids = list(valid_residues)\n",
|
| 93 |
+
"x = np.arange(len(amino_acids)) # positions for the amino acids\n",
|
| 94 |
+
"\n",
|
| 95 |
+
"# Plot the bars for each dataset with an offset\n",
|
| 96 |
+
"plt.bar(x - bar_width, freq_df['Train'], width=bar_width, label='Train', align='center')\n",
|
| 97 |
+
"plt.bar(x, freq_df['Test'], width=bar_width, label='Test', align='center')\n",
|
| 98 |
+
"plt.bar(x + bar_width, freq_df['Val'], width=bar_width, label='Val', align='center')\n",
|
| 99 |
+
"\n",
|
| 100 |
+
"plt.xticks(x, amino_acids)\n",
|
| 101 |
+
"\n",
|
| 102 |
+
"plt.title('UniRef50 (200k Sequences) - Train, Test, and Val Residue Frequencies')\n",
|
| 103 |
+
"plt.xlabel('Amino Acid')\n",
|
| 104 |
+
"plt.ylabel('Frequency')\n",
|
| 105 |
+
"\n",
|
| 106 |
+
"plt.legend()\n",
|
| 107 |
+
"plt.show()"
|
| 108 |
+
]
|
| 109 |
+
},
|
| 110 |
+
{
|
| 111 |
+
"cell_type": "code",
|
| 112 |
+
"execution_count": 6,
|
| 113 |
+
"metadata": {},
|
| 114 |
+
"outputs": [
|
| 115 |
+
{
|
| 116 |
+
"data": {
|
| 117 |
+
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA/8AAAIjCAYAAABViau2AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABzUklEQVR4nO3de3zP9f//8ft7Zpuxg9NOYWbkfD41csphWHIsoVhWUpQz+SSRiggpISXrQGUlidDMmSWnOeWUJjkMkc3Ixvb6/dF3r5+3Ddtse8/b7Xq5vC8X7+fr8X4+H6/3Xnvb4/18vl4vi2EYhgAAAAAAgN1ysHUCAAAAAAAgd1H8AwAAAABg5yj+AQAAAACwcxT/AAAAAADYOYp/AAAAAADsHMU/AAAAAAB2juIfAAAAAAA7R/EPAAAAAICdo/gHAAAAAMDOUfwDyDdCQ0NVtmzZXB/n+vXrGjlypEqXLi0HBwd16tQp18e8WWpqqqpVq6a33norz8e+lbJly+rRRx+1dRq4jd9++02Ojo7at2+frVPJlnHjxslisdg6jftW8+bN1bx58zwfN68+22GfLBaLBg4caOs0ALtA8Q8gS9L+eP/7778z3F6tWrUc/ePSYrFYPdzd3dWsWTMtX748231++umnmjJlirp166bPPvtMQ4YMkfRf8XvzeBaLRf3790/Xx8WLF9WvXz+VLFlShQsXVosWLbRz585M5/DVV1/pr7/+svqDZtu2bRo4cKCqVq2qwoULq0yZMnriiSd0+PDhDPs4cOCA2rZtqyJFiqhYsWJ6+umnde7cuXRxqampmjx5sgICAuTi4qIaNWroq6++ynSumZGamqrPP/9cDRs2VLFixeTm5qYHH3xQvXv31i+//JKjY93PqlSpopCQEI0dOzZH+73VsX/zIzw8PEfHzWnh4eGZ2o+cKkS3bNmicePG6eLFiznSn63t3LlTFotFY8aMuWXMkSNHZLFYNHTo0Bwfn8+R3BMaGqoiRYrYOo1bsrffJSC/crR1AgCQ5uOPP1Zqamq69tatW6t3794yDEN//vmnZs+erQ4dOmjFihUKDg7O8jhr1qzRAw88oOnTp6fbVqtWLQ0bNsyq7cEHH7R6npqaqpCQEO3evVsjRoxQiRIlNGvWLDVv3lw7duxQhQoV7pjDlClT9OSTT8rDw8Nse+edd7R582Y9/vjjqlGjhuLi4jRz5kzVqVNHv/zyi6pVq2bGnjhxQk2bNpWHh4fefvttJSYm6t1339XevXv166+/ysnJyYx99dVXNWnSJD333HOqX7++fvjhB/Xs2VMWi0VPPvlkpt+323n55Zf14YcfqmPHjurVq5ccHR116NAhrVixQuXKldNDDz2UI+NA6t+/v9q3b6+jR48qMDAwR/p87733lJiYaD7/6aef9NVXX2n69OkqUaKE2d6oUaO7GmfMmDF65ZVX7qqP22natKm++OILq7Znn31WDRo0UL9+/cy2nCqCtmzZovHjxys0NFSenp450qct1alTR5UqVdJXX32lN998M8OYhQsXSpKeeuqpHB+fz5H7l739LgH5lgEAWfD6668bkoxz585luL1q1apGs2bNcmw8ScaAAQOs2n777TdDktGuXbts9dmiRQujatWq6dr9/f2NkJCQO77+m2++MSQZERERZtvZs2cNT09Po0ePHnd8/c6dOw1JxurVq63aN2/ebCQlJVm1HT582HB2djZ69epl1f7CCy8YhQoVMv7880+zLTIy0pBkfPTRR2bbiRMnjIIFC1q9h6mpqUaTJk2MUqVKGdevXzfbM7v/N4uLizMsFovx3HPPpduWmppqnDlzJst94taSk5ONokWLGq+99lqujTFlyhRDkhEbG3vbuMTExFzLIacULlzY6NOnT670ndn3Kb9o1qzZHT+fJ0yYYEgyoqOjM9xesWJFo1KlSlkat0+fPoa/v/9tY/gcyV19+vQxChcubOs0bul2v0sZ/R0AIH
tY9g8gV61bt04Wi0WLFi3SW2+9pVKlSsnFxUUtW7bU77//bhWb2fNCK1eurBIlSujo0aNW7UlJSXr99ddVvnx5OTs7q3Tp0ho5cqSSkpIkSceOHZPFYtHatWu1f/9+cwnwunXrrPpJTk7W5cuXbzn+t99+K29vb3Xp0sVsK1mypJ544gn98MMP5ni3smTJEjk5Oalp06ZW7Y0aNbKasZekChUqqGrVqjpw4IBV+3fffadHH31UZcqUMdtatWqlBx98UIsWLTLbfvjhB127dk0vvvii2WaxWPTCCy/oxIkTio6Ovm2un332mRwdHTVixIhbxsTGxsowDDVu3DjdNovFIi8vL6u2ixcvavDgwSpdurScnZ1Vvnx5vfPOO+lWfVy8eFGhoaHy8PCQp6en+vTpo5iYmHTLz291HnNGx1Nqaqree+89Va1aVS4uLvL29tbzzz+vf/75xyou7foHmzZtUoMGDeTi4qJy5crp888/TzfOxYsXNWTIEJUtW1bOzs4qVaqUevfubXVqzJ2OzTSRkZF6+OGH5enpqSJFiqhixYr63//+ZxVTsGBBNW/eXD/88EO6XHJT2rLho0ePqn379nJzc1OvXr0kSRs3btTjjz+uMmXKmPs3ZMgQ/fvvv1Z9ZHTOf9r5vEuWLFG1atXk7OysqlWrauXKlbm2LydPnlTfvn3l7e1tjvfpp5+mi/vggw9UtWpVubq6qmjRoqpXr5458z1u3Djz9yIgIMD8PDl27FiWcrlw4YKGDx+u6tWrq0iRInJ3d1e7du20e/duq7isfJZK0ty5cxUYGKhChQqpQYMG2rhxY6bySfuZpu3njXbs2KFDhw6ZMT/88INCQkLk5+cnZ2dnBQYGasKECUpJScnSeyDxOZKTnyN3Y+vWrWrbtq08PDzk6uqqZs2aafPmzVYxab/Hv//+uzlT7+HhoWeeeUZXrlyxiv3333/18ssvq0SJEnJzc9Njjz2mkydPymKxaNy4cWZ/mfldutNnxKVLlzR48GDzPfTy8lLr1q2zdEoeYO9Y9g8gT0yaNEkODg4aPny44uPjNXnyZPXq1Utbt27Ncl/x8fH6559/rJY8p6am6rHHHtOmTZvUr18/Va5cWXv37tX06dN1+PBhLVmyRCVLltQXX3yht956S4mJiZo4caKk/75MSLNmzRq5uroqJSVF/v7+GjJkiAYNGmQ1/q5du1SnTh05OFh/f9qgQQPNnTtXhw8fVvXq1W+Z/5YtW1StWjUVLFjwjvtqGIbOnDmjqlWrmm0nT57U2bNnVa9evXTxDRo00E8//WSVa+HCha32MS0ubfvDDz+c4dhz585V//799b///e+WS4Alyd/fX5IUERGhxx9/XK6urreMvXLlipo1a6aTJ0/q+eefV5kyZbRlyxaNHj1ap0+f1nvvvWfud8eOHbVp0yb1799flStX1vfff68+ffrcsu/MeP755xUeHq5nnnlGL7/8smJjYzVz5kzt2rVLmzdvtvqZ/P777+rWrZvCwsLUp08fffrppwoNDVXdunXNn0diYqKaNGmiAwcOqG/fvqpTp47+/vtvLV26VCdOnFCJEiUydWxK0v79+/Xoo4+qRo0aeuONN+Ts7Kzff/893R/eklS3bl398MMPSkhIkLu7+129J1lx/fp1BQcH6+GHH9a7775r/qwjIiJ05coVvfDCCypevLh+/fVXffDBBzpx4oQiIiLu2O+mTZu0ePFivfjii3Jzc9P777+vrl276vjx4ypevHiO7sOZM2f00EMPmV86lCxZUitWrFBYWJgSEhI0ePBgSf+dhvTyyy+rW7duGjRokK5evao9e/Zo69at6tmzp7p06aLDhw+nOz2iZMmSWcrnjz/+0JIlS/T4448rICBAZ86c0UcffaRmzZrpt99+k5+fn1V8Zj5L582bp+eff16NGjXS4MGD9ccff+ixxx5TsWLFVLp06dvmExAQoEaNGmnRokWaPn26ChQoYG5L+0KgZ8+ekv
67xkKRIkU0dOhQFSlSRGvWrNHYsWOVkJCgKVOmZOl94HMkZz5H7saaNWvUrl071a1bV6+//rocHBw0f/58PfLII9q4caP5/0aaJ554QgEBAZo4caJ27typTz75RF5eXnrnnXfMmNDQUC1atEhPP/20HnroIa1fv14hISFW/WTmdykznxH9+/fXt99+q4EDB6pKlSo6f/68Nm3apAMHDqhOnTp3/f4AdsGm6w4A3HOyuux/7dq1hiSjcuXKVkvaZ8yYYUgy9u7da7ZltDRUkhEWFmacO3fOOHv2rLF9+3ajbdu2hiRjypQpZtwXX3xhODg4GBs3brR6/Zw5cwxJxubNm822Zs2aZbjsv0OHDsY777xjLFmyxJg3b57RpEkTQ5IxcuRIq7jChQsbffv2Tff65cuXG5KMlStXZvjepClVqpTRtWvX28bcuF+SjHnz5plt27ZtMyQZn3/+ebr4ESNGGJKMq1evGoZhGCEhIUa5cuXSxV2+fNmQZLzyyitm243L/mfMmGFYLBZjwoQJmcqzd+/ehiSjaNGiRufOnY13333XOHDgQLq4CRMmGIULFzYOHz5s1f7KK68YBQoUMI4fP24YhmEsWbLEkGRMnjzZjLl+/br5M5k/f77ZfqulzDcfTxs3bjQkGQsWLLCKW7lyZbp2f39/Q5KxYcMGs+3s2bOGs7OzMWzYMLNt7NixhiRj8eLF6cZPTU01DCPzx+b06dNv+7t1o4ULFxqSjK1bt94xNjsyWoLbp0+fdMdMmitXrqRrmzhxomGxWKxOTUn7/LiRJMPJycn4/fffzbbdu3cbkowPPvjgrvfl5mX/YWFhhq+vr/H3339bxT355JOGh4eHuS8dO3bM8HPiRjmx7P/q1atGSkqKVVtsbKzh7OxsvPHGG2ZbZj9Lk5OTDS8vL6NWrVpWcXPnzjUkZeq0rA8//NCQZKxatcpsS0lJMR544AEjKCjIbMvo5/78888brq6u5meQYWRu2b9h8DmSE58jt3KnZf+pqalGhQoVjODgYHNMw/jvZxwQEGC0bt3abEv7Pb75/8HOnTsbxYsXN5/v2LHDkGQMHjzYKi40NNSQZLz++utm252W/WfmM8LDw4PTA4A7YNk/gDzxzDPPWC1pb9KkiaT/Zr3uZN68eSpZsqS8vLxUr149RUVFaeTIkVZXm46IiFDlypVVqVIl/f333+bjkUcekSStXbv2juMsXbpUI0eOVMeOHdW3b1+tX79ewcHBmjZtmk6cOGHG/fvvv3J2dk73ehcXF3P77Zw/f15Fixa9Yz4HDx7UgAEDFBQUZDVTldZ/ZnLITq6TJ0/WoEGD9M4779z2qt83mj9/vmbOnKmAgAB9//33Gj58uCpXrqyWLVvq5MmTZlxERISaNGmiokWLWv2cWrVqpZSUFG3YsEHSfxecc3R01AsvvGC+tkCBAnrppZcylU9GIiIi5OHhodatW1uNXbduXRUpUiTdMVKlShXzOJX+m4WqWLGi1TH73XffqWbNmurcuXO68dKWt2f22Ey7yNUPP/yQ4YUvb5R2/Nzqrhu56cafSZpChQqZ/758+bL+/vtvNWrUSIZhaNeuXXfss1WrVlYreWrUqCF3d/dMfT5khWEY+u6779ShQwcZhmH18wgODlZ8fLy5RNjT01MnTpzQtm3bcjSHmzk7O5uriFJSUnT+/HnzlI+Mlivf6bN0+/btOnv2rPr3728Vl7b0PTO6d++uggULWi39X79+vU6ePGku+Zesf+6XLl3S33//rSZNmujKlSs6ePBgpsa6EZ8jd/85kl0xMTE6cuSIevbsqfPnz5v9X758WS1bttSGDRvSfS7dfCecJk2a6Pz580pISJAkc1n+jaedScrW+5+ZzwhPT09t3bpVp06dynL/wP2CZf8AclxG9/G+8dx06f8XLzefI5mRjh07auDAgUpOTta2bdv09ttv68qVK1bL7o8cOaIDBw7ccsnt2bNns7ILkv7bjyFDhmjVqlVat2
6deXXrQoUKZXiO5dWrV83td2IYxm23x8XFKSQkRB4eHvr222+tlt6m9Z+ZHLKa6/r167V8+XKNGjXqtuf538zBwUEDBgzQgAEDdP78eW3evFlz5szRihUr9OSTT5rnGx85ckR79uy548/pzz//lK+vb7qrslesWDHTOd3syJEjio+PT3fu8M1jp7n5mJX+O25vPGaPHj2qrl273nHczByb3bt31yeffKJnn31Wr7zyilq2bKkuXbqoW7du6U4xSTt+MvpdS5OcnKwLFy5YtZUsWdLqWMoqR0dHlSpVKl378ePHNXbsWC1dujTd73R8fPwd+83Me50Tzp07p4sXL2ru3LmaO3duhjFpP49Ro0Zp9erVatCggcqXL682bdqoZ8+eGZ6TfjdSU1M1Y8YMzZo1S7GxsVbny2d0ysOdPkv//PNPSUp315GCBQuqXLlymcqpePHiCg4O1vfff685c+bIxcVFCxculKOjo5544gkzbv/+/RozZozWrFljFnxpMvNzvxmfI7cfN6f/j7u5f0m3PSUiPj7e6ovr2x2L7u7u+vPPP+Xg4KCAgACruPLly2c5v8y8j5MnT1afPn1UunRp1a1bV+3bt1fv3r0zfdwD9wOKfwBZcqfZ7StXrpgxN7pVwXGnIliSSpUqpVatWkmS2rdvrxIlSmjgwIFq0aKFedG91NRUVa9eXdOmTcuwjzud53oraa+7sYjy9fXV6dOn08Wmtd18ju7NihcvftuiJj4+Xu3atdPFixe1cePGdP35+vpajXdzDsWKFTNn+319fbV27VoZhmFVKN4q16pVq+rixYv64osv9Pzzz6f7oy0zihcvrscee0yPPfaYmjdvrvXr1+vPP/+Uv7+/UlNT1bp1a40cOTLD1958W8XMsFgsGR5HN190LDU1VV5eXlqwYEGG/dz8R/XdHLM3j5uZY7NQoULasGGD1q5dq+XLl2vlypX65ptv9Mgjj+jnn3+2yift+LnxNnw327Jli1q0aGHVFhsbe1f3uL9xljpNSkqKWrdurQsXLmjUqFGqVKmSChcurJMnTyo0NPSOqxiknHuv7yQtl6eeeuqWRU6NGjUk/XctkEOHDmnZsmVauXKlvvvuO82aNUtjx47V+PHjcyynt99+W6+99pr69u2rCRMmqFixYnJwcNDgwYMzfO/y6r166qmntGzZMi1btkyPPfaYvvvuO7Vp08b8Pbl48aKaNWsmd3d3vfHGGwoMDJSLi4t27typUaNGZernfjt8jljLrf/jbuxf+u82tLVq1cow5uYvUfLqWMzsWE888YSaNGmi77//Xj///LOmTJmid955R4sXL1a7du1yPCfgXkTxDyBL0i7KdOjQoXR/bFy5ckV//fWX2rRpk6s5PP/885o+fbrGjBmjzp07y2KxKDAwULt371bLli1vOxuaVWlLCm/8g65WrVrauHGjUlNTrQqhrVu3ytXV9Y5/eFaqVEmxsbEZbrt69ao6dOigw4cPa/Xq1apSpUq6mAceeEAlS5bU9u3b02379ddfrf5wq1Wrlj755BMdOHDAqq+0i4Pd/EdeiRIl9O233+rhhx9Wy5YttWnTpjt+mXE79erV0/r163X69Gn5+/srMDBQiYmJ5pc5t+Lv76+oqCglJiZa/cF56NChdLFFixbNcHl42gxomsDAQK1evVqNGzfO1OqMzAgMDNS+ffvuGJPZY9PBwUEtW7ZUy5YtNW3aNL399tt69dVXtXbtWqv3LDY2Vg4ODrc91mrWrKnIyEirNh8fn0zsVdbs3btXhw8f1meffabevXub7TePnR+ULFlSbm5uSklJueMxKEmFCxdW9+7d1b17dyUnJ6tLly566623NHr0aLm4uOTIZ823336rFi1aaN68eVbtFy9evO2XO7eS9hl95MgRc0m4JF27dk2xsbGqWbNmpvp57LHH5ObmpoULF6pgwYL6559/rJb8r1u3TufPn9fixYut7lxyq8+2u8HnSNY+R7KbgyS5u7
tn6ncjM9K+qImNjbVaiZLR3Slyap98fX314osv6sUXX9TZs2dVp04dvfXWWxT/wP/hnH8AWdKyZUs5OTlp9uzZ6WZ25s6dq+vXr+f6f7KOjo4aNmyYDhw4YN7u7IknntDJkyf18ccfp4v/999/b3vrPum/mf2bZ3iuXbumSZMmycnJyWoGtVu3bjpz5owWL15stv3999+KiIhQhw4dMjzH/kZBQUHat29fuuX4KSkp6t69u6KjoxUREaGgoKBb9tG1a1ctW7ZMf/31l9kWFRWlw4cP6/HHHzfbOnbsqIIFC2rWrFlmm2EYmjNnjh544AE1atQoXd+lSpXS6tWr9e+//6p169Y6f/78bfcnLi5Ov/32W7r25ORkRUVFycHBwVzm+cQTTyg6OlqrVq1KF3/x4kVdv35d0n8rPK5fv67Zs2eb21NSUvTBBx+ke11gYKAOHjyoc+fOmW27d+9Od5X8J554QikpKZowYUK6Pq5fv66LFy/edj8z0rVrV+3evVvff/99um1pM1KZPTZvXqIv/f8vZ24+Vnbs2KGqVave9hzuokWLqlWrVlaPjFbl3K20GbkbZ+AMw9CMGTNyfKy7VaBAAXXt2lXfffddhsXWjcfQzce9k5OTqlSpIsMwdO3aNUn/fTkgKVvHzo053TxTGhERYXWOe1bUq1dPJUuW1Jw5c5ScnGy2h4eHZynPQoUKqXPnzvrpp580e/ZsFS5cWB07drTKW7L+uScnJ1t91mQFnyM58zmSXXXr1lVgYKDeffddJSYmptt+4/uSWcHBwZKU7pjI6P2/29+llJSUdKeaeHl5yc/PL0dvhQjc65j5B5AlXl5eGjt2rMaMGaOmTZvqsccek6urq7Zs2aKvvvpKbdq0UYcOHXI9j9DQUI0dO1bvvPOOOnXqpKefflqLFi1S//79tXbtWjVu3FgpKSk6ePCgFi1apFWrVmV4a7w0S5cu1Ztvvqlu3bopICBAFy5c0MKFC7Vv3z69/fbbVjOm3bp100MPPaRnnnlGv/32m0qUKKFZs2YpJSUlU8uBO3bsqAkTJmj9+vVWqySGDRumpUuXqkOHDrpw4YK+/PJLq9elXXNAkv73v/8pIiJCLVq00KBBg5SYmKgpU6aoevXqeuaZZ8y4UqVKafDgwZoyZYquXbum+vXra8mSJdq4caMWLFhwy6WU5cuX188//6zmzZsrODhYa9asueUt5U6cOKEGDRrokUceUcuWLeXj46OzZ8/qq6++0u7duzV48GBzBnPEiBFaunSpHn30UfN2V5cvX9bevXv17bff6tixYypRooQ6dOigxo0b65VXXtGxY8dUpUoVLV68OMPziPv27atp06YpODhYYWFhOnv2rObMmaOqVatanYfcrFkzPf/885o4caJiYmLUpk0bFSxYUEeOHFFERIRmzJihbt263fHnd6MRI0bo22+/1eOPP66+ffuqbt26unDhgpYuXao5c+aoZs2amT4233jjDW3YsEEhISHy9/fX2bNnNWvWLJUqVcrqdozXrl3T+vXr011Ey1YqVaqkwMBADR8+XCdPnpS7u7u+++67HD9fX/pvdrBZs2Zat25dtvuYNGmS1q5dq4YNG+q5555TlSpVdOHCBe3cuVOrV682v4Rp06aNfHx81LhxY3l7e+vAgQOaOXOmQkJC5ObmJum/gkmSXn31VT355JMqWLCgOnTooMKFC2vcuHEaP3681q5dm+H949M8+uijeuONN/TMM8+oUaNG2rt3rxYsWJDt85QLFiyoN998U88//7weeeQRde/eXbGxsZo/f36W+3zqqaf0+eefa9WqVerVq5dZoElSo0aNVLRoUfXp00cvv/yyLBaLvvjii2wv+eZzJGc+R27n2rVrGd62tVixYnrxxRf1ySefqF27dqpataqeeeYZPfDAAzp58qTWrl0rd3d3/fjjj1nar7p166pr16567733dP78efNWf4cPH5ZkPdt/u9+lzLh06ZJKlSqlbt26qW
bNmipSpIhWr16tbdu2aerUqVnKG7BreXdjAQD25MsvvzQeeugho3Dhwoazs7NRqVIlY/z48Va3dzKM/397qoiICKv22NjYdLdautWt/m51655x48YZkoy1a9cahvHfLa7eeecdo2rVqoazs7NRtGhRo27dusb48eON+Ph483UZ3epv+/btRocOHYwHHnjAcHJyMooUKWI8/PDDxqJFizIc+8KFC0ZYWJhRvHhxw9XV1WjWrJmxbdu2271lVmrUqGGEhYVZtTVr1syQdMvHzfbt22e0adPGcHV1NTw9PY1evXoZcXFx6eJSUlKMt99+2/D39zecnJyMqlWrGl9++WW6uBtv9Zdm69athpubm9G0adMMb+tlGIaRkJBgzJgxwwgODjZKlSplFCxY0HBzczOCgoKMjz/+2Oq2UYZhGJcuXTJGjx5tlC9f3nBycjJKlChhNGrUyHj33XeN5ORkM+78+fPG008/bbi7uxseHh7G008/bezatSvdcWMY/x2P5cqVM5ycnIxatWoZq1atuuXtxebOnWvUrVvXKFSokOHm5mZUr17dGDlypHHq1KnbvheGkfHtwM6fP28MHDjQPHZKlSpl9OnTx+pWcpk5NqOiooyOHTsafn5+hpOTk+Hn52f06NEj3e3MVqxYYUgyjhw5kuHPIyfc6lZ/t7pV2G+//Wa0atXKKFKkiFGiRAnjueeeM2/FdePP6la3+svod9zf39/qFn2XLl0yJBlPPvlklvbl5lv9GYZhnDlzxhgwYIBRunRpo2DBgoaPj4/RsmVLY+7cuWbMRx99ZDRt2tQoXry44ezsbAQGBhojRoyw+iwxjP9uO/fAAw8YDg4OVu/ZsGHDDIvFkuGt6m509epVY9iwYYavr69RqFAho3HjxkZ0dHS6Yy0rn6WGYRizZs0yAgICDGdnZ6NevXrGhg0bbnk7u1u5fv264evra0gyfvrpp3TbN2/ebDz00ENGoUKFDD8/P2PkyJHGqlWrrD6XDSNzt/rjcyRnPkduJe1WnRk9AgMDzbhdu3YZXbp0MY97f39/44knnjCioqLMmFvd8nf+/PnpPjcuX75sDBgwwChWrJhRpEgRo1OnTsahQ4cMScakSZOsXn+r36XMfEYkJSUZI0aMMGrWrGm4ubkZhQsXNmrWrGnMmjXrtu8LcL+xGEYuXJUDAHBbX3zxhQYMGKDjx4+bt3jDnR07dkwBAQGaP3++QkNDbZ2OTXTq1EkWiyXDJcL27KefftKjjz6q3bt3q3r16rZO544aNGggf39/RURE2DoV3ITPEduKiYlR7dq19eWXX1pdRwJA7uOcfwCwgV69eqlMmTL68MMPbZ0K7iEHDhzQsmXLMjzf2N6tXbtWTz755D1R+CckJGj37t164403bJ0KYFMZ3Rnovffek4ODg9WFIgHkDc75BwAbcHBwuOPVnYGbVa5c2byY2f1mypQptk4h09zd3bnIGCBp8uTJ2rFjh1q0aCFHR0etWLFCK1asUL9+/e769oQAso7iHwAAAECOa9SokSIjIzVhwgQlJiaqTJkyGjdunF599VVbpwbclzjnHwAAAAAAO8c5/wAAAAAA2DmKfwAAAAAA7Bzn/OeQ1NRUnTp1Sm5ubrJYLLZOBwAAAABg5wzD0KVLl+Tn5ycHh9vP7VP855BTp05x1VIAAAAAQJ7766+/VKpUqdvGUPznEDc3N0n/venu7u42zgYAAAAAYO8SEhJUunRpsx69HYr/HJK21N/d3Z3iHwAAAACQZzJz6jkX/AMAAAAAwM5R/AMAAAAAYOco/gEAAAAAsHOc8w8AAAAAyBUpKSm6du2ardO4ZxUoUECOjo45cjt5in8AAAAAQI5LTEzUiRMnZBiGrVO5p7m6usrX11dOTk531Q/FPwAAAAAgR6WkpOjEiRNydXVVyZIlc2Tm+n5jGIaSk5N17tw5xcbGqkKFCnJwyP6Z+xT/AAAAAIAcde3aNRmGoZIlS6pQoUK2TueeVahQIR
UsWFB//vmnkpOT5eLiku2+uOAfAAAAACBXMON/9+5mtt+qnxzpBQAAAAAA5FsU/wAAAAAA2DnO+QcAAAAA5Imw8G15Ot680Pp5Ol5GypYtq8GDB2vw4ME2zYOZfwAAAADAfc9isdz2MW7cuGz1u23bNvXr1y9nk80GZv4BAAAAAPe906dPm//+5ptvNHbsWB06dMhsK1KkiPlvwzCUkpIiR8c7l9QlS5bM2USziZl/AAAAAMB9z8fHx3x4eHjIYrGYzw8ePCg3NzetWLFCdevWlbOzszZt2qSjR4+qY8eO8vb2VpEiRVS/fn2tXr3aqt+yZcvqvffeM59bLBZ98skn6ty5s1xdXVWhQgUtXbo01/eP4h8AAAAAgEx45ZVXNGnSJB04cEA1atRQYmKi2rdvr6ioKO3atUtt27ZVhw4ddPz48dv2M378eD3xxBPas2eP2rdvr169eunChQu5mjvFPwAAAAAAmfDGG2+odevWCgwMVLFixVSzZk09//zzqlatmipUqKAJEyYoMDDwjjP5oaGh6tGjh8qXL6+3335biYmJ+vXXX3M1d4p/AAAAAAAyoV69elbPExMTNXz4cFWuXFmenp4qUqSIDhw4cMeZ/xo1apj/Lly4sNzd3XX27NlcyTkNF/wDAAAAACATChcubPV8+PDhioyM1Lvvvqvy5curUKFC6tatm5KTk2/bT8GCBa2eWywWpaam5ni+N6L4BwAAAAAgGzZv3qzQ0FB17txZ0n8rAY4dO2bbpG6B4h92KSx8W56ONy+0fp6OBwAAAMD2KlSooMWLF6tDhw6yWCx67bXXcn0GP7so/gEAAAAAecLeJs2mTZumvn37qlGjRipRooRGjRqlhIQEW6eVIYthGIatk7AHCQkJ8vDwUHx8vNzd3W2dzn2PmX8AAADAdq5evarY2FgFBATIxcXF1unc0273XmalDrXp1f4nTpyo+vXry83NTV5eXurUqZMOHTpkFXP16lUNGDBAxYsXV5EiRdS1a1edOXPGKub48eMKCQmRq6urvLy8NGLECF2/ft0qZt26dapTp46cnZ1Vvnx5hYeHp8vnww8/VNmyZeXi4qKGDRvm+q0WAAAAAADICzYt/tevX68BAwbol19+UWRkpK5du6Y2bdro8uXLZsyQIUP0448/KiIiQuvXr9epU6fUpUsXc3tKSopCQkKUnJysLVu26LPPPlN4eLjGjh1rxsTGxiokJEQtWrRQTEyMBg8erGeffVarVq0yY7755hsNHTpUr7/+unbu3KmaNWsqODg412+3AAAAAABAbstXy/7PnTsnLy8vrV+/Xk2bNlV8fLxKliyphQsXqlu3bpKkgwcPqnLlyoqOjtZDDz2kFStW6NFHH9WpU6fk7e0tSZozZ45GjRqlc+fOycnJSaNGjdLy5cu1b98+c6wnn3xSFy9e1MqVKyVJDRs2VP369TVz5kxJUmpqqkqXLq2XXnpJr7zyyh1zZ9l//sKyfwAAAMB2WPafc+xi2f/N4uPjJUnFihWTJO3YsUPXrl1Tq1atzJhKlSqpTJkyio6OliRFR0erevXqZuEvScHBwUpISND+/fvNmBv7SItJ6yM5OVk7duywinFwcFCrVq3MmJslJSUpISHB6gEAAAAAQH6Ub4r/1NRUDR48WI0bN1a1atUkSXFxcXJycpKnp6dVrLe3t+Li4syYGwv/tO1p224Xk5CQoH///Vd///23UlJSMoxJ6+NmEydOlIeHh/koXbp09nYcAAAAAIBclm+K/wEDBmjfvn36+uuvbZ1KpowePVrx8fHm46+//rJ1SgAAAAAAZMjR1glI0sCBA7Vs2TJt2LBBpUqVMtt9fHyUnJysixcvWs3+nzlzRj4+PmbMzVflT7sbwI0xN98h4MyZM3J3d1ehQoVUoEABFShQIMOYtD5u5uzsLGdn5+ztMAAAAAAAecimM/+GYWjgwIH6/vvvtWbNGgUEBFhtr1u3rgoWLKioqCiz7dChQzp+/L
iCgoIkSUFBQdq7d6/VVfkjIyPl7u6uKlWqmDE39pEWk9aHk5OT6tataxWTmpqqqKgoMwYAAAAAgHuVTWf+BwwYoIULF+qHH36Qm5ubeX69h4eHChUqJA8PD4WFhWno0KEqVqyY3N3d9dJLLykoKEgPPfSQJKlNmzaqUqWKnn76aU2ePFlxcXEaM2aMBgwYYM7M9+/fXzNnztTIkSPVt29frVmzRosWLdLy5cvNXIYOHao+ffqoXr16atCggd577z1dvnxZzzzzTN6/MQAAAAAA5CCbFv+zZ8+WJDVv3tyqff78+QoNDZUkTZ8+XQ4ODuratauSkpIUHBysWbNmmbEFChTQsmXL9MILLygoKEiFCxdWnz599MYbb5gxAQEBWr58uYYMGaIZM2aoVKlS+uSTTxQcHGzGdO/eXefOndPYsWMVFxenWrVqaeXKlekuAggAAAAAyKaF3fN2vJ7f5O14+ZjFMAzD1knYg6zcXxG5Lyx8W56ONy+0fp6OBwAAAORnt7w3fT4u/i0Wy223v/766xo3bly20rBYLPr+++/VqVOnLL/2lu+lslaH5osL/gEAAAAAYEunT582//3NN99o7NixOnTokNlWpEgRW6SVY/LNrf4AAAAAALAVHx8f8+Hh4SGLxWLV9vXXX6ty5cpycXFRpUqVrE5HT05O1sCBA+Xr6ysXFxf5+/tr4sSJkqSyZctKkjp37iyLxWI+z2vM/AMAAAAAcBsLFizQ2LFjNXPmTNWuXVu7du3Sc889Z15z7v3339fSpUu1aNEilSlTRn/99Zf++usvSdK2bdvk5eWl+fPnq23btipQoIBN9oHiHwAAAACA23j99dc1depUdenSRdJ/F5X/7bff9NFHH6lPnz46fvy4KlSooIcfflgWi0X+/v7ma0uWLClJ8vT0lI+Pj03ylyj+AQAAAAC4pcuXL+vo0aMKCwvTc889Z7Zfv35dHh4ekqTQ0FC1bt1aFStWVNu2bfXoo4+qTZs2tko5QxT/AAAAAADcQmJioiTp448/VsOGDa22pS3hr1OnjmJjY7VixQqtXr1aTzzxhFq1aqVvv/02z/O9FYp/AAAAAABuwdvbW35+fvrjjz/Uq1evW8a5u7ure/fu6t69u7p166a2bdvqwoULKlasmAoWLKiUlJQ8zDo9in8AAAAAAG5j/Pjxevnll+Xh4aG2bdsqKSlJ27dv1z///KOhQ4dq2rRp8vX1Ve3ateXg4KCIiAj5+PjI09NT0n9X/I+KilLjxo3l7OysokWL5vk+UPwDAAAAAPJGz29snUG2PPvss3J1ddWUKVM0YsQIFS5cWNWrV9fgwYMlSW5ubpo8ebKOHDmiAgUKqH79+vrpp5/k4OAgSZo6daqGDh2qjz/+WA888ICOHTuW5/tgMQzDyPNR7VBCQoI8PDwUHx8vd3d3W6dz3wsL35an480LrZ+n4wEAAAD52dWrVxUbG6uAgAC5uLjYOp172u3ey6zUoQ65mSQAAAAAALA9in8AAAAAAOwcxT8AAAAAAHaO4h8AAAAAADtH8Q8AAAAAgJ2j+AcAAAAAwM5R/AMAAAAAYOco/gEAAAAAsHMU/wAAAAAA2DlHWycAAAAAALg/DIwamKfjzWw5M0/Ha968uWrVqqX33nsvT8fNDGb+AQAAAAD3vQ4dOqht27YZbtu4caMsFov27NmTx1nlHIp/AAAAAMB9LywsTJGRkTpx4kS6bfPnz1e9evVUo0YNG2SWMyj+AQAAAAD3vUcffVQlS5ZUeHi4VXtiYqIiIiLUqVMn9ejRQw888IBcXV1VvXp1ffXVV7ZJNhso/gEAAAAA9z1HR0f17t1b4eHhMgzDbI+IiFBKSoqeeuop1a1bV8uXL9e+ffvUr18/Pf300/r1119tmHXmccE/5Imw8G22TgEAAAAAbqtv376aMmWK1q9fr+bNm0v6b8l/165d5e/vr+HDh5uxL730klatWqVFixapQYMGNso485j5BwAAAABAUq
VKldSoUSN9+umnkqTff/9dGzduVFhYmFJSUjRhwgRVr15dxYoVU5EiRbRq1SodP37cxllnDsU/AAAAAAD/JywsTN99950uXbqk+fPnKzAwUM2aNdOUKVM0Y8YMjRo1SmvXrlVMTIyCg4OVnJxs65QzheIfAAAAAID/88QTT8jBwUELFy7U559/rr59+8pisWjz5s3q2LGjnnrqKdWsWVPlypXT4cOHbZ1uplH8AwAAAADwf4oUKaLu3btr9OjROn36tEJDQyVJFSpUUGRkpLZs2aIDBw7o+eef15kzZ2ybbBZwwT8AAAAAQJ6Y2XKmrVPIlLCwMM2bN0/t27eXn5+fJGnMmDH6448/FBwcLFdXV/Xr10+dOnVSfHy8jbPNHIp/AAAAAABuEBQUZHW7P0kqVqyYlixZctvXrVu3LveSukss+wcAAAAAwM5R/AMAAAAAYOco/gEAAAAAsHMU/wAAAAAA2DmKfwAAAABArrj5onnIupx6Dyn+AQAAAAA5qkCBApKk5ORkG2dy77ty5YokqWDBgnfVD7f6AwAAAADkKEdHR7m6uurcuXMqWLCgHByYd84qwzB05coVnT17Vp6enuYXKtlF8Q8AAAAAyFEWi0W+vr6KjY3Vn3/+aet07mmenp7y8fG5634o/gEAAAAAOc7JyUkVKlRg6f9dKFiw4F3P+Keh+AcAAAAA5AoHBwe5uLjYOg2IC/4BAAAAAGD3KP4BAAAAALBzNi3+N2zYoA4dOsjPz08Wi0VLliyx2m6xWDJ8TJkyxYwpW7Zsuu2TJk2y6mfPnj1q0qSJXFxcVLp0aU2ePDldLhEREapUqZJcXFxUvXp1/fTTT7myzwAAAAAA5DWbFv+XL19WzZo19eGHH2a4/fTp01aPTz/9VBaLRV27drWKe+ONN6ziXnrpJXNbQkKC2rRpI39/f+3YsUNTpkzRuHHjNHfuXDNmy5Yt6tGjh8LCwrRr1y516tRJnTp10r59+3JnxwEAAAAAyEM2veBfu3bt1K5du1tuv/l2Bj/88INatGihcuXKWbW7ubnd8tYHCxYsUHJysj799FM5OTmpatWqiomJ0bRp09SvXz9J0owZM9S2bVuNGDFCkjRhwgRFRkZq5syZmjNnzt3sIgAAAAAANnfPnPN/5swZLV++XGFhYem2TZo0ScWLF1ft2rU1ZcoUXb9+3dwWHR2tpk2bysnJyWwLDg7WoUOH9M8//5gxrVq1suozODhY0dHRt8wnKSlJCQkJVg8AAAAAAPKje+ZWf5999pnc3NzUpUsXq/aXX35ZderUUbFixbRlyxaNHj1ap0+f1rRp0yRJcXFxCggIsHqNt7e3ua1o0aKKi4sz226MiYuLu2U+EydO1Pjx43Ni1wAAAAAAyFX3TPH/6aefqlevXunuETl06FDz3zVq1JCTk5Oef/55TZw4Uc7OzrmWz+jRo63GTkhIUOnSpXNtPAAAAAAAsuueKP43btyoQ4cO6ZtvvrljbMOGDXX9+nUdO3ZMFStWlI+Pj86cOWMVk/Y87ToBt4q51XUEJMnZ2TlXv1wAAAAAACCn3BPn/M+bN09169ZVzZo17xgbExMjBwcHeXl5SZKCgoK0YcMGXbt2zYyJjIxUxYoVVbRoUTMmKirKqp/IyEgFBQXl4F4AAAAAAGAbNi3+ExMTFRMTo5iYGElSbGysYmJidPz4cTMmISFBERERevbZZ9O9Pjo6Wu+99552796tP/74QwsWLNCQIUP01FNPmYV9z5495eTkpLCwMO3fv1/ffPONZsyYYbVkf9CgQVq5cqWmTp2qgwcPaty4cdq+fbsGDhyYu28AAAAAAAB5wKbL/rdv364WLVqYz9MK8j59+ig8PFyS9PXXX8swDPXo0SPd652dnfX1119r3LhxSkpKUkBAgIYMGWJV2Ht4eOjnn3/WgAEDVLduXZUoUUJjx441b/MnSY0aNdLChQs1ZswY/e9//1OFChW0ZMkSVatWLZf2HAAAAACAvG
MxDMOwdRL2ICEhQR4eHoqPj5e7u7ut08l3wsK32TqFXDUvtL6tUwAAAABwn8lKHXpPnPMPAAAAAACyj+IfAAAAAAA7R/EPAAAAAICds+kF/wB7kdfXNOAaAwAAAACygpl/AAAAAADsHMU/AAAAAAB2juIfAAAAAAA7R/EPAAAAAICdo/gHAAAAAMDOUfwDAAAAAGDnKP4BAAAAALBzFP8AAAAAANg5in8AAAAAAOwcxT8AAAAAAHaO4h8AAAAAADtH8Q8AAAAAgJ2j+AcAAAAAwM5R/AMAAAAAYOco/gEAAAAAsHMU/wAAAAAA2DmKfwAAAAAA7BzFPwAAAAAAdo7iHwAAAAAAO0fxDwAAAACAnaP4BwAAAADAzlH8AwAAAABg5yj+AQAAAACwcxT/AAAAAADYOYp/AAAAAADsHMU/AAAAAAB2juIfAAAAAAA7R/EPAAAAAICdo/gHAAAAAMDOUfwDAAAAAGDnKP4BAAAAALBzFP8AAAAAANg5in8AAAAAAOwcxT8AAAAAAHaO4h8AAAAAADtH8Q8AAAAAgJ2j+AcAAAAAwM5R/AMAAAAAYOco/gEAAAAAsHMU/wAAAAAA2DmbFv8bNmxQhw4d5OfnJ4vFoiVLllhtDw0NlcVisXq0bdvWKubChQvq1auX3N3d5enpqbCwMCUmJlrF7NmzR02aNJGLi4tKly6tyZMnp8slIiJClSpVkouLi6pXr66ffvopx/cXAAAAAABbsGnxf/nyZdWsWVMffvjhLWPatm2r06dPm4+vvvrKanuvXr20f/9+RUZGatmyZdqwYYP69etnbk9ISFCbNm3k7++vHTt2aMqUKRo3bpzmzp1rxmzZskU9evRQWFiYdu3apU6dOqlTp07at29fzu80AAAAAAB5zGIYhmHrJCTJYrHo+++/V6dOncy20NBQXbx4Md2KgDQHDhxQlSpVtG3bNtWrV0+StHLlSrVv314nTpyQn5+fZs+erVdffVVxcXFycnKSJL3yyitasmSJDh48KEnq3r27Ll++rGXLlpl9P/TQQ6pVq5bmzJmT4dhJSUlKSkoynyckJKh06dKKj4+Xu7v73bwVdiksfJutU7Ar80Lr2zoFAAAAADaWkJAgDw+PTNWh+f6c/3Xr1snLy0sVK1bUCy+8oPPnz5vboqOj5enpaRb+ktSqVSs5ODho69atZkzTpk3Nwl+SgoODdejQIf3zzz9mTKtWrazGDQ4OVnR09C3zmjhxojw8PMxH6dKlc2R/AQAAAADIafm6+G/btq0+//xzRUVF6Z133tH69evVrl07paSkSJLi4uLk5eVl9RpHR0cVK1ZMcXFxZoy3t7dVTNrzO8Wkbc/I6NGjFR8fbz7++uuvu9tZAAAAAAByiaOtE7idJ5980vx39erVVaNGDQUGBmrdunVq2bKlDTOTnJ2d5ezsbNMccP/K69MoOM0AAAAAuLfl65n/m5UrV04lSpTQ77//Lkny8fHR2bNnrWKuX7+uCxcuyMfHx4w5c+aMVUza8zvFpG0HAAAAAOBedk8V/ydOnND58+fl6+srSQoKCtLFixe1Y8cOM2bNmjVKTU1Vw4YNzZgNGzbo2rVrZkxkZKQqVqyookWLmjFRUVFWY0VGRiooKCi3dwkAAAAAgFxn0+I/MTFRMTExiomJkSTFxsYqJiZGx48fV2JiokaMGKFffvlFx44dU1RUlDp27Kjy5csrODhYklS5cmW1bdtWzz33nH799Vdt3rxZAwcO1JNPPik/Pz9JUs+ePeXk5KSwsDDt379f33zzjWbMmKGhQ4eaeQwaNEgrV67U1KlTdfDgQY0bN07bt2/XwIED8/w9AQAAAAAgp9m0+N++fbtq166t2rVrS5KGDh2q2rVra+zYsSpQoID27Nmjxx57TA8++KDCwsJUt25dbdy40epc+wULFqhSpUpq2bKl2rdvr4cfflhz5841t3t4eOjnn39WbGys6tatq2HDhmns2LHq16+fGdOoUSMtXL
hQc+fOVc2aNfXtt99qyZIlqlatWt69GQAAAAAA5BKLYRiGrZOwB1m5v+L9KK8vUIecxQX/AAAAgPwnK3XoPXXOPwAAAAAAyDqKfwAAAAAA7BzFPwAAAAAAdo7iHwAAAAAAO0fxDwAAAACAnaP4BwAAAADAzlH8AwAAAABg5yj+AQAAAACwcxT/AAAAAADYOYp/AAAAAADsHMU/AAAAAAB2juIfAAAAAAA7R/EPAAAAAICdo/gHAAAAAMDOUfwDAAAAAGDnKP4BAAAAALBzFP8AAAAAANg5in8AAAAAAOwcxT8AAAAAAHaO4h8AAAAAADtH8Q8AAAAAgJ2j+AcAAAAAwM5R/AMAAAAAYOco/gEAAAAAsHMU/wAAAAAA2DmKfwAAAAAA7BzFPwAAAAAAdo7iHwAAAAAAO0fxDwAAAACAnaP4BwAAAADAzlH8AwAAAABg5yj+AQAAAACwcxT/AAAAAADYOYp/AAAAAADsHMU/AAAAAAB2juIfAAAAAAA7R/EPAAAAAICdo/gHAAAAAMDOUfwDAAAAAGDnKP4BAAAAALBzFP8AAAAAANg5in8AAAAAAOwcxT8AAAAAAHbOpsX/hg0b1KFDB/n5+clisWjJkiXmtmvXrmnUqFGqXr26ChcuLD8/P/Xu3VunTp2y6qNs2bKyWCxWj0mTJlnF7NmzR02aNJGLi4tKly6tyZMnp8slIiJClSpVkouLi6pXr66ffvopV/YZAAAAAIC8ZtPi//Lly6pZs6Y+/PDDdNuuXLminTt36rXXXtPOnTu1ePFiHTp0SI899li62DfeeEOnT582Hy+99JK5LSEhQW3atJG/v7927NihKVOmaNy4cZo7d64Zs2XLFvXo0UNhYWHatWuXOnXqpE6dOmnfvn25s+MAAAAAAOQhR1sO3q5dO7Vr1y7DbR4eHoqMjLRqmzlzpho0aKDjx4+rTJkyZrubm5t8fHwy7GfBggVKTk7Wp59+KicnJ1WtWlUxMTGaNm2a+vXrJ0maMWOG2rZtqxEjRkiSJkyYoMjISM2cOVNz5szJsN+kpCQlJSWZzxMSEjK/4wAAAAAA5KF76pz/+Ph4WSwWeXp6WrVPmjRJxYsXV+3atTVlyhRdv37d3BYdHa2mTZvKycnJbAsODtahQ4f0zz//mDGtWrWy6jM4OFjR0dG3zGXixIny8PAwH6VLl86BPQQAAAAAIOfdM8X/1atXNWrUKPXo0UPu7u5m+8svv6yvv/5aa9eu1fPPP6+3335bI0eONLfHxcXJ29vbqq+053FxcbeNSduekdGjRys+Pt58/PXXX3e9jwAAAAAA5AabLvvPrGvXrumJJ56QYRiaPXu21bahQ4ea/65Ro4acnJz0/PPPa+LEiXJ2ds61nJydnXO1fwAAAAAAckq+n/lPK/z//PNPRUZGWs36Z6Rhw4a6fv26jh07Jkny8fHRmTNnrGLSnqddJ+BWMbe6jgAAAAAAAPeSfF38pxX+R44c0erVq1W8ePE7viYmJkYODg7y8vKSJAUFBWnDhg26du2aGRMZGamKFSuqaNGiZkxUVJRVP5GRkQoKCsrBvQEAAAAAwDZsuuw/MTFRv//+u/k8NjZWMTExKlasmHx9fdWtWzft3LlTy5YtU0pKinkOfrFixeTk5KTo6Ght3bpVLVq0kJubm6KjozVkyBA99dRTZmHfs2dPjR8/XmFhYRo1apT27dunGTNmaPr06ea4gwYNUrNmzTR16lSFhITo66+/1vbt261uBwgAAAAAwL3KYhiGYavB161bpxYtWqRr79Onj8aNG6eAgIAMX7d27Vo1b95cO3fu1IsvvqiDBw8qKSlJAQEBevrppzV06FCr8/H37NmjAQMGaNu2bSpRooReeukljRo1yqrPiIgIjRkzRseOHVOFChU0efJktW/fPtP7kpCQIA8PD8XHx9/x1IT7UVj4NlungLswL7S+rVMAAAAAcJOs1KHZKv7/+OMPlStXLtsJ2iOK/9uj+L+3Uf
wDAAAA+U9W6tBsnfNfvnx5tWjRQl9++aWuXr2arSQBAAAAAEDeyNbMf0xMjObPn6+vvvpKycnJ6t69u8LCwtSgQYPcyPGewMz/7THzj6xitQEAAABwe7k+81+rVi3NmDFDp06d0qeffqrTp0/r4YcfVrVq1TRt2jSdO3cuW4kDAAAAAICcd1e3+nN0dFSXLl0UERGhd955R7///ruGDx+u0qVLq3fv3jp9+nRO5QkAAAAAALLpror/7du368UXX5Svr6+mTZum4cOH6+jRo4qMjNSpU6fUsWPHnMoTAAAAAABkk2N2XjRt2jTNnz9fhw4dUvv27fX555+rffv2cnD477uEgIAAhYeHq2zZsjmZKwAAAAAAyIZsFf+zZ89W3759FRoaKl9f3wxjvLy8NG/evLtKDgAAAAAA3L1sFf9Hjhy5Y4yTk5P69OmTne4BAAAAAEAOytY5//Pnz1dERES69oiICH322Wd3nRQAAAAAAMg52Sr+J06cqBIlSqRr9/Ly0ttvv33XSQEAAAAAgJyTreL/+PHjCggISNfu7++v48eP33VSAAAAAAAg52Sr+Pfy8tKePXvSte/evVvFixe/66QAAAAAAEDOyVbx36NHD7388stau3atUlJSlJKSojVr1mjQoEF68sknczpHAAAAAABwF7J1tf8JEybo2LFjatmypRwd/+siNTVVvXv35px/AAAAAADymWwV/05OTvrmm280YcIE7d69W4UKFVL16tXl7++f0/kBAAAAAIC7lK3iP82DDz6oBx98MKdyAQAAAAAAuSBbxX9KSorCw8MVFRWls2fPKjU11Wr7mjVrciQ5AAAAAABw97JV/A8aNEjh4eEKCQlRtWrVZLFYcjovAAAAAACQQ7JV/H/99ddatGiR2rdvn9P5AAAAAACAHJatW/05OTmpfPnyOZ0LAAAAAADIBdkq/ocNG6YZM2bIMIyczgcAAAAAAOSwbC3737Rpk9auXasVK1aoatWqKliwoNX2xYsX50hyAAAAAADg7mWr+Pf09FTnzp1zOhcAAAAAAJALslX8z58/P6fzAAAAAAAAuSRb5/xL0vXr17V69Wp99NFHunTpkiTp1KlTSkxMzLHkAAAAAADA3cvWzP+ff/6ptm3b6vjx40pKSlLr1q3l5uamd955R0lJSZozZ05O5wkAAAAAALIpWzP/gwYNUr169fTPP/+oUKFCZnvnzp0VFRWVY8kBAAAAAIC7l62Z/40bN2rLli1ycnKyai9btqxOnjyZI4kBAAAAAICcka2Z/9TUVKWkpKRrP3HihNzc3O46KQAAAAAAkHOyVfy3adNG7733nvncYrEoMTFRr7/+utq3b59TuQEAAAAAgByQrWX/U6dOVXBwsKpUqaKrV6+qZ8+eOnLkiEqUKKGvvvoqp3MEAAAAAAB3IVvFf6lSpbR79259/fXX2rNnjxITExUWFqZevXpZXQAQAAAAAADYXraKf0lydHTUU089lZO5AAAAAACAXJCt4v/zzz+/7fbevXtnKxkAAAAAAJDzslX8Dxo0yOr5tWvXdOXKFTk5OcnV1ZXiHwAAAACAfCRbV/v/559/rB6JiYk6dOiQHn74YS74BwAAAABAPpOt4j8jFSpU0KRJk9KtCgAAAAAAALaVY8W/9N9FAE+dOpWTXQIAAAAAgLuUrXP+ly5davXcMAydPn1aM2fOVOPGjXMkMQAAAAAAkDOyVfx36tTJ6rnFYlHJkiX1yCOPaOrUqTmRFwAAAAAAyCHZKv5TU1NzOg8AsBIWvi1Px5sXWj9PxwMAAADyUo6e8w8AAAAAAPKfbM38Dx06NNOx06ZNy84QAAAAAAAgh2Rr5n/Xrl369NNP9dFHH2ndunVat26d5s6dq3nz5mnXrl3mIyYm5rb9bNiwQR06dJCfn58sFouWLFlitd0wDI0dO1a+vr4qVKiQWrVqpSNHjljFXLhwQb169ZK7u7s8PT0VFhamxMREq5g9e/aoSZMmcnFxUe
nSpTV58uR0uURERKhSpUpycXFR9erV9dNPP2XnrQEAAAAAIN/JVvHfoUMHNW3aVCdOnNDOnTu1c+dO/fXXX2rRooUeffRRrV27VmvXrtWaNWtu28/ly5dVs2ZNffjhhxlunzx5st5//33NmTNHW7duVeHChRUcHKyrV6+aMb169dL+/fsVGRmpZcuWacOGDerXr5+5PSEhQW3atJG/v7927NihKVOmaNy4cZo7d64Zs2XLFvXo0UNhYWHatWuXOnXqpE6dOmnfvn3ZeXsAAAAAAMhXLIZhGFl90QMPPKCff/5ZVatWtWrft2+f2rRpo1OnTmU9EYtF33//vXknAcMw5Ofnp2HDhmn48OGSpPj4eHl7eys8PFxPPvmkDhw4oCpVqmjbtm2qV6+eJGnlypVq3769Tpw4IT8/P82ePVuvvvqq4uLi5OTkJEl65ZVXtGTJEh08eFCS1L17d12+fFnLli0z83nooYdUq1YtzZkzJ1P5JyQkyMPDQ/Hx8XJ3d8/y/tu7vL54G5BVXPAPAAAA95qs1KHZmvlPSEjQuXPn0rWfO3dOly5dyk6X6cTGxiouLk6tWrUy2zw8PNSwYUNFR0dLkqKjo+Xp6WkW/pLUqlUrOTg4aOvWrWZM06ZNzcJfkoKDg3Xo0CH9888/ZsyN46TFpI2TkaSkJCUkJFg9AAAAAADIj7JV/Hfu3FnPPPOMFi9erBMnTujEiRP67rvvFBYWpi5duuRIYnFxcZIkb29vq3Zvb29zW1xcnLy8vKy2Ozo6qlixYlYxGfVx4xi3iknbnpGJEyfKw8PDfJQuXTqruwgAAAAAQJ7IVvE/Z84ctWvXTj179pS/v7/8/f3Vs2dPtW3bVrNmzcrpHPOl0aNHKz4+3nz89ddftk4JAAAAAIAMZetWf66urpo1a5amTJmio0ePSpICAwNVuHDhHEvMx8dHknTmzBn5+vqa7WfOnFGtWrXMmLNnz1q97vr167pw4YL5eh8fH505c8YqJu35nWLStmfE2dlZzs7O2dgzAAAAAADyVrZm/tOcPn1ap0+fVoUKFVS4cGFl49qBtxQQECAfHx9FRUWZbQkJCdq6dauCgoIkSUFBQbp48aJ27NhhxqxZs0apqalq2LChGbNhwwZdu3bNjImMjFTFihVVtGhRM+bGcdJi0sYBAAAAAOBelq3i//z582rZsqUefPBBtW/fXqdPn5YkhYWFadiwYZnuJzExUTExMYqJiZH030X+YmJidPz4cVksFg0ePFhvvvmmli5dqr1796p3797y8/Mz7whQuXJltW3bVs8995x+/fVXbd68WQMHDtSTTz4pPz8/SVLPnj3l5OSksLAw7d+/X998841mzJihoUOHmnkMGjRIK1eu1NSpU3Xw4EGNGzdO27dv18CBA7Pz9gAAAAAAkK9kq/gfMmSIChYsqOPHj8vV1dVs7969u1auXJnpfrZv367atWurdu3akqShQ4eqdu3aGjt2rCRp5MiReumll9SvXz/Vr19fiYmJWrlypVxcXMw+FixYoEqVKqlly5Zq3769Hn74Yc2dO9fc7uHhoZ9//lmxsbGqW7euhg0bprFjx6pfv35mTKNGjbRw4ULNnTtXNWvW1LfffqslS5aoWrVq2Xl7AAAAAADIVyxGNtbq+/j4aNWqVapZs6bc3Ny0e/dulStXTn/88Ydq1KihxMTE3Mg1X8vK/RXvR2Hh22ydAnBb80Lr2zoFAAAAIEuyUodma+b/8uXLVjP+aS5cuMBF8AAAAAAAyGeyVfw3adJEn3/+ufncYrEoNTVVkydPVosWLXIsOQAAAAAAcPeydau/yZMnq2XLltq+fbuSk5M1cuRI7d+/XxcuXNDmzZtzOkcAAAAAAHAXsjXzX61aNR0+fFgPP/ywOnbsqMuXL6tLly7atWuXAgMDczpHAAAAAABwF7I883/t2jW1bdtWc+bM0auvvpobOQEAAAAAgByU5Zn/ggULas+ePbmRCwAAAAAAyAXZWv
b/1FNPad68eTmdCwAAAAAAyAXZuuDf9evX9emnn2r16tWqW7euChcubLV92rRpOZIcAAAAAAC4e1kq/v/44w+VLVtW+/btU506dSRJhw8ftoqxWCw5lx0AAAAAALhrWSr+K1SooNOnT2vt2rWSpO7du+v999+Xt7d3riQHAAAAAADuXpbO+TcMw+r5ihUrdPny5RxNCAAAAAAA5KxsXfAvzc1fBgAAAAAAgPwnS8W/xWJJd04/5/gDAAAAAJC/Zemcf8MwFBoaKmdnZ0nS1atX1b9//3RX+1+8eHHOZQgAAAAAAO5Klor/Pn36WD1/6qmncjQZAAAAAACQ87JU/M+fPz+38gAAAAAAALnkri74BwAAAAAA8j+KfwAAAAAA7BzFPwAAAAAAdo7iHwAAAAAAO0fxDwAAAACAncvS1f4BwF6FhW/L0/HmhdbP0/EAAABwf2PmHwAAAAAAO0fxDwAAAACAnaP4BwAAAADAzlH8AwAAAABg5yj+AQAAAACwcxT/AAAAAADYOYp/AAAAAADsHMU/AAAAAAB2juIfAAAAAAA7R/EPAAAAAICdo/gHAAAAAMDOUfwDAAAAAGDnKP4BAAAAALBzFP8AAAAAANg5in8AAAAAAOwcxT8AAAAAAHaO4h8AAAAAADtH8Q8AAAAAgJ2j+AcAAAAAwM452joBALgfhYVvy9Px5oXWz9PxAAAAkL8w8w8AAAAAgJ3L98V/2bJlZbFY0j0GDBggSWrevHm6bf3797fq4/jx4woJCZGrq6u8vLw0YsQIXb9+3Spm3bp1qlOnjpydnVW+fHmFh4fn1S4CAAAAAJCr8v2y/23btiklJcV8vm/fPrVu3VqPP/642fbcc8/pjTfeMJ+7urqa/05JSVFISIh8fHy0ZcsWnT59Wr1791bBggX19ttvS5JiY2MVEhKi/v37a8GCBYqKitKzzz4rX19fBQcH58FeAgAAAACQe/J98V+yZEmr55MmTVJgYKCaNWtmtrm6usrHxyfD1//888/67bfftHr1anl7e6tWrVqaMGGCRo0apXHjxsnJyUlz5sxRQECApk6dKkmqXLmyNm3apOnTp1P8AwAAAADuefl+2f+NkpOT9eWXX6pv376yWCxm+4IFC1SiRAlVq1ZNo0eP1pUrV8xt0dHRql69ury9vc224OBgJSQkaP/+/WZMq1atrMYKDg5WdHT0LXNJSkpSQkKC1QMAAAAAgPwo38/832jJkiW6ePGiQkNDzbaePXvK399ffn5+2rNnj0aNGqVDhw5p8eLFkqS4uDirwl+S+TwuLu62MQkJCfr3339VqFChdLlMnDhR48ePz8ndAwAAAAAgV9xTxf+8efPUrl07+fn5mW39+vUz/129enX5+vqqZcuWOnr0qAIDA3Mtl9GjR2vo0KHm84SEBJUuXTrXxgMAAAAAILvumeL/zz//1OrVq80Z/Vtp2LChJOn3339XYGCgfHx89Ouvv1rFnDlzRpLM6wT4+PiYbTfGuLu7ZzjrL0nOzs5ydnbO1r4AAAAAAJCX7plz/ufPny8vLy+FhITcNi4mJkaS5OvrK0kKCgrS3r17dfbsWTMmMjJS7u7uqlKlihkTFRVl1U9kZKSCgoJycA8AAAAAALCNe6L4T01N1fz589WnTx85Ov7/xQpHjx7VhAkTtGPHDh07dkxLly5V79691bRpU9WoUUOS1KZNG1WpUkVPP/20du/erVWrVmnMmDEaMGCAOXPfv39//fHHHxo5cqQOHjyoWbNmadGiRRoyZIhN9hcAAAAAgJx0TxT/q1ev1vHjx9W3b1+rdicnJ61evVpt2rRRpUqVNGzYMHXt2lU//vijGVOgQAEtW7ZMBQoUUFBQkJ566in17t1bb7zxhhkTEBCg5cuXKzIyUjVr1tTUqVP1ySefcJs/AAAAAIBdsBiGYdg6CXuQkJAgDw8PxcfHy93d3dbp5Dth4dtsnQJwX5sXWt/WKQAAACCHZaUOvSdm/gEAAAAAQP
ZR/AMAAAAAYOco/gEAAAAAsHMU/wAAAAAA2DmKfwAAAAAA7BzFPwAAAAAAdo7iHwAAAAAAO0fxDwAAAACAnaP4BwAAAADAzlH8AwAAAABg5yj+AQAAAACwcxT/AAAAAADYOUdbJwAAyH1h4dvydLx5ofXzdDwAAADcHjP/AAAAAADYOYp/AAAAAADsHMU/AAAAAAB2juIfAAAAAAA7R/EPAAAAAICdo/gHAAAAAMDOUfwDAAAAAGDnKP4BAAAAALBzFP8AAAAAANg5in8AAAAAAOwcxT8AAAAAAHaO4h8AAAAAADtH8Q8AAAAAgJ2j+AcAAAAAwM5R/AMAAAAAYOco/gEAAAAAsHMU/wAAAAAA2DmKfwAAAAAA7BzFPwAAAAAAdo7iHwAAAAAAO0fxDwAAAACAnaP4BwAAAADAzjnaOgEAgP0JC9+Wp+PNC62fp+MBAADca5j5BwAAAADAzlH8AwAAAABg5yj+AQAAAACwcxT/AAAAAADYOYp/AAAAAADsHMU/AAAAAAB2juIfAAAAAAA7l6+L/3HjxslisVg9KlWqZG6/evWqBgwYoOLFi6tIkSLq2rWrzpw5Y9XH8ePHFRISIldXV3l5eWnEiBG6fv26Vcy6detUp04dOTs7q3z58goPD8+L3QMAAAAAIE/k6+JfkqpWrarTp0+bj02bNpnbhgwZoh9//FERERFav369Tp06pS5dupjbU1JSFBISouTkZG3ZskWfffaZwsPDNXbsWDMmNjZWISEhatGihWJiYjR48GA9++yzWrVqVZ7uJwAAAAAAucXR1gnciaOjo3x8fNK1x8fHa968eVq4cKEeeeQRSdL8+fNVuXJl/fLLL3rooYf0888/67ffftPq1avl7e2tWrVqacKECRo1apTGjRsnJycnzZkzRwEBAZo6daokqXLlytq0aZOmT5+u4ODgPN1XAAAAAAByQ76f+T9y5Ij8/PxUrlw59erVS8ePH5ck7dixQ9euXVOrVq3M2EqVKqlMmTKKjo6WJEVHR6t69ery9vY2Y4KDg5WQkKD9+/ebMTf2kRaT1setJCUlKSEhweoBAAAAAEB+lK+L/4YNGyo8PFwrV67U7NmzFRsbqyZNmujSpUuKi4uTk5OTPD09rV7j7e2tuLg4SVJcXJxV4Z+2PW3b7WISEhL077//3jK3iRMnysPDw3yULl36bncXAAAAAIBcka+X/bdr1878d40aNdSwYUP5+/tr0aJFKlSokA0zk0aPHq2hQ4eazxMSEvgCAAAAAACQL+Xrmf+beXp66sEHH9Tvv/8uHx8fJScn6+LFi1YxZ86cMa8R4OPjk+7q/2nP7xTj7u5+2y8YnJ2d5e7ubvUAAAAAACA/uqeK/8TERB09elS+vr6qW7euChYsqKioKHP7oUOHdPz4cQUFBUmSgoKCtHfvXp09e9aMiYyMlLu7u6pUqWLG3NhHWkxaHwAAAAAA3OvydfE/fPhwrV+/XseOHdOWLVvUuXNnFShQQD169JCHh4fCwsI0dOhQrV27Vjt27NAzzzyjoKAgPfTQQ5KkNm3aqEqVKnr66ae1e/durVq1SmPGjNGAAQPk7OwsSerfv7/++OMPjRw5UgcPHtSsWbO0aNEiDRkyxJa7DgAAAABAjsnX5/yfOHFCPXr00Pnz51WyZEk9/PDD+uWXX1SyZElJ0vTp0+Xg4KCuXbsqKSlJwcHBmjVrlvn6AgUKaNmyZXrhhRcUFBSkwoULq0+fPnrjjTfMmICAAC1fvlxDhgzRjBkzVKpUKX3yySfc5g8A7iFh4dvyfMx5ofXzfEwAAIDsshiGYdg6CXuQkJAgDw8PxcfHc/5/BmzxhzkA5CaKfwAAYGtZqUPz9bJ/AAAAAABw9yj+AQAAAACwcxT/AAAAAADYOYp/AAAAAADsHMU/AAAAAAB2juIfAAAAAAA7R/EPAAAAAICdo/gHAAAAAMDOUfwDAAAAAGDnKP4BAAAAALBzFP8AAAAAANg5in8AAA
AAAOwcxT8AAAAAAHaO4h8AAAAAADtH8Q8AAAAAgJ2j+AcAAAAAwM452joBAADuRWHh2/J0vHmh9fN0PAAAYF+Y+QcAAAAAwM5R/AMAAAAAYOco/gEAAAAAsHMU/wAAAAAA2DmKfwAAAAAA7BzFPwAAAAAAdo7iHwAAAAAAO0fxDwAAAACAnaP4BwAAAADAzlH8AwAAAABg5yj+AQAAAACwcxT/AAAAAADYOUdbJwAAAO4sLHxbno43L7R+no4HAAByFzP/AAAAAADYOYp/AAAAAADsHMU/AAAAAAB2jnP+70N5fd4oAAAAAMC2mPkHAAAAAMDOUfwDAAAAAGDnKP4BAAAAALBzFP8AAAAAANg5in8AAAAAAOwcxT8AAAAAAHaO4h8AAAAAADtH8Q8AAAAAgJ1ztHUCAAAg/wkL35an480LrZ+n4wEAcL/J1zP/EydOVP369eXm5iYvLy916tRJhw4dsopp3ry5LBaL1aN///5WMcePH1dISIhcXV3l5eWlESNG6Pr161Yx69atU506deTs7Kzy5csrPDw8t3cPAAAAAIA8ka+L//Xr12vAgAH65ZdfFBkZqWvXrqlNmza6fPmyVdxzzz2n06dPm4/Jkyeb21JSUhQSEqLk5GRt2bJFn332mcLDwzV27FgzJjY2ViEhIWrRooViYmI0ePBgPfvss1q1alWe7SsAAAAAALklXy/7X7lypdXz8PBweXl5aceOHWratKnZ7urqKh8fnwz7+Pnnn/Xbb79p9erV8vb2Vq1atTRhwgSNGjVK48aNk5OTk+bMmaOAgABNnTpVklS5cmVt2rRJ06dPV3BwcO7tIHLVS2fG5Ol4H3i/mafjAQAAAEBm5euZ/5vFx8dLkooVK2bVvmDBApUoUULVqlXT6NGjdeXKFXNbdHS0qlevLm9vb7MtODhYCQkJ2r9/vxnTqlUrqz6Dg4MVHR19y1ySkpKUkJBg9QAAAAAAID/K1zP/N0pNTdXgwYPVuHFjVatWzWzv2bOn/P395efnpz179mjUqFE6dOiQFi9eLEmKi4uzKvwlmc/j4uJuG5OQkKB///1XhQoVSpfPxIkTNX78+BzdR+SsqZ7n83Q8pzwdDQAAAAAy754p/gcMGKB9+/Zp06ZNVu39+vUz/129enX5+vqqZcuWOnr0qAIDA3Mtn9GjR2vo0KHm84SEBJUuXTrXxgMAwJ5xdwEAAHLXPVH8Dxw4UMuWLdOGDRtUqlSp28Y2bNhQkvT7778rMDBQPj4++vXXX61izpw5I0nmdQJ8fHzMthtj3N3dM5z1lyRnZ2c5Oztna3/uR8lJL9g6BQAAAAC4b+Xrc/4Nw9DAgQP1/fffa82aNQoICLjja2JiYiRJvr6+kqSgoCDt3btXZ8+eNWMiIyPl7u6uKlWqmDFRUVFW/URGRiooKCiH9gQAAAAAANvJ18X/gAED9OWXX2rhwoVyc3NTXFyc4uLi9O+//0qSjh49qgkTJmjHjh06duyYli5dqt69e6tp06aqUaOGJKlNmzaqUqWKnn76ae3evVurVq3SmDFjNGDAAHPmvn///vrjjz80cuRIHTx4ULNmzdKiRYs0ZMgQm+07AAAAAAA5JV8X/7Nnz1Z8fLyaN28uX19f8/HNN99IkpycnLR69Wq1adNGlSpV0rBhw9S1a1f9+OOPZh8FChTQsmXLVKBAAQUFBempp55S79699cYbb5gxAQEBWr58uSIjI1WzZk1NnTpVn3zyCbf5AwAAAADYhXx9zr9hGLfdXrp0aa1fv/6O/fj7++unn366bUzz5s21a9euLOUHAAAAAMC9IF/P/AMAAAAAgLuXr2f+gXvJS2fG5Ol4H3i/mafjAQAAALh3UfwDOWSq5/k8Hc8pT0cDAAAAcC9j2T8AAAAAAHaO4h8AAAAAADvHsn8AAHDfCQvfludjzgutn+djAgCQhpl/AAAAAADsHDP/wD2KuwsAAAAAyCyKf+Aexd0FAAAAAGQWy/4BAAAAAL
BzzPwDAADkgby+yCAXGAQA3IiZfwAAAAAA7BzFPwAAAAAAdo5l/wAy5bBm5Ol4D2pQno4HAAAA2DOKfwCZUjbpYN4O6Jy3wwEAAAD2jOIfAADADnGBQQDAjTjnHwAAAAAAO0fxDwAAAACAnWPZPwAAAO4apxkAQP5G8Q8gX+LuAgAAAEDOofgHkC9xdwEAAAAg53DOPwAAAAAAdo6ZfwAAANxzuMYAAGQNxT8ASEpOeiFPx3Nynp2n4wEAAOD+xrJ/AAAAAADsHDP/AAAAwB1wmgGAex3FPwDYQF6fZiBxqgEAAMD9jGX/AAAAAADYOYp/AAAAAADsHMv+AeA+wR0NAODekdfXGJC4zgBg7yj+AQC54rBm5Ol4D2pQno4HAPaGixoC9o1l/wAAAAAA2Dlm/gEAuaJs0sG8HdA5b4cDANwdVhoAeYuZfwAAAAAA7Bwz/wAAu8AFDQEAt8NKA9zvKP4BAAAAIIfxZQPyG4p/AACygZUGAID8hNtD4k4o/u9Tef1HKwDg7rx0ZkyejveB95t5Oh4A4N7D6oZ7C8U/AAAAACDf48uGu0PxDwDAPWCq5/m8HdAGK8Q4tQEAgNxD8Q8AAPIFrqMAAEDuofgHAAD3Jb5sAADcTyj+AQAA8gBfNgAAbIni/yYffvihpkyZori4ONWsWVMffPCBGjRoYOu0AAAAssTe7+wz7GLxPB2PO2AAuNdR/N/gm2++0dChQzVnzhw1bNhQ7733noKDg3Xo0CF5eXnZOj0AAAD8n/vhIpj2ji9wgLxF8X+DadOm6bnnntMzzzwjSZozZ46WL1+uTz/9VK+88oqNswMAAADsh71/gZPXX2685F0yT8eTpLJJB/N0PE5nujsU//8nOTlZO3bs0OjRo802BwcHtWrVStHR0enik5KSlJSUZD6Pj4+XJCUkJOR+sncp+d9EJSel2DoNAAAAwG5NdD6bp+P5Xczb8SQpOa/H+7dfno6XkLA+T8fLjrT60zCMO8ZS/P+fv//+WykpKfL29rZq9/b21sGD6b/RmjhxosaPH5+uvXTp0rmWIwAAAAAgbywa4mHrFDLt0qVL8vC4fb4U/9k0evRoDR061HyempqqCxcuqHjx4rJYLDbJKSEhQaVLl9Zff/0ld3d3m+QAZIRjE/kVxybyK45N5Gccn8iv7sdj0zAMXbp0SX5+fneMpfj/PyVKlFCBAgV05swZq/YzZ87Ix8cnXbyzs7OcnZ2t2jw9PXMzxUxzd3e/bw523Fs4NpFfcWwiv+LYRH7G8Yn86n47Nu8045/GIZfzuGc4OTmpbt26ioqKMttSU1MVFRWloKAgG2YGAAAAAMDdYeb/BkOHDlWfPn1Ur149NWjQQO+9954uX75sXv0fAAAAAIB7EcX/Dbp3765z585p7NixiouLU61atbRy5cp0FwHMr5ydnfX666+nOx0BsDWOTeRXHJvIrzg2kZ9xfCK/4ti8PYuRmXsCAAAAAACAexbn/AMAAAAAYOco/gEAAAAAsHMU/wAAAAAA2DmKfwAAAAAA7BzFvx358MMPVbZsWbm4uKhhw4b69ddfbZ0S7NjEiRNVv359ubm5ycvLS506ddKhQ4esYq5evaoBAwaoePHiKlKkiLp27aozZ85YxRw/flwhISFydXWVl5eXRowYoevXr+flrsDOTZo0SRaLRYMHDzbbODZhKydPntRTTz2l4sWLq1ChQqpevbq2b99ubjcMQ2PHjpWvr68KFSqkVq1a6ciRI1Z9XLhwQb169ZK7u7s8PT0VFhamxMTEvN4V2JGUlBS99tprCggIUKFChRQYGKgJEyboxuuCc2wir2zYsEEdOnSQn5+fLBaLlixZYrU9p47FPXv2qEmTJnJxcVHp0qU1efLk3N41m6P4txPffPONhg4dqtdff107d+5UzZo1FRwcrLNnz9o6Ndip9evXa8CAAfrll18UGRmpa9euqU2bNrp8+bIZM2TIEP3444+KiIjQ+vXrderUKXXp0s
XcnpKSopCQECUnJ2vLli367LPPFB4errFjx9pil2CHtm3bpo8++kg1atSwaufYhC38888/aty4sQoWLKgVK1bot99+09SpU1W0aFEzZvLkyXr//fc1Z84cbd26VYULF1ZwcLCuXr1qxvTq1Uv79+9XZGSkli1bpg0bNqhfv3622CXYiXfeeUezZ8/WzJkzdeDAAb3zzjuaPHmyPvjgAzOGYxN55fLly6pZs6Y+/PDDDLfnxLGYkJCgNm3ayN/fXzt27NCUKVM0btw4zZ07N9f3z6YM2IUGDRoYAwYMMJ+npKQYfn5+xsSJE22YFe4nZ8+eNSQZ69evNwzDMC5evGgULFjQiIiIMGMOHDhgSDKio6MNwzCMn376yXBwcDDi4uLMmNmzZxvu7u5GUlJS3u4A7M6lS5eMChUqGJGRkUazZs2MQYMGGYbBsQnbGTVqlPHwww/fcntqaqrh4+NjTJkyxWy7ePGi4ezsbHz11VeGYRjGb7/9Zkgytm3bZsasWLHCsFgsxsmTJ3Mvedi1kJAQo2/fvlZtXbp0MXr16mUYBscmbEeS8f3335vPc+pYnDVrllG0aFGr/9NHjRplVKxYMZf3yLaY+bcDycnJ2rFjh1q1amW2OTg4qFWrVoqOjrZhZrifxMfHS5KKFSsmSdqxY4euXbtmdVxWqlRJZcqUMY/L6OhoVa9eXd7e3mZMcHCwEhIStH///jzMHvZowIABCgkJsToGJY5N2M7SpUtVr149Pf744/Ly8lLt2rX18ccfm9tjY2MVFxdndWx6eHioYcOGVsemp6en6tWrZ8a0atVKDg4O2rp1a97tDOxKo0aNFBUVpcOHD0uSdu/erU2bNqldu3aSODaRf+TUsRgdHa2mTZvKycnJjAkODtahQ4f0zz//5NHe5D1HWyeAu/f3338rJSXF6o9USfL29tbBgwdtlBXuJ6mpqRo8eLAaN26satWqSZLi4uLk5OQkT09Pq1hvb2/FxcWZMRkdt2nbgOz6+uuvtXPnTm3bti3dNo5N2Moff/yh2bNna+jQofrf//6nbdu26eWXX5aTk5P69OljHlsZHXs3HpteXl5W2x0dHVWsWDGOTWTbK6+8ooSEBFWqVEkFChRQSkqK3nrrLfXq1UuSODaRb+TUsRgXF6eAgIB0faRtu/F0LHtC8Q/grg0YMED79u3Tpk2bbJ0KoL/++kuDBg1SZGSkXFxcbJ0OYEpNTVW9evX09ttvS5Jq166tffv2ac6cOerTp4+Ns8P9bNGiRVqwYIEWLlyoqlWrKiYmRoMHD5afnx/HJmBHWPZvB0qUKKECBQqku1L1mTNn5OPjY6OscL8YOHCgli1bprVr16pUqVJmu4+Pj5KTk3Xx4kWr+BuPSx8fnwyP27RtQHbs2LFDZ8+eVZ06deTo6ChHR0etX79e77//vhwdHeXt7c2xCZvw9fVVlSpVrNoqV66s48ePS/r/x9bt/j/38fFJdzHf69ev68KFCxybyLYRI0bolVde0ZNPPqnq1avr6aef1pAhQzRx4kRJHJvIP3LqWLxf/5+n+LcDTk5Oqlu3rqKiosy21NRURUVFKSgoyIaZwZ4ZhqGBAwfq+++/15o1a9Itnapbt64KFixodVweOnRIx48fN4/LoKAg7d271+oDOjIyUu7u7un+QAYyq2XLltq7d69iYmLMR7169dSrVy/z3xybsIXGjRunuyXq4cOH5e/vL0kKCAiQj4+P1bGZkJCgrVu3Wh2bFy9e1I4dO8yYNWvWKDU1VQ0bNsyDvYA9unLlihwcrMuCAgUKKDU1VRLHJvKPnDoWg4KCtGHDBl27ds2MiYyMVMWKFe12yb8krvZvL77++mvD2dnZCA8PN3777TejX79+hqenp9WVqoGc9MILLxgeHh7GunXrjNOnT5uPK1eumDH9+/c3ypQpY6xZs8bYvn27ERQUZAQFBZnbr1+/blSrVs1o06aNERMTY6xcudIoWbKkMXr0aFvsEuzYjVf7Nw
yOTdjGr7/+ajg6OhpvvfWWceTIEWPBggWGq6ur8eWXX5oxkyZNMjw9PY0ffvjB2LNnj9GxY0cjICDA+Pfff82Ytm3bGrVr1za2bt1qbNq0yahQoYLRo0cPW+wS7ESfPn2MBx54wFi2bJkRGxtrLF682ChRooQxcuRIM4ZjE3nl0qVLxq5du4xdu3YZkoxp06YZu3btMv7880/DMHLmWLx48aLh7e1tPP3008a+ffuMr7/+2nB1dTU++uijPN/fvETxb0c++OADo0yZMoaTk5PRoEED45dffrF1SrBjkjJ8zJ8/34z5999/jRdffNEoWrSo4erqanTu3Nk4ffq0VT/Hjh0z2rVrZxQqVMgoUaKEMWzYMOPatWt5vDewdzcX/xybsJUff/zRqFatmuHs7GxUqlTJmDt3rtX21NRU47XXXjO8vb0NZ2dno2XLlsahQ4esYs6fP2/06NHDKFKkiOHu7m4888wzxqVLl/JyN2BnEhISjEGDBhllypQxXFxcjHLlyhmvvvqq1W3QODaRV9auXZvh35h9+vQxDCPnjsXdu3cbDz/8sOHs7Gw88MADxqRJk/JqF23GYhiGYZs1BwAAAAAAIC9wzj8AAAAAAHaO4h8AAAAAADtH8Q8AAAAAgJ2j+AcAAAAAwM5R/AMAAAAAYOco/gEAAAAAsHMU/wAAAAAA2DmKfwAAAAAA7BzFPwAAQC6wWCxasmSJrdMAAEASxT8AAPnWuXPn9MILL6hMmTJydnaWj4+PgoODtXnzZlunlm/khwJ73LhxqlWrlk1zAADgThxtnQAAAMhY165dlZycrM8++0zlypXTmTNnFBUVpfPnz9s6NQAAcI9h5h8AgHzo4sWL2rhxo9555x21aNFC/v7+atCggUaPHq3HHnvMKu7ZZ59VyZIl5e7urkceeUS7d++26mvSpEny9vaWm5ubwsLC9Morr1jNVDdv3lyDBw+2ek2nTp0UGhpqPk9KStLw4cP1wAMPqHDhwmrYsKHWrVtnbg8PD5enp6dWrVqlypUrq0iRImrbtq1Onz5t1e+nn36qqlWrytnZWb6+vho4cGCW9iWrPvnkE1WuXFkuLi6qVKmSZs2aZW47duyYLBaLFi9erBYtWsjV1VU1a9ZUdHS0VR8ff/yxSpcuLVdXV3Xu3FnTpk2Tp6enud/jx4/X7t27ZbFYZLFYFB4ebr7277//VufOneXq6qoKFSpo6dKld7U/AABkF8U/AAD5UJEiRVSkSBEtWbJESUlJt4x7/PHHdfbsWa1YsUI7duxQnTp11LJlS124cEGStGjRIo0bN05vv/22tm/fLl9fX6sCOLMGDhyo6Ohoff3119qzZ48ef/xxtW3bVkeOHDFjrly5onfffVdffPGFNmzYoOPHj2v48OHm9tmzZ2vAgAHq16+f9u7dq6VLl6p8+fKZ3pesWrBggcaOHau33npLBw4c0Ntvv63XXntNn332mVXcq6++quHDhysmJkYPPvigevTooevXr0uSNm/erP79+2vQoEGKiYlR69at9dZbb5mv7d69u4YNG6aqVavq9OnTOn36tLp3725uHz9+vJ544gnt2bNH7du3V69evbK9PwAA3BUDAADkS99++61RtGhRw8XFxWjUqJExevRoY/fu3eb2jRs3Gu7u7sbVq1etXhcYGGh89NFHhmEYRlBQkPHiiy9abW/YsKFRs2ZN83mzZs2MQYMGWcV07NjR6NOnj2EYhvHnn38aBQoUME6ePGkV07JlS2P06NGGYRjG/PnzDUnG77//bm7/8MMPDW9vb/O5n5+f8eqrr2a4r5nZl4xIMr7//vsMtwUGBhoLFy60apswYYIRFBRkGIZhxMbGGpKMTz75xNy+f/9+Q5Jx4MABwzAMo3v37kZISIhVH7169TI8PDzM56+//rrV+3ljbmPGjDGfJyYmGpKMFStW3HJ/AADILcz8AwCQT3Xt2lWnTp3S0qVL1bZtW61bt0516tQxl5Xv3r1biYmJKl
68uLlSoEiRIoqNjdXRo0clSQcOHFDDhg2t+g0KCspSHnv37lVKSooefPBBq3HWr19vjiNJrq6uCgwMNJ/7+vrq7NmzkqSzZ8/q1KlTatmyZYZjZGZfsuLy5cs6evSowsLCrPp788030/VXo0YNq5zT8pWkQ4cOqUGDBlbxNz+/nRv7Lly4sNzd3c2+AQDIS1zwDwCAfMzFxUWtW7dW69at9dprr+nZZ5/V66+/rtDQUCUmJsrX19fq3Ps0aeekZ4aDg4MMw7Bqu3btmvnvxMREFShQQDt27FCBAgWs4ooUKWL+u2DBglbbLBaL2W+hQoVum0NO7cuN/Un/na9/85cfN+/DjXlbLBZJUmpqapbHzEhG70lO9Q0AQFZQ/AMAcA+pUqWKeWu7OnXqKC4uTo6OjipbtmyG8ZUrV9bWrVvVu3dvs+2XX36xiilZsqTVhflSUlK0b98+tWjRQpJUu3ZtpaSk6OzZs2rSpEm28nZzc1PZsmUVFRVl9nujzOxLVnh7e8vPz09//PGHevXqle1+KlasqG3btlm13fzcyclJKSkp2R4DAIC8QPEPAEA+dP78eT3++OPq27evatSoITc3N23fvl2TJ09Wx44dJUmtWrVSUFCQOnXqpMmTJ+vBBx/UqVOntHz5cnXu3Fn16tXToEGDFBoaqnr16qlx48ZasGCB9u/fr3LlypljPfLIIxo6dKiWL1+uwMBATZs2TRcvXjS3P/jgg+rVq5d69+6tqVOnqnbt2jp37pyioqJUo0YNhYSEZGqfxo0bp/79+8vLy0vt2rXTpUuXtHnzZr300kuZ2pdbiY2NVUxMjFVbhQoVNH78eL388svy8PBQ27ZtlZSUpO3bt+uff/7R0KFDM5XzSy+9pKZNm2ratGnq0KGD1qxZoxUrVpgrBCSpbNmyZg6lSpWSm5ubnJ2dM9U/AAB5heIfAIB8qEiRImrYsKGmT5+uo0eP6tq1aypdurSee+45/e9//5P03xLyn376Sa+++qqeeeYZnTt3Tj4+PmratKm8vb0l/Xc1+qNHj2rkyJG6evWqunbtqhdeeEGrVq0yx+rbt692796t3r17y9HRUUOGDEk3Oz9//ny9+eabGjZsmE6ePKkSJUrooYce0qOPPprpferTp4+uXr2q6dOna/jw4SpRooS6deuW6X25lYwK+Y0bN+rZZ5+Vq6urpkyZohEjRqhw4cKqXr16utsa3k7jxo01Z84cjR8/XmPGjFFwcLCGDBmimTNnmjFdu3Y1bxd48eJFzZ8/3+o2iQAA5AcW4+aT/AAAgF0bN26clixZkm62HJnz3HPP6eDBg9q4caOtUwEAINOY+QcAALiNd999V61bt1bhwoW1YsUKffbZZ5o1a5at0wIAIEso/gEAAG7j119/1eTJk3Xp0iWVK1dO77//vp599llbpwUAQJaw7B8AAAAAADvnYOsEAAAAAABA7qL4BwAAAADAzlH8AwAAAABg5yj+AQAAAACwcxT/AAAAAADYOYp/AAAAAADsHMU/AAAAAAB2juIfAAAAAAA79/8ACZ13sxrznTkAAAAASUVORK5CYII=",
|
| 118 |
+
"text/plain": [
|
| 119 |
+
"<Figure size 1200x600 with 1 Axes>"
|
| 120 |
+
]
|
| 121 |
+
},
|
| 122 |
+
"metadata": {},
|
| 123 |
+
"output_type": "display_data"
|
| 124 |
+
}
|
| 125 |
+
],
|
| 126 |
+
"source": [
|
| 127 |
+
"# 2. Sequence lengths histogram\n",
|
| 128 |
+
"plt.figure(figsize=(12, 6))\n",
|
| 129 |
+
"for name, df in datasets.items():\n",
|
| 130 |
+
" plt.hist(df['Length'], bins=30, alpha=0.7, label=name)\n",
|
| 131 |
+
"\n",
|
| 132 |
+
"plt.title('UniRef50 (200k Sequences) - Train, Test, and Val Sequence Lengths')\n",
|
| 133 |
+
"plt.xlabel('Sequence Length')\n",
|
| 134 |
+
"plt.ylabel('Frequency')\n",
|
| 135 |
+
"plt.legend()\n",
|
| 136 |
+
"plt.show()"
|
| 137 |
+
]
|
| 138 |
+
},
|
| 139 |
+
{
|
| 140 |
+
"cell_type": "code",
|
| 141 |
+
"execution_count": null,
|
| 142 |
+
"metadata": {},
|
| 143 |
+
"outputs": [],
|
| 144 |
+
"source": []
|
| 145 |
+
}
|
| 146 |
+
],
|
| 147 |
+
"metadata": {
|
| 148 |
+
"kernelspec": {
|
| 149 |
+
"display_name": "Python 3",
|
| 150 |
+
"language": "python",
|
| 151 |
+
"name": "python3"
|
| 152 |
+
},
|
| 153 |
+
"language_info": {
|
| 154 |
+
"codemirror_mode": {
|
| 155 |
+
"name": "ipython",
|
| 156 |
+
"version": 3
|
| 157 |
+
},
|
| 158 |
+
"file_extension": ".py",
|
| 159 |
+
"mimetype": "text/x-python",
|
| 160 |
+
"name": "python",
|
| 161 |
+
"nbconvert_exporter": "python",
|
| 162 |
+
"pygments_lexer": "ipython3",
|
| 163 |
+
"version": "3.10.12"
|
| 164 |
+
}
|
| 165 |
+
},
|
| 166 |
+
"nbformat": 4,
|
| 167 |
+
"nbformat_minor": 2
|
| 168 |
+
}
|