Upload new model safetensors with trained LMHead

#3
This view is limited to 50 files because it contains too many changes.  See the raw diff here.
Files changed (50) hide show
  1. .gitattributes +43 -1
  2. .gitignore +0 -24
  3. README.md +33 -37
  4. __init__.py +0 -0
  5. benchmarks/.DS_Store +0 -0
  6. .DS_Store → benchmarks/Generation/.DS_Store +0 -0
  7. benchmarks/Generation/ProtGPT2/protgpt2_finetune.py +70 -0
  8. benchmarks/Generation/ProtGPT2/protgpt2_generate.py +55 -0
  9. benchmarks/Generation/ProtGPT2/protgpt2_generated_sequences.csv +101 -0
  10. benchmarks/Generation/ProtGPT2/protgpt2_test.txt +0 -0
  11. benchmarks/Generation/ProtGPT2/protgpt2_train.txt +0 -0
  12. benchmarks/Generation/ProtGPT2/run_clm.py +657 -0
  13. benchmarks/Generation/Visualize/analyze_mdlm_denovo_gen.py +7 -0
  14. benchmarks/Generation/Visualize/esm_umap.png +0 -0
  15. benchmarks/Generation/Visualize/esm_umap.py +111 -0
  16. benchmarks/Generation/Visualize/mdlm_de-novo_generation_results.csv +101 -0
  17. benchmarks/MLM/config.py +14 -0
  18. benchmarks/MLM/data_loader.py +48 -0
  19. benchmarks/MLM/esm_utils.py +16 -0
  20. benchmarks/MLM/mlm_generate_utils.py +108 -0
  21. benchmarks/MLM/mlm_lowercase_results.csv +0 -0
  22. benchmarks/MLM/mlm_motif_benchmarking.py +39 -0
  23. benchmarks/MLM/mlm_uppercase_results.csv +0 -0
  24. benchmarks/MLM/model.py +65 -0
  25. benchmarks/MLM/pretrained_models.py +12 -0
  26. benchmarks/MLM/screen_mlm_cosine_hamming.py +17 -0
  27. benchmarks/MLM/train_and_test.py +184 -0
  28. benchmarks/Supervised/.DS_Store +0 -0
  29. benchmarks/Supervised/Localization/cell_localization_predictor.py +224 -0
  30. benchmarks/Supervised/Localization/process_cell_local_data.py +12 -0
  31. benchmarks/Supervised/Localization/true_deeploc2.0_cell-local_test.csv +0 -0
  32. memdlm_schematic.png → benchmarks/Supervised/Localization/true_deeploc2.0_cell-local_train-val.csv +2 -2
  33. benchmarks/Supervised/Membrane Type/membrane_type_predictor.py +226 -0
  34. benchmarks/Supervised/Membrane Type/membrane_type_test.csv +0 -0
  35. benchmarks/Supervised/Membrane Type/membrane_type_train.csv +3 -0
  36. benchmarks/Supervised/Membrane Type/split_membrane_type_data.py +15 -0
  37. benchmarks/Supervised/Membrane Type/unsplit_membrane_type_all.csv +3 -0
  38. benchmarks/Supervised/Solubility/solubility_transformer.py +353 -0
  39. checkpoints/.DS_Store +0 -0
  40. config.json +30 -0
  41. config.yaml +127 -0
  42. data/.DS_Store +0 -0
  43. data/membrane/test.csv +0 -0
  44. data/membrane/train.csv +0 -0
  45. data/membrane/val.csv +0 -0
  46. data/uniref/100k_seqs/check_data.ipynb +168 -0
  47. data/uniref/100k_seqs/test.csv +0 -0
  48. data/uniref/100k_seqs/train.csv +3 -0
  49. data/uniref/100k_seqs/val.csv +0 -0
  50. data/uniref/200k_seqs/check_data.ipynb +168 -0
.gitattributes CHANGED
@@ -1 +1,43 @@
1
- memdlm_schematic.png filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ benchmarks/DeepLoc/cell_localization_train_val.csv filter=lfs diff=lfs merge=lfs -text
37
+ benchmarks/DeepLoc/membrane_type_train.csv filter=lfs diff=lfs merge=lfs -text
38
+ benchmarks/DeepLoc/OG_membrane_type_all.csv filter=lfs diff=lfs merge=lfs -text
39
+ data/uniref/100k_seqs/train.csv filter=lfs diff=lfs merge=lfs -text
40
+ data/uniref/200k_seqs/train.csv filter=lfs diff=lfs merge=lfs -text
41
+ benchmarks/Supervised/Localization/true_deeploc2.0_cell-local_train-val.csv filter=lfs diff=lfs merge=lfs -text
42
+ benchmarks/Supervised/Membrane[[:space:]]Type/membrane_type_train.csv filter=lfs diff=lfs merge=lfs -text
43
+ benchmarks/Supervised/Membrane[[:space:]]Type/unsplit_membrane_type_all.csv filter=lfs diff=lfs merge=lfs -text
.gitignore DELETED
@@ -1,24 +0,0 @@
1
- # .gitignore
2
-
3
- /checkpoints/
4
- /data/
5
- /results/
6
- /build/
7
- /src/scripts/
8
- /src/benchmarks
9
-
10
- /src/lm/dplm
11
- /src/lm/evodiff
12
- /src/lm/dplm_playground.ipynb
13
- /src/lm/evoflow_playground.ipynb
14
- /src/utils/ubuntu_font
15
-
16
- /src/sampling/old_guidance.py
17
-
18
- /MeMDLM_v2.egg-info/
19
- *.pth
20
- *.ckpt
21
- *.err
22
- *.out
23
- *.csv
24
- __pycache__/
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
README.md CHANGED
@@ -1,51 +1,47 @@
1
  ---
2
  license: cc-by-nc-nd-4.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  ---
4
 
5
- <h1 align='center'>Token-Level Guided Discrete Diffusion for Membrane Protein Design</h1>
6
 
7
- <div align="center">
8
- <a href="https://shreygoel09.github.io/" target="_blank">Shrey Goel</a><sup>1</sup>&ensp;<b>&middot;</b>&ensp;
9
- <a href="https://www.linkedin.com/in/perin-schray-96855a32b/" target="_blank">Perin Schray</a><sup>2</sup>&ensp;<b>&middot;</b>&ensp;
10
- <a href="https://www.linkedin.com/in/yinuozhang98/" target="_blank">Yinuo Zhang</a><sup>3</sup>&ensp;<b>&middot;</b>&ensp;
11
- <a href="https://www.linkedin.com/in/sophia-vincoff-185192146/" target="_blank">Sophia Vincoff</a><sup>4</sup>&ensp;<b>&middot;</b>&ensp;
12
- <a href="https://www.linkedin.com/in/htkratochvil/" target="_blank">Huong T. Kratochvil</a><sup>2</sup>&ensp;<b>&middot;</b>&ensp;
13
- <a href="https://www.chatterjeelab.com/" target="_blank">Pranam Chatterjee</a><sup>4<sup>
14
- <br>
15
- <p style="font-size: 16px;">
16
- <sup>1</sup> Duke University &emsp;
17
- <sup>2</sup> UNC—Chapel Hill &emsp;
18
- <sup>3</sup> Duke-NUS Medical School &emsp;
19
- <sup>4</sup> University of Pennsylvania &emsp;
20
- </div>
21
-
22
- <div align="center">
23
- <a href="https://arxiv.org/abs/2410.16735"><img src="https://img.shields.io/badge/Arxiv-2506.09007-red?style=for-the-badge&logo=Arxiv" alt="arXiv"/></a>
24
 
25
- </div>
26
 
 
27
 
 
28
 
 
 
29
 
30
- ![MemDLM diagram](./memdlm_schematic.png)
 
31
 
 
32
 
33
- Reparameterized diffusion models (RDMs) have recently matched autoregressive methods in protein generation, motivating their use for challenging tasks such as designing membrane proteins, which possess interleaved soluble and transmembrane (TM) regions.
 
34
 
35
- We introduce ***Membrane Diffusion Language Model (MemDLM)***, a fine-tuned RDM-based protein language model that enables controllable membrane protein sequence design. MemDLM-generated sequences recapitulate the TM residue density and structural features of natural membrane proteins, achieving comparable biological plausibility and outperforming state-of-the-art diffusion baselines in motif scaffolding tasks by producing:
 
36
 
37
- - Lower perplexity
38
- - Higher BLOSUM-62 scores
39
- - Improved pLDDT confidence
40
-
41
- To enhance controllability, we develop ***Per-Token Guidance (PET)***, a novel classifier-guided sampling strategy that selectively solubilizes residues while preserving conserved TM domains. This yields sequences with reduced TM density but intact functional cores.
42
-
43
- Importantly, MemDLM designs validated in TOXCAT β-lactamase growth assays demonstrate successful TM insertion, distinguishing high-quality generated sequences from poor ones.
44
-
45
- Together, our framework establishes the first experimentally validated diffusion-based model for rational membrane protein generation, integrating *de novo* design, motif scaffolding, and targeted property optimization.
46
-
47
-
48
-
49
- ## **Repository Authors**
50
- - <u>[Shrey Goel](https://shreygoel09.github.io/)</u> – undergraduate student at Duke University
51
- - <u>[Pranam Chatterjee](mailto:pranam@seas.upenn.edu)</u> – Assistant Professor at University of Pennsylvania
 
1
  ---
2
  license: cc-by-nc-nd-4.0
3
+ extra_gated_fields:
4
+ Name: text
5
+ Company: text
6
+ Country: country
7
+ Specific date: date_picker
8
+ I want to use this model for:
9
+ type: select
10
+ options:
11
+ - Research
12
+ - Education
13
+ - label: Other
14
+ value: other
15
+ I agree to share generated sequences and associated data with authors before publishing: checkbox
16
+ I agree not to file patents on any sequences generated by this model: checkbox
17
+ I agree to use this model for non-commercial use ONLY: checkbox
18
+ base_model:
19
+ - facebook/esm2_t30_150M_UR50D
20
+ pipeline_tag: fill-mask
21
  ---
22
 
23
+ # MeMDLM: De Novo Membrane Protein Design with Masked Diffusion Language Models
24
 
25
+ ![image/png](https://cdn-uploads.huggingface.co/production/uploads/65bbea9a26c639b000501321/uWW6xnJZwQFWDS1QZNQTm.png)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
+ Masked Diffusion Language Models (MDLMs), introduced by Sahoo et al. ([arXiv:2406.07524](https://arxiv.org/abs/2406.07524)), give BERT-style models strong generative capabilities. In this work, we pre-train and fine-tune ESM-2-150M on the MDLM objective to scaffold functional motifs while unconditionally generating realistic, high-quality membrane protein sequences.
28
 
29
+ ## Model Usage
30
 
31
+ The MDLM model wraps an internal backbone model, which is a fine-tuned version of ESM-2 (150M). The backbone can be loaded directly from this repo:
32
 
33
+ ```python
34
from transformers import AutoTokenizer, AutoModelForMaskedLM

tokenizer = AutoTokenizer.from_pretrained("ChatterjeeLab/MeMDLM")
model = AutoModelForMaskedLM.from_pretrained("ChatterjeeLab/MeMDLM")

input_sequence = "QMMALTFITYIGCGLSSIFLSVTLVILIQLCAALLLLNLIFLLDSWIALYnTRGFCIAVAVFLHYFLLVSFTWMGLEAFHMYLKFCIVGWGIPAVVVSIVLTISPDNYGidFCWINSNVVFYITVVGYFCVIFLLNVSMFIVVLVQLCRIKKKKQLGDL"

inputs = tokenizer(input_sequence, return_tensors="pt")
output = model(**inputs)

# The forward pass returns a model-output object, not a tensor: the per-position
# vocabulary scores live in `output.logits`. Take the highest-scoring token at
# every position and drop the batch dimension before decoding.
predicted_ids = output.logits.argmax(dim=-1).squeeze(0)
filled_protein_seq = tokenizer.decode(predicted_ids) # contains the output protein sequence with filled mask tokens
45
+ ```
46
 
47
+ This backbone model can be integrated with the [MDLM formulation](https://github.com/kuleshov-group/mdlm) by setting the model backbone type to "hf_dit" and the Hugging Face model ID to "ChatterjeeLab/MeMDLM".
 
 
 
 
 
 
 
 
 
 
 
 
 
 
__init__.py DELETED
File without changes
benchmarks/.DS_Store ADDED
Binary file (6.15 kB). View file
 
.DS_Store → benchmarks/Generation/.DS_Store RENAMED
Binary files a/.DS_Store and b/benchmarks/Generation/.DS_Store differ
 
benchmarks/Generation/ProtGPT2/protgpt2_finetune.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import os
3
+ import subprocess
4
+ from transformers import AutoTokenizer, AutoModelForCausalLM
5
+
6
+
7
def modify_sequences(sequence, line_width=60):
    """Format one protein sequence as a ProtGPT2 fine-tuning record.

    The sequence is upper-cased, wrapped into lines of at most ``line_width``
    characters, and preceded by a line that holds only the ``<|endoftext|>``
    token.

    Args:
        sequence: Amino-acid sequence as a string.
        line_width: Maximum number of characters per wrapped line. Defaults
            to 60, the value that was previously hard-coded.

    Returns:
        The formatted record as a single string. An empty input yields just
        the ``<|endoftext|>`` line followed by a newline.

    Raises:
        ValueError: If ``line_width`` is smaller than 1.
    """
    if line_width < 1:
        raise ValueError("line_width must be a positive integer")

    residues = sequence.upper()
    wrapped = "\n".join(
        residues[start:start + line_width]
        for start in range(0, len(residues), line_width)
    )

    # Every record starts with the end-of-text token on its own line.
    return "<|endoftext|>" + "\n" + wrapped
16
+
17
def to_txt_file(df, filename, column='Sequence'):
    """Write every entry of one column to a text file, one entry per line.

    Args:
        df: Table-like object indexable by column name (the script passes a
            pandas DataFrame); ``df[column]`` must iterate over strings.
        filename: Path of the file to create or overwrite.
        column: Name of the column to dump. Defaults to ``'Sequence'``, the
            column that was previously hard-coded.

    Raises:
        TypeError: If an entry is not a string (string concatenation fails),
            as in the original implementation.
    """
    # Explicit encoding so the output does not depend on the platform default.
    with open(filename, 'w', encoding='utf-8') as f:
        f.writelines(sequence + '\n' for sequence in df[column])
22
+
23
+
24
import sys

# Root of the project checkout; every data and benchmark path hangs off it.
path = "/workspace/sg666/MDpLM"

# Directory that holds run_clm.py and receives the formatted text files.
protgpt2_dir = path + "/benchmarks/Generation/ProtGPT2"
train_txt = protgpt2_dir + "/protgpt2_train.txt"
test_txt = protgpt2_dir + "/protgpt2_test.txt"

train = pd.read_csv(path + "/data/membrane/train.csv")
val = pd.read_csv(path + "/data/membrane/val.csv")
test = pd.read_csv(path + "/data/membrane/test.csv")

# The validation split is folded into the training data; the test split is
# what gets passed to run_clm.py as --validation_file below.
train = pd.concat([train, val])

# Reformat the sequences for ProtGPT2 fine-tuning.
train['Sequence'] = train['Sequence'].apply(modify_sequences)
test['Sequence'] = test['Sequence'].apply(modify_sequences)

# Save the modified sequences as txt files
to_txt_file(train, train_txt)
to_txt_file(test, test_txt)

# NOTE(review): these two objects are not used anywhere in the visible part of
# this script (fine-tuning happens in the subprocess below). They are kept so
# the module-level names stay available - confirm whether they can be dropped.
tokenizer = AutoTokenizer.from_pretrained("nferruz/ProtGPT2")
model = AutoModelForCausalLM.from_pretrained("nferruz/ProtGPT2")

finetune_protgpt2_command = [
    # Same interpreter as this script, and absolute paths for the script and
    # data files, so the command no longer depends on the launch directory.
    sys.executable, protgpt2_dir + "/run_clm.py",
    "--model_name_or_path", "nferruz/ProtGPT2",
    "--train_file", train_txt,
    "--validation_file", test_txt,
    "--tokenizer_name", "nferruz/ProtGPT2",
    "--num_train_epochs", "10",
    "--logging_steps", "1",
    "--logging_dir", "test",
    "--do_train",
    "--do_eval",
    "--output_dir", "/workspace/sg666/MDpLM/benchmarks/Generation/ProtGPT2/finetuned_models",
    "--overwrite_output_dir",
    "--learning_rate", "3e-04",
    "--per_device_train_batch_size", "2",
    "--evaluation_strategy", "epoch"
]

try:
    # cwd keeps the relative --logging_dir next to run_clm.py.
    result = subprocess.run(
        finetune_protgpt2_command,
        check=True,
        text=True,
        capture_output=True,
        cwd=protgpt2_dir,
    )
except subprocess.CalledProcessError as e:
    print("Command failed with the following error:")
    print(e.stderr)  # Print standard error output
    print("Command output:")
    print(e.stdout)  # Print standard output if needed
    # Propagate the failure instead of exiting with status 0.
    raise SystemExit(e.returncode)
else:
    # The output was captured, so surface it once the run has finished.
    print(result.stdout)
70
+
benchmarks/Generation/ProtGPT2/protgpt2_generate.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import pipeline
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer
3
+ import math
4
+ import torch
5
+ import sys
6
+ import pandas as pd
7
+
8
def calculate_perplexity(sequence, model, tokenizer):
    """Return the perplexity of one sequence under ``model``.

    The sequence is wrapped in ``<|endoftext|>`` tokens, encoded with
    ``tokenizer``, and passed through ``model`` with the input ids also used
    as labels. The perplexity is the exponential of the loss the model
    returns as the first element of its output.

    Args:
        sequence: Text to score.
        model: A torch module whose forward call accepts
            ``(input_ids, labels=...)`` and returns the loss first.
        tokenizer: Object with an ``encode(text)`` method returning token ids.

    Returns:
        The perplexity as a Python float.
    """
    wrapped = "<|endoftext|>" + sequence + "<|endoftext|>"
    input_ids = torch.tensor(tokenizer.encode(wrapped)).unsqueeze(0)

    # Use the device the model's weights live on, rather than a global
    # `device` that only exists when this file is run as a script.
    input_ids = input_ids.to(next(model.parameters()).device)

    with torch.no_grad():
        outputs = model(input_ids, labels=input_ids)

    loss = outputs[0]
    return math.exp(loss.item())
17
+
18
if __name__ == "__main__":
    # Kept as a module-level name: defined here so code that looks up a
    # global `device` while the script runs still finds it.
    device = torch.device("cuda" if torch.cuda.is_available() else 'cpu')
    path = "/workspace/sg666/MDpLM/benchmarks/Generation/ProtGPT2"

    # Load the fine-tuned model and tokenizer once and move the model to the
    # target device a single time.
    model_path = path + "/finetuned_models/checkpoint-4510"
    model = AutoModelForCausalLM.from_pretrained(model_path).to(device)
    tokenizer = AutoTokenizer.from_pretrained(model_path)

    # Generate sequences. The already-loaded model and tokenizer are handed to
    # the pipeline so the checkpoint is not read from disk a second time.
    protgpt2 = pipeline('text-generation', model=model, tokenizer=tokenizer, device=device)
    sequences = protgpt2("", max_length=100, do_sample=True, top_k=950, repetition_penalty=1.5, num_return_sequences=100, eos_token_id=0)

    # Perplexity is computed on the raw generated text, before the newlines
    # and end-of-text tokens are stripped below.
    generated_sequences = [item['generated_text'] for item in sequences]
    perplexities = [
        calculate_perplexity(raw_sequence, model, tokenizer)
        for raw_sequence in generated_sequences
    ]

    # Clean the generated sequences
    cleaned_sequences = [seq.replace('\n', '').replace('<|endoftext|>', '') for seq in generated_sequences]

    # Create df with cleaned sequences and perplexities, lowest perplexity first
    df = pd.DataFrame({"Sequence": cleaned_sequences, "Perplexity": perplexities})
    df.sort_values(by='Perplexity', inplace=True)

    # Save results
    df.to_csv(path + "/protgpt2_generated_sequences.csv", index=False)

    # View the average de novo generation perplexity
    avg_generation_ppl = df.loc[:, 'Perplexity'].mean()
    print(f'Average de novo generation perplexity: {avg_generation_ppl}')
benchmarks/Generation/ProtGPT2/protgpt2_generated_sequences.csv ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Sequence,Perplexity
2
+ LAPSVVTGVAQSSPLTIVTNPKEPRQPVPASDGADYLKTIPGFAVIRNGGSNGDPVLRGMFGSRLNILTNGGMMLGACPNRMDAPTSYISPETYDKLTVIKGPQTVLWGPGASAGTILFEREPERFGELGSRVNASLLAGSNGRFDKVLDAAAGNRLGYLRFTGNHAQSDDYEDGAGNTVPSRWKKWNGDVAVGWTPDEDTLIELTAGKGDGEARYAGRGMDGSQFKRESLGLRFVKSNVSDVLEKVEAQVYYNYADHIMDNFRLRTPDPSMPMT,2.6532732777535712
3
+ MPNFFIDRPIFAWVIAIIIMLAGGLAILKLPVAQYPTIAPPAVTISASYPGADAKTVQDTTVQIIEQNLNGLDNLLYMSSTSDDSGNATITITFAPGTNPDIAQVQVQNKLSLATPILPQAVQRQGVSVEKSSSSFLMVVGVINTDGTMTQEDISDYVAANMKDAISRTSGVGDVQLFGSQYAMRIWMNPNELNKFQLTPVDVITAIKAQNAQVAAGQLGGTPPVKGQQLNASIIAQTRLTSTEEFGKILLKVNQDGSRVLLRDVAKIELGGENYDIIAEFNGQPASGLGIKLATG,2.829348107084168
4
+ MAYRSTTLLALLALVLLYLVSGALVFRALEQPHEQQAQRELGEVREKFLRAHPCVSDQELGLLIKEVADALGGGADPETQSTSAWDLGSAFFFSGTIITTIGYGNVALRTDAGRLFCIFYAAXFGIPFTLLFLTAVGDRLGSSLRHGIGHIEAIFLKWHVPPELVRVLSEMLFLLVGCLLFVLTPTFVFCYMEDWSKLEAIYFVIVTLTTVGFGDYVAGADPRQDSPAYQPLVWFWILLGLAYFASVSAML,3.119025307842878
5
+ MPNFFIDRPIFAWVIAIIIMLAGGLAILKLPVAQYPTIAPPAVTISASYPGADAKTVQDTTVQIIEQQMNGLDGLRYISSNSAGNGQASIQLNFEQGVDPDIAQVQVQNKLQLAMPLLPQAVKEQGVSVEKSSSSFLMVVGVINTDGTMTQEDISDYVAANMKDAISRTSGVGDVQLFGSQYAMRIWMNPNELNKFQLTPVDVITAIKAQNAQVAAGQLGGTPPVKGQQLNASIIAQTRLTSTEEFGKILLKVNQDGSRVLLRDVAKIELGGENYDIIAEFNGQPASGLG,3.775355043694786
6
+ LFLTMAEAQLRYKTTEECLAYFGVSETTGLTPDQVKRHLEKYGHNELPAEEGKSLWELVIEQFEDLLVRILLLAACISFVLAWFEEGEETVTAFVEPFIILLILIANAIVGVWQERNAENAIEALKEYEPEMGKVYRADRKSVQRIKARDIVPGDIVEVAVGDKVPADIRILSIKSTTLRVDQSILTGESVSVAKSSDAVPDPRAVNQDKKNMLFSGTNIAAGKALGIVATTGVSTEIGKIRDQMAATEQDKTPLQQKLDEFGEQLSKVISLICIAVWLINIGHFNDPVHGGSWI,4.136701078251139
7
+ MPNFFIDRPIFAWVIAIIIMLAGGLAILKLPVAQYPTIAPPAVTISASYPGADAKTVQDTTVQIIEQNMNGIDNLMYMSSNSDSTGTAQITLTFESGTDADIAQVQVQNKLQLAMPLLPQAVQQQGVSVEKSSSSFLMVVGVINTDGTMTQEDISDYVAANMKDAISRTSGVGDVQLFGSQYAMRIWMNPNELNKFQLTPVDVITAIKAQNAQVAAGQLGGTPPVKGQQLNASIIAQTRLTSTEEFGKILLKVNQDGSRVLLRDVAKIELGGENYDIIAEFNGQPASGLG,4.210716900525416
8
+ MPNFFIDRPIFAWVIAIIIMLAGGLAILKLPVAQYPTIAPPAVTISASYPGADAKTVQDTTVQIIEQQMNGLDGLRYISSNSAGNGQASIQLTFESGTDADIAQVQVQNKLQLAMPLLPQEVAQQGVSVEKSSSSFLMVVGVINTDGTMTQEDISDYVAANMKDAISRTSGVGDVQLFGSQYAMRIWMNPNELNKFQLTPVDVITAIKAQNAQVAAGQLGGTPPVKGQQLNASIIAQTRLTSTEEFGKILLKVNQDGSRVLLRDVAKIELGGENYDIIAEFNGQPAS,4.526996795741569
9
+ MLKIIIPTTMLLPMTWMSKHNMIWINATVHSLLISLISLSLLNQLGENSLNFSLTFFSDSLSAPLLVLTTWLLPLMLMASQSHLSKETTTRKKLYITMLLILLQLFLIMTFTATELILFYIFESASLPTLLMITRWGNQTERLNAGLYFLMYTLAGSLPLLVALVYIQNTTGSLNFLIIHWSTHTSASFVSQTLLLMAWMAAMAVMAKMPLYGVHLWLPKAHVEAPIAGSMVLAAVLLKLGGYGMLRITTILNPLTNYMAYPFLMLCLWGMI,4.629232424547782
10
+ AKFINRWLFSTNHKDIGTLYLLFGAWAGMVGTALSLLIRAELGQPGTLLGDDQIYNVVVTGHAFVMIFFMVMPIMIGGFGNWLVPLMIGAPDMAFPRMNNMSFWLLPPSFLLLLASSMVEAGAGCGWTVYPPLAGNLAHAGASVDLTIFSLHLAGVSSILGAINFITTIINMKPPAMSQYQTPLFVWSVMITAVLLLLSLPVLAAGITMLLTDRNLNTTFFDPAGGGDPILYQHLFWFFGHPXVLILILPFFGIVTEASAIPRIFNWMVTFHGQLMYHHMWIIGVL,5.0608380016313275
11
+ LVEKDPIKTSFEKWAKPGHFSRTLAKGPNTTTWIWNLHADAHDFDSYTSDLEEISRKVFSAHFGHLAVVTIWLSGMIFHGAKFSNYEAWLSDPLNVRPSAQVVWPLVGQDILNGDMGDGTYNGFQVMTSGLFQLWRASGITNEYQLYCTAIGGLVMAALMLFAGWFHYHKAAPKLAWFQDVETALNHHLSGLLGLGCLSWAGHQIHVSLPVNKLLDAGVAAKDIPLPHEFILDPAKFASLLPGLTQGLTPFFTLNWSEYSDFLTFKGGLNPVTGGL,5.597917119515088
12
+ MVRKVYVTLQGKVQGVFFRAHTQATAKQLGVVGWVRNTSDGTVEGEAQGPADKVDEMINWLHRGPPQAQIESHEFNSEKKELEAFSSFHIRY,5.635017933300935
13
+ EFGFWEIKFPEYLKGRPTTGRPEWVQDVDLVNKWAVPGLNPPHHFSPPVNLTGVEDTLPVSWVMVSMVVGFVLIVATAGNILVIIAVFTSRALKAPQNLFLVSLASADILVATLVIPFAMANEVMGYWYFGKAWCEIYLALDVLFCTSSAWHLCAISLDRYWSITQAIEYNLKRTPRRTKAIIITVWVISAVISFPPRCEINDQKWYYVISSCIGSFFAPCLIMILVYVRIYQIAKRRTRDLSRKSGRPSLLSEVHAAKSLAIL,6.370992471309986
14
+ MVYVSRISVFAFLGALASVAYGQVTPPNFGTEQDRVNFTKQIVPVLKEKCVVCHGPDKTKGKLRLDLRIEAFKGGESGESIDVIPGDPENSELLERITSKDPEFRMPPKSEHKPLTEAEIALLKQWILEGAKYDPAWAFTPPKRTDLPKVKRDEWAKNDVDRFILAKLESEGLTPNPEADKATLIRRVTLDLTGLPPTPAEVDAFLADKSPNAYEKVVDRLLASPHFGERWGRHWLDVARWAESNGFERNTIRNIWSYRDWVIKALNDDVPYDQFTVEQL,7.0499259667086145
15
+ SSNAKTVLITGGTGFVGRALVKRLLSTTKHTIVVPYREEADLHDVKVLQVKGDLRDAASLDAAFEGVDCVFHLASYGMSGPEMFELNVEGTRNVVEACLRHGVRRLIHVSSIAVMGEPSDHPRREADESLPARQATAYAKSKVEAERIVLEANGSDGLETVVVRPPMVWGPGDTQFLPRLVRMARRGLRPVIGNGKSLVSMVYIDNLVDGLIAAMDHPEARGKTYFLSNDGHASQREFIETVARAIGRPAPKLTLPVPVLYWAARLLG,7.429969652690046
16
+ SPELIEQLLQNYLQLPDAEKRKVADQLQTSNIRYCYLLASEKGWLDRVESCLAAEGCDVLQPDHTGRNLLQVVASVSPDHTARLIRALLARGADVHAQDSLGNTVLHILILQPNKTFACQMYNEILILGAKLCPTVNLEAVLNHQGLTPFKLAGVEGNTVMFQHLMQKRKHVQWTCGPLTSTLYDLTEIDSSGDDQSLLELIVTTKKREARQILEQTPVKELVSLKWKRYGRPYFCVLGAIYILYIICFTMCCVYRPLKPRITNRTNPRDNMTSLEL,7.910941817905356
17
+ ADVNLNARDLHGMTPLHLAAKNGHDKVVQLLLKKGALVNIQDKLGSTPLLEAIRGRREDTVKLLVEHGADIRAQDSLGNTVLHILILQPENSTSLKFAEMLYDMILLRSGTWELETTQPNDGLTALQLAAKMGKAEILKYILSREIKEKPLRSLSRKFTDWAYGPVSSSLYDLTNVDSSGNTVLHAMIMVADNTPQNSRFVKQMYNLLLSKGARLCPNVPNHQGLTPFKLAGVEGNIVMQEILRGTTISIPFTCITCGKKDTRFRGMSCEN,8.179497248919981
18
+ DPFNNFFRRSKIAVCGLVFFVLFIIYMVLGSMIFSAIERDHEQQAQRELGEVREKFLISHPCVSDQELGVLIEEVADALGGGADPETQSTSAWDLGSAFFFSGTIITTIGYGNVALRTDTMGRLFCIFYALVGIPLFGILLAGVGDRLGSSLRHGIGHIEAIFLKWHVPPGLVRVLSAMLFLAIGCLLFVTLPAYVFSHMEDWSKLEAIYFVIVTLTTVGFGDYVAGADPRQDSPQYQPLVWFWILL,8.306921086116862
19
+ GPQSFVHFTKQSLALIEQRIAERKSKEPKPSSDLEAGKQLPFIYGDIPPGMVSEPLEDLDPYYADKKTFIVLNKGKTIFRFNATPALYMLSPFSPLRRISIKILVHSLFSMLIMCTILTNCIFMTMNNPPDWTKNVEYTFTGIYTFESLVKILARGFCVGEFTFLRDPWNWLDFVVIVFAYVTEFVVAEFVSFSALRAFRVLRALKTISVIPGLKTIVGALIQSVKKLSDVMILTVFCLSVFALIGLQLFMGNLRKKCFFPDG,8.471762198050271
20
+ MLKIIIPTTMLLPMTWMSKHNMIWINATVHSLLISLISLSLLNQLGENSLNFSLTFFSDSLSAPLLVLTTWLLPLMLMASQSHLSKETTTRKKLYITQLILLQLFLIMTFTATELILFYIFESATLLPTLLIILRWGYQPERLQAGLYFLFYTLIGGVLVLLSILMIYVNTNSLLIHTLPMFNSTMETSLYTKIMWFACMMAFPTKMGLFPIHMWLPVVHSESPLAGSCILAGILLKLGGYGMMRVVTILNPLTNYMAYPFLML,8.583127806228307
21
+ MVLRLVVLALLCWTPGLWAQQADTLTLDEVVVTATRSEQNLQDVPASVSVITAEDLQRQAPRTLGEALRYVPGVFLDGTGRTNGQDINMRGYDHRGVLVLVDGIRQGTDTGHLNGTFLDPALIKRVEIVRGPSAALYGNGAAGGVVNFITRQPSDQLTGSVRLNTSLPQHDGDNSQQFYSLMAGNRLGEEGKLGMLASFSRQEKGQARDGAGNDIASLDEDSLSGKLLWQLTPEQQLDFSLDHYRFKTNAPHNPVNTDFTRHTRQESDSTVRRFFNQVQ,10.282136779067205
22
+ RPLVAIDFGTTYSGYAFSFKNQPETITLHWNSEISKALRKPTVLLIDSNMKEVAFGYEAENKFATLALDAEEKHFFFEKFKMALYDKNDRSILPSMRSANGTEKKAIDVFAEAIRYFKDHALKTINSTYPIDKQDLLWSVTVPSDWDARSKEFMRQAAVKAGLGEASLASEPEAASMYCVEHEVNKFGDEIKSGTKFLVVDVGGGTVDITVHEVLENNHLKELYKASGGPYGSVGIDQEFMKLFQLIVGAEAIEQFKIK,11.589466291126676
23
+ MKVSVIIPTYNERENLEELFSRIDQALQGLNYEIVVVDDDSPDRTWEKAQELSSKYPIKVCRRTKEKGLSSAVIRGFKEASGDVFVVMDADLQHPPEVIPKLIEAIKNGSDIAIGSRYVKGARVENWPFYRKLISKGALVVTKIPLKDLKDMRDFACGFIAIKREVIEKIEFDENLTYGKILKILKYCWGGFSKVVEVPFTFGIRARGESKLKGKTIFEYLRHIWSLNYTFFRILKLIFALGFTFFGVSLAYLTLVLMEKYFLWYIPGWAN,12.090375297427133
24
+ PGMQLNEFSSSGLGRAYSGEGAIADDAGNVSRNPALITMFDRPTFSAGAVYIDPDVNISGNSPLGAPGGTPSDREMKLVPTSHIALPINDRLAFGFAAYSNFGLATDYGDTFVGSTTPTDLEMKLNSLSIGGNAEITDQLSFGASITYQRAKIERFAGDLGQLVAGQIMQSPAGQTQQALLQAQSQGNLGSALAYANGIDSNTKIAHLNGNQWGYGWNAGILYELDKNNRYALTYRSEVKMTFKGNYSNDMPGYYEMNVPAWHNVSLYHE,12.173339409793382
25
+ DASRVYYEDRSVVKEDGSVVKEGPFDLQSTLTLSGVVRDYASGTPLADAEITLTGPAFRAHTNSYGKFVFEGLAAGTYTLSVSRFGYEPVSETIAVSAGQTVESNVALFALASEVEILEVTADADPVFNTGDVATSVGTREMKEIPTVVGDVDVIKSLQLLPGVASAGEGTSGFYVRGGGIDQNLYLLDNIPVYNVGHLFGFFSTFNSDAIKDVTLYKGGVPARYGGRLSSVLDITMKEGNSDKLSGTASIGLLPASAKLQGPI,12.228122271950522
26
+ GAVIDLSTATFDFGGSYTGVAVGDTITAVVTAPTEDDYVFQWFKDNVLQSGATGNSYTLTAAEAGKAIKVVVSGSKSGYTSTAKTAAVTTAITASSLTLTADKTKLTVGDTVTLTASLSDKNGNAVTGRTVKWSSSNTAVATVSSSGLVTGVAAGSATITASAEGQNGNGTANITVVAASVSSISLSPASASVAVGATQQFTASGYDSSGNVVTSGRVVTWASSNTSVATVSASGLVTAVAAGTATITVTSGGKSGNATVTVTAATLSSLSVSSSNL,12.23423450162324
27
+ MQTYNNPEVTYDWWAGNARFANLSGLFIAAHVAQAALIMFWAGAFTLYEISWLTADQSMGEQGLILLPHLATLGLGVGDGGQVTDTFPFFVVGAVHLIASAVLGAGALFHTFRAPSDLAAASGAAKRFQNFNPDLSKLGFISRHTHAAKPELWSQLIGGKHKTTTGFAWVGVANPDGSITGMGTAGIQVKQAEGVTVGLAHYIWPLIGAAALAATICFFGYNSVITDIAYPEKKLEAVTFGYQTQAFDAFTQAGQVIGSTT,12.368396953842797
28
+ AEGIRFAIVDEVDSILIDEARTPLIISGQAEDRTKELYKTLTRVLKSLEGGDYSVDLKNKKVSLTEKGVERTEKLLREAGIISDGTDNLYVVGAIFHAQKVATGKDYLFRKIVEKGRVEYTIDEKLKQVVIVDEFTGRMMPGRRYSDGLHQAIEAKEGVKVQRESKTLATITYQNYFRMFKKIMKLAGMTGTAETEAEEFKKIYNLDVVVIPTNEPMKRQDHSDQVYKTKREKYNAVLKEIEELYKKGQPVLVGTTSVEASEFLSNLLKKRKIPHNVLNAKPHAREAEIIAQAGRKG,12.697313288610662
29
+ MPNFFIDRPIFAWVIAIIIMLAGGLAILKLPVAQYPTIAPPAVTISASYPGADAKTVQDTTVQVIEQAMNGVDNLMYMSSNSDSTGTATITLTFESGTDADIAQVQVQNKLQLAMPLLPQAVQQQQGVSVEKSSSSFLMVVGVINTDGTMTQEDISDYVAANMKDAISRTSGVGDVQLFGSQYAMLIRMKPDLLNKFGVTANDVISALQAQNSQVEAGSIGQLPTLPGTPLQLSITAQSQLSSEQEYGDIMLRVNQDGSRVLLRDVAKIELGGENYDIIAE,12.907199708267516
30
+ DPLYYTNNGGLGFVLSALFGYIWWGYKSGTPKEVRSEAKYRMLTVVVPCYNEEKTIGRTLCSLLESDYPEDKLQIICVNDGSKDKTLKELEDFELRDVPLVVIDQENGGKARALNAGIDAASYEYFACVDADSQVEKDSLKKMVHHFADPSVGCVAGRVKIGNRWSWISRLIDLIQYLIAFNIGRRGINSITVVPGAIGAYRVSAIKKAGGFSGKTMTEDLDLTIAILRAGYKVVYEPEAICWTDVPETLKGFTRQRFRWTYGTMQ,12.993370901156627
31
+ DISAEDRMWSDAEKRMEWQRIDRQVANRKSHGKRGLLSRIFGWIFRRNMDEKALKLLPHIKCYTPAEIANAIQSMTPEDLQRYELRASMFSLADKSNSGTISLTEFRNILECLGVQMSPTELQTLFQVCDRDQNDMINFNEFANRFHEPAKEIGFNVAVLLTNLSEHVPHDPRLRNFLELAESVLNYFQPFLGRIEIMGSAKRIERVYFEISESSRTQWEKPQVKESREFRTMQEIYNHIYYHTKQKENENVQRNAERWKMIEENKL,13.119829828981848
32
+ SDITRLIVLVGTTLGVVLFLALAVWIVKSFWSPYQEINDWALALTIVDVLVVGVPAALPSTVTVTMALGAAYLAKKQALVKKLPIVESLSGVEILCSDKTGTLTKNKLSLQGAWLPGSEKPEQISGLVPEGSRQNITKCIHIAVLCNRASYKDGKLVGTPTEKAILKGLECWGVGYGEMRKKYPLVHQIPFNSTNKFQLSIHDKDNRYLLVMKGAPERVLEKCSTVLLQGKEQPLDEQWHTAFQTAYLSLGGLGERVLGFCQLYLSE,13.625918655212923
33
+ MEVTLFALLALVVASAIIAWGPVTKPLHPHEALVDVGGHKMHYICQGKGSPTVILEAGGGGGSIEWGWVQPQVAAVSRVCTYDRAGYGWSDPAPHARDAGIVAEELHRLLRAAQVPGPYVLVGHSIGGFNTLHFAARYPQDVAGLVLVDATHEDQYRRWKGYEQEMAPFTSGQALDNLAANVRVMESLPPVDAGKVRDLPVLVLSAGREHPPFDMKLYREQWQREVVDLSNVSDRQKHIVADRSGHHIQFDEPDLVVAAIRE,14.117540370332351
34
+ MDYHEDDKRFRREELCREAEFLKLKMPTKKVYHISETRGLLKTINSVLQKITDPIQPKVAEHRPQTTKRLSYPFSREKQHLFDLTDRDSFFDSKTRSTIVYEILKRTTCGITSLLANGIYWLAISTPTINEYPSFLSPSLYAAVLPFTFGFVVSFITLPRKALEYIEQNGQGKAAVHHHTHTHDHDAGDVKIVVNDKDLESHVVAGALMFVAALFSLVFHQWWSDYCDVAYTVFIRVRDVIFGHVKWT,14.986517088631075
35
+ PSNISAWWNFGSLLGACLILQITTGLFLAMHYSPDTTTAFSSLSHICRDVNFGWFIRNLHANGASFFFICIFLHIGRGLYYGSYLYKETWNTGVILLLTVMATAFMGYVLPWGQMSFWGATVITSLAVYLPWWGQHVQKLLFQLIPALLVLLTAWTPFLIGYTLIRETTETESTNYGTPLRLHRIISHHLLLLRAVAXXXXXXXXXXXXXXXXXXEIKAAFWSVFHFILPFMATALAAPRSLLLDEANSTNTLVTTNLIFNFIFFLLPIFPATLSMFSPNLLGDPENFTPANPLVTPPHIKPEWYFLFAYAILRSIPNKLGGVLALAASVLILFLIPFLHKSKQRSTMTF,15.230674054330438
36
+ SRTSELAVGIFVIIFGIALFFLAMKVSGLVGTNLSDGYTMKAQFDNVNGLKPRAKVTMSGVTIGRVDSITLDPVTRLATVTFDLDGKLTSFNAEQLKEVQKNALDELRYSSDYTQATPAQQMKACSEQMMTLLAPQQKEKKTLEVGDIIATSKSSVIYNDMSTYLNDLIGDLGTIASGVNELWPTLQANFSTVKTMAQNLLTANQQLPQLLGNVQTTSQLLAQDNNNFNKLVTDFALTIDALNAVVSKSGANLDTAIATANDLNTVLTENRQ,15.441068484289472
37
+ DDVTVVYQNGLPVISVRLPSRRERCQFTLKPISDSVGVFLRQLQEEDRGIDRVAIYSPDGVRVAASTGIDLLLLDDQLIIREKYQIFINDMSPGAKVAQTAPAREIKWDHEALTEELTYEGQSEKLRDKDRTEVRRTMLNLERRLSDIRRQLAPLEKVRIEISRKMEDKTIQSYALWLMLAVVVCLMGLAWWQVLASLATFCVAVIIMVFVGRNWSAVLQRRRKRMGAEELRHRAYQTHQCHLCAICFTNQKMATLVPCGHVFCEECIKQHL,17.259236085949127
38
+ DNTTNIVHVPVHYVFIMALPIIMCILGLLLNVLALWVFYGHMKRTTSVVYVINLAIADLLFVLSLPMYIHYYFNKTHWVFGELLCRITGTLFYMNTYCSILFLTCISIHRFLGVCYPFRLNLVKRNYAVCVSVGVWAFVMLACMPTLVFNQTEDYEGNRTICYDHLEDAQRHWALYLQVKVNVFVIGFLIPFLIITFCYSQIVATLLKVEANLAKKKSKAIRLVLTVVTVFVLSQFPYNFILLAKTIKLQQINSSCEFEKIIE,17.688518287684857
39
+ MDYHEDDKRFRREELCREAEFLKLKMPTKKVYHISETRGLLKTINSVLQKITDPIQPKVAEHRPQTTKRLSYPFSREKQHLFDLTDRDSFFDSKTRSTIVYEILKRTTCGITSLLANGIYSAAYPLHDGDYEGDNVEFYGDYTIHAGDPENGGQCVVITLTDYGNYEPFYSASLEFSRKHFGFSALSVQCELSDVQSFTAVKQQFINLLSSRAPITVRKFVSPEFPRNSDSHDIFSLSCDVSNTGHVTAVTCQVSARFLTRYLTD,17.749448694031326
40
+ MDNKLTLALAAIMVVLIAFVGINVMNNVNTNPTVVKTATVERGEYVERVDATGKVVAAQSTDLSFPATGEVTWLKVKVGDRVSKGQLLAELDTTDLEAQKNLALSQLEQSRASLALTRQTLARQQALAQTQAVSQQDLDNATNALRVQEAQLNQLRSGSRPEDIAAAQSQLRMAQDDLNRLRNGSRSEELRSLQAQLDVDKAKLNWDQKIVRRNQVRAPFAGVIAERLAEPGALVSPSQPILSLVADDNLEIEANVSEADILHLKPGQKAWFT,18.826645316378666
41
+ MVSVIIPAYNEEKYLEKCLESVRNQTYKNLEIILVDDGSKDKTLEIAKEYAKKDERVKVVTQENGGVSSARNRGIEESKGEWIAFLDADDYWEENHLEELVKAIESNNCDMSICNAIWYYWWDENKRIIKRLPRESVIEAEDFFKELPIFMLTVVVWNKLFKKELFDSIRFPEGKTYEDTATIVDVLMKCKKVAYLNKALVNYRIREGSASTSFNPAKAKDHLKAIEVAFKEAHAEGLGDVALRAFQRRYVNSII,19.064942570982527
42
+ MRPNLFLLALPFIALAAPAHAESITVNGDARIRALGKQNYAEVRTHISDNGTKATVDATGHLRIDAPLGERAQVKAYGELEAIYAKPSGDKNKASNTERLAYAGLKFADHGSIDYGRNYGILYDTNAWTDVFPLWGADVLESNTAAYKRTYGNVLTYRNNNAFGYVDGLSFALQYQGKNPTTGEVVKGDRVNSDGRRLGAATVGYDFDGFGIGFAAASSKTEQNGIKKDTDGREYAVAGSAKVGAAQVAGTYAETRNATRFGQTGKGRVE,19.328842227230165
43
+ DDALPLSYYGTNKGLDRPATGPDRREHRFGFIADASAYPSQQLFIRGKVDVRDYQGSDTLRDDNAYVRLRNLTVGYDNLLPGSPLNVVAQFDLFNVLNATNVKDYQEVLSGGKAAAANFPIPRTYTLGLKLTF,21.699704462300236
44
+ MAGRKILRDPYIIKLLELTEHNPGKRVTARCTSEGILTVPPDLICCLLIQLPIDSIDHHSFILNLQCKDDYQLILKNGSVLHSSCKYTPGKPAEVKAEGGSISIAITKLQLSDSGLYSCQPPNHEPSHGQLNLTVYKQTGFISVSDTGVGIVRVRAYAERPDDLNVTLTCLVTGVFPHDVTVQWTKNNSPLSKDSSPAEEQQHEDGTFFLYSKLTVDKSRWERGDTYTCVVAHEALPNKITKTLDRSKCQGEGLAPL,21.725914279351123
45
+ MIMTMTLTMMMVMISNKTHWNSFQMNLMMTSLMILSLGGLPPLTGFLPKWIIITELMKNNNLITASMAMMALLNLFFYTRLIYSTSLMKLYPTNNQTKTKPKMMTHQMKLTALMTITMSSMTLPLAPQLITTELMAFAXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXTTLIMMATSLPIIMKPMTPWWC,22.24920166064802
46
+ GVVKAAVSFCLAFCLVLCIAVTAVWFLSPTSDLDKKAVLPREYEQFKANQSSDQLRAFAAQYGLDATPAEGATDAMLAKGREIYVVNCSICHGSDARGASGLGVTLNPRPPSFTDPGFAAAHTDGEIFWVIRHGIRNSAMPAWKDKISEQDRWDLVHFLRTFKPESQKELTEAEIAALSVGEKVTMGQALFQEKCIVCHGANGQGNQTVGPVLNPSPRNFTSGVFKLRSTDQGELYAIRNGIRQHGMPPWGSQLKD,23.148315124172598
47
+ MRTHTGEKPFACDVCDKRFNQKAHLNRHKTIHTGERPFACDICNKKFSEAGHMKIHTRTHTGERPFSCDVCSKKFSQKIHLNTHMRIHTGDKPFVCSVCNKSFSRKGDLNKHMITHTGLKPYSCDICSKSFSLKYNLITHKRIHTGEKPFVCDVCGKAFTQKCNLNIHMLIHAGVKPYSCDLCNMSFTQKSSLNTHLRTHAGVKPYACDLCGKSFALRQTLSRHHKTHTGVKAFACDFCDKTFFANQHLKRHRLTHS,24.00786399231911
48
+ MRITKGFTLIELMIVVAIIGILAAFAVPAYNDYIARSQAAEGLTLADGLKVRISDHLEAGTCTADNTAVNGKTIGTEGTVGALPEGVSGDCKLSVAFTAGAAGKEITVKYDHKAGALTYQSATGKTISLVLPASLITKAGSWQGSVSWDYLKNLVPTNLRYAYVRSYMGPDYNPNNWPASGSTMPSDICWKSGDPNYTGTPGCTKNNSVAWGYPINPATCTFTPVADPTPTLAPVASVSLNKCYSAGTATLTATAA,24.29027038113483
49
+ MVGRVGGWIVSVDPDGRFGPKPYKRHRAGIKDALSYLYQLKCRLRIDPDTWREWASPLRESITLEECRYTMPSFAVQASFMTLYWSVCEALFGCRFVYGPFNPILGETYEAHVADSDDEGQKTRYFAEQVSHHPPISACHVDSEKFYLDGHTCIRSKLTGKAISVHHVGQSCLTFKRAGETYLIHMPNQYCRSILTVPWAEQETVHCPTENHSAILEFTKGGFSAKFTGRWSSVLHVISAPHAATAEEKYPVTQVD,24.583101326751542
50
+ GFHYFDITLAYFIPALLALLSSAWLIRAIRMDRADERAALTARIDELEQHNAALQARVDELERHVAMRTSELLETEQALAAERAALLDKGNHLASNFDTLKQRVAQLESERDALAADRDNLRGERDTLSGQVATLEAQRDEFARQLDAARQQAATAEERARQAEAAAASLRQRLDEALARVTELAGQNSELQAALARERQNNDALNARVRELEEQVARAQAGANQAQAARDRAQADAERLRQLEQQLAGANEAARRRIADLEDQLNRANRTIAEL,25.434296722653517
51
+ QDTVADETGFFETELTVGTKEDRYSTVFNYRRINRDLKEPQDVNVYYARYEWQVAEDWKLRPGIRLDHDDFFGLTSSPKAYLMYEHDNGDTYKLGVARAYKAPNLYQSNPNYILYSKGQGCYGSSSCYLQGNGDLKAETSVNKELGVEYHHDRFAAALNLFHNDYKDKIVAGISTGVSGNSEMTTANYMEGWMTSVKWDWQIADNWKTDTSISWSRNKPKTSSSLDYQLRPENTLNSTLTWQARENLDFGWRVVHYG,26.433123728733182
52
+ PPSECPPSPCGEKEYFDVCGQCCKKCKPMEGKISTACRKISDAVCDSGEWVEHPASDKCYACQKTCATRRPTQKACAAMRDCKCLDYFYRQLCVSCIPKCPRACDNQFCTAICNPGCVCPEGLFQDEFTGLCVPESECRTGCSNGQVYRECTSPCPSTCGNPNPRPSCSKTCFDGCACPEGMVLDDQNICVLPEQCGCTLYGRHYKPGETFTSDCGNPCEPTCENAYRTVVCTR,26.468925245048567
53
+ ENKYSLLYKNQTLFDEWGIKYQVKSRMIEKSLYSVVFNVNDKKYNIIMRLYDKETKRIYSKREIINYIKNNSSINYKIDLIENGEYYAIAMPYIKGCTLRQYINKHISEKDFINILQPLIETLKVLHDKGIYHRDLKPENILIEQDENLFMIDLGLAIDLTNAIPTIDYGTDGFMAPEQALGNKPTFASDIYSLGVIAIELLTLKNPFDSNISLSESNWISTLHKKDKPLSSVLSKLILKMLEPSPNDRPNIKDVLNSLNSLEVLQRGVN,27.369412815985804
54
+ EKKRKRDAVTWPPEKRQDAILFYLKNHNAPGMEFTEVAKAAGIHKSTVSRELKDPTFPPDASSRAGPGRPKKLSAKADELLNAWIKDTYVEGDLRREVTANILREKALEHGIIELSASTVWRILHKQLGYSSKKMSNRAIAADRRQVQEYRLEVIKAMHDNPYIYLDEIWINQNEAMNHVWFHDSETGLRSTMGLNKGSRGKRIIGVIDAEGFLHYEFKSTTDSTAAKTIVDFLEHNEGDNYLIVVDNAKYHSRL,29.60231093300791
55
+ MVLFRATLVLTLFCVQLALAQVGINTSTPKATLDITAKTTDGSKPEGLLIPRVDRQQAETIPANPQLTIYTDGKTGKGFFYLGTTTPAGTANILDISKNGYYFYNGTAWVALNSGTYGSGTSGTPSATTDKEIYTNSTDKKVGFYSPTGTLVGYNSLTTTDYNSLITSGVTPSYAIGTSNTAALSSFYTGSVSGTLVTTGLTPVIGAAATNIYTVLDGGTSSTITIGSGGTVTSVTPIGGVTSVSLPLSGVSAVSITGSGSTITMGSGGVVTSVTAPSSVSTISITPTSGSIT,29.77388588380658
56
+ LATLRQLWAGTFRRLWRAGDRDPDPAKVPLRARLVLMAALPVLALVLSAALTWQAASEQVRSATDRTLLGEVAEIGRTVSTAYGDVDTRLRGQLDGLARIPGVRSAAVVPLGAEGGTTVLGERTVPAADRSRWFSSLPLRSGSPDTVVSAPVLRGDRVLGSVQVVLDTDRVNALVSGLGWVLLLDWLAVTLLLWAAAMVLLRRQLRPLARMTAVAGAVAGGDLSRRVPDPGPDEVAQLGRAFNTMLDRIEQLLAGQRALLDDVSGELNARTVEL,30.911742604776983
57
+ LSSSCFPWSLGVSVMTFISLSLLSYGPDRPLCPLTPTLSSLQFLVGTWKMVEGSGMFQEFCNHSASQWTFTADGHMTSKAFYVQPQQGQQLRCEEMRLIAQKHHPDTHRCRSLGQPPDTPYHYEYRRDCQDPLTMQHYVTEVMSRRLILSRQKPWDPAPDHIPPGTKIRYVSSPWGPEFCEPVPTQGEAVTLHGTVTHHTLGPLWGEGNHTALTDGFPEGVSPDVFLSAWGPKGLEKLNSLAR,32.019316129846914
58
+ SPLQIVRDHFIREGRLIDPPEREFVPNDMPEYVLPSGERLPPIDVAKSRHRAVMPPPPSDYMAEYMAYADIMAPVTYYTRKDLGLGTKTILVAGAIGGLCGFLWFFMYVKGMGVLDALGITPFQIVRGDFSDTMSMANGFHMFFMITCGICFGGWATNWSRKAGFSDSMEASLMSAVVAYVLMVPMMMGATHTEMLANGHLMDLTHWTVAHLNPFHMMGFFAINVVAGLASIMVFALHLWYALTVRKTFDPEVELKTLKN,33.613951886997285
59
+ MAYRSLFTSESVSEGHPDKIADQISDAVLDAVIAADQASCGTAKAAVTTGLVTIGGESAMCWVMSDMIRTTLVDIGYSVTAVGDEGGFAPNIQSFHDALKVIGDTIVNTRKAQSDTNVQIGIDVCATSAKVLPTEYMGYEDRGASLIFSHRSGETEDSTIADFCVGVLAADIKQTLPPIVAELGKPARLRAMGQLAPLAEDAAFVGYDWNHTTGFPRFSAGSMSTADALAAADNTADAAAMANTALAEAAMAGDHATAARWSAAVEDLTAQAKAGTITTGKIAEAIRAACL,34.430385572117466
60
+ SPDVQIHPPKRDPDPWGIKGLSAFLLGGATLWGLAALAIHLAGLVPFPTVELGTADFHMTLPFMAAAAGGFLIAKHQPRDMFGIGMPEDRPLIATGAAVSFALVVIALVLYAVAPGTYTPRAIGLVGSLAVSAGILGVFGAVLGRLRPVRGIGLVPAAILEGIARQPEARGPVLVSMVAGFALGAVGLLAPHHFGLAFGFGAIGGLGAVALAGWTGALVGAPDISGPTAIAAKMQRFYLWATVLPVAAMVVALVAIATPHLNLGIGEGLLLGGMLAGPLCVAA,35.89969391101693
61
+ SAWNTNLNMDARSAWATYQRQNGEVIGWMPIVNYADTIHDRDFAQAQLIFSTQVSKLWWAEDLGVNAFVVTLSNDLYQLWLNSPDEKADLMKQININAYNINWGVDDGTYADFQVWNIARMLRNDPSTNGKRYFAYGSDAPLIAAYRDQGWETNTVRGYGEYVVLPKAAGTVDNEVAQAAVDNWYSGAIANRLGTMANTGAVVQTGTTDNGIYGYAMTDGKTLYFPRYNTKYYNTDQGGVAHEFGHHVDYAV,39.68959897551875
62
+ GEKWIMKFDGALNPSNISAVLAGGLIGLAVGLQATFFNVSTTSHVTGVLGGATVVGMATYYKWASPWAISAGTFFSLVLGTYLGSQLVKRLHVYKLPEPIAFFGGSFVMVWLWSWMTTYIYPASHALTPYASHLSYLCAMLLGALGGILGSLITPPLKDTFIASALGIIGGTGFAVSHLTMLNPTIPSTLYAIAYAATGIWGAITATRIARVLNLFEGALVCGAATVFYSFVKVVAPELLPVALASIVCAAGVLYVANLTKVV,45.78566418659657
63
+ LAPSPKVFFIDDTPIQWGFVIILLLLSSGGLFFDSKLAGIFTSLGIAVGLIGAALTTFADTRKGKVTPEQLDRVNSTLKTFFGWSLISGVLGLAIYAASLNIDGKLAFVDSLFYFTGTGLVTVGFGDIVPTTTAAKILVVVLIVGGIGFAGSMISTVASWIRSQQEKSELDKHTIRAHARNIVICHDDPRVSALCEYLQGYFLVDDKQSTYHVLPMYLDGNSLERRALRKKLFSNRVAKHFAREGSVRDLDAVRRANVAGARAVIVLSKADENID,47.50012378184719
64
+ GSTDLSTWQTYVQSTAATITSYYQDTASQAQKNQVLANVTQIINQLDSSTKTKAEVDSALTAINKIKAQIAGDAGGGSSTQATIQGVLDNLITKANNLLRQGQTISEVNALISDLNNLVTQAKGQARSDQESVYTKADSALSNLQNQLNQEREVGSNDRYISQTEKDNLIQNVNNYINNEYLWTDGTSNEGQRLTAAKNLISDTLTNDQKRAAQDAINQLIKDANDLLNQARDRAANQGVTQTEKDNAISNVKTVY,51.616257412346954
65
+ PVPVPVPVRRPSNTQLDSPGHLRTLLDRNHLPPPDTQLSPDNRLLQDNVPGSGRPLPERTRLSPDRRTLQDFPVHGRDLPEVHRDHGLPAPDHIPPGYGGFLTEAQRHKEWFHVSDTHMAPPDGTSYPIARFHVSAGRPGMPAPDRYFAALGGAQGMASHMHGSGMHSSHGMHGSMGMHGMGHGMFGGGAMGPVFFIVAALAIIIAIGVAVAAKAGGGEGA,52.07059943074766
66
+ APFAICRRCRRRRGLPVCARRRWRRRRGNIWCAVGSGGIWRPCCRCITRITCRLRVSAAWRICCAGCRGRTCCGSFWWSTTCGSRACTARWTPSPWRSTGRCTRGAWTRWWRCARSTATPSPATASTWRAGWRAATPCGSATSTTSSCRCSTAW,52.5490203150644
67
+ LVLFAPTFNLSDPEGTVFATLVAIATAVGGYVAIPISGIDSIAGGVVSGYAVAKAGQFTNALKTTAMGAAVGEILGEQLYFGGFGPLGIVAGLITAGAIHKWLVMNKVSVNIYDAIGGRRFEVVLAVMIVTGLIMSFFVPAPVGGFIDNAVSKVGQSAAIGFITDSGSTLLANGINPVIAIGFLFAMAGVLIGGFKVASAQMGTLMGAVAFITGAFGFAVHFGANMVGVGALIAGRFTGRAFSDKVNETWPAVTDAVNNRYRTMVNVLAGSVVGAIFGL,52.5972401908542
68
+ MAAIHPPNLSFLPKPSAIHLFAFWTGSMGCLCPLLLGSQPILWASTALLLGTLQLGMGLKASLYPSPFPSHHLFQTTNYFLSFFLPFSLLSYASFFPSTLFPPGAIVTLTGLTLHGVSAYTLGGATGAWINYNTNHIFTAENGTVTGIKEMDTYSMVTANRFWSQVFQILFWCTNALALATHFSRIWTISRAEKHQLHVEEEHHHTAAEMVLAENIGIKTLTDYDDDDKMISYYRKDGVHHMHVEDAELALKLQEEEDLKNKKN,52.98995197391265
69
+ GLFAVIVEIPFSLRLPSVVQAKGSFSDSLFSHSAYPVVQPYFSPETLFGFDILLPITGEPVSRGLYTGHQPLLVVGVETSFLLTVETRLTGEVYSKGGRNSWDIQNCNFFGSDGKKYSLPAFERKKVKDVKCVDQDGVFSEVILERTHTSFTLKYTLPDSEWLIHSRSQLVKREDSNMGRPRKHLSSLVARNSSFEATYQRVSEKETEVSVQFGFSVGWKVIYLFLVKHFPFVFHWISNVLFYLLLNTLFAYIPDFSTFDCLAFLVTL,53.005620188296234
70
+ LTPRQRMWYGILSTAVFLLASEGSFFAISLTALVSYAYYQSILAQTQPAAAPAISAGFAFMLGVVIFGWVVLGVIQALINAISEWIRALVINIYSRTVFAPYVRALSHTPEGVRVINLQSSQLAGLFVNEFVKGFVDGLALIASLLVSLLISLWMGGVLGLIVFLYFCFRVMRQVGENMGRLREAQGQMYEQTLGLVEGLKDIRAARREEVYKGRIESLFGELAGMEVAGAKVQAVSTLMMRVVTQVAYLCMLWVGAYGVFHGDLS,56.40990415587325
71
+ MNINQLVLKAREENKQHENFQQGRLNLRYQEISKIEYLNRCRKLAINGNRIQRINDLQFFYHLTYLDLSNNLITSIENLHCLPLLRNLNLQKNLIGHITGLETLVRLEYLNLSHNQISKLENLECLVNLERLDVSHNHLTKIEGVCFLKSNILKELNLESNLLQELKFCEHLDYVTISNNNISSFSQVCYLLEHMPRLKYLSFTGNPYEQKLKQYRMVVFSKLQYLDGFVITEEELCRGSEVVDWIDSGSEFQRFRYCVINFLKDENNRT,58.18325968813114
72
+ GWVRQLPVYKRFAPFLSKFTLVTSLIAVGAGSGATYIQNLRKPRVRDKIVVHTVPLTPEMSGGKRFSVAPPSGIPHASHRMIPIERQREEDAXRERALRKKMLRRTAMLASGAFCLVLFVALGATIGTLRSEGVLKKDEFIPRPAIVGADGKAYDMDHPYAPPVKYQVQWEPKMGEKYYFHDYAKHHPNDNPENPYNKVAARA,60.55516244953947
73
+ MSASLFQTQGNYLVAAAISLSGLFMLVGLLAGSPRRPTYRWLLASVTLFCVAVSYFFMLSATTLEQGLVVKTNTGERALVDAVNGSVQYADGHYEIEATLRNLGSQPVRVEISRLQVVGEKMFGDIQSRTVEVGPNETRQVKFLLNRVLTSSANFRDRVLFVITDAQGNRQFIEVPVAYQYAQITGLLIALAWLAVIVIGFPVAWRSRMRIASGNRPVASGPQIAYLTALLFAATWTLVLMIAGTQIIGSQAGL,60.895889009456674
74
+ AQTTLNVADNSGARQLMCIRVIGASGNCSFVNQQKCTGICGCTRNATPIESEEIFDCIMKCGGQPGDCEVFQTHQCQQRMANNAHHYRRHWLSHTDFCVLPEHFHLDQDRHFHFQQHHHNWHHGHRHHHHHHDFHFGKFFETFAAPFASIFGGHIHGGFEKFSEMLANGFGGFDMFFGGFGGHGHFGGYEQEATSFKILASVVAAILLIAIAIPLGWLVKSQVSGIKVITTTTSGANQIILMKTVVAIATILAIAIAIPTG,62.10915337352313
75
+ GKRKAAVSRAAKLAATRAVPFARAAAIGPYAAIAIAGTKMAIDDHYKKDREKNREFVFNQWMSRKQLYDYKRKFWMFGPEKMKQLYEESGAKGAEAFFKENAETFKKIRDEYLVDLKNGTANPLTGEKVPLNPALPEDIRFPKYTPPPGLVPEGENPYYIPPPGYVREAERAGMPPPKKREMRMRPAGSEPGTTFGGAGYNPFAADPEYPHTAYAXXXXXXXXXXXXXXXXXXXXXXXXXXXXKRKAALSRTARLVATRAIPFGRSAAIGPYAMTAVAVAKMAYKDD,62.61105811999597
76
+ GWVRQLPAEERPVVLDRDEIELDPPVIGMGRNLAIMAVSVFLFMILTAWFALGEIQESEIARGTLRADRTLLDRTFIPITERGVFTTLDSRWALADVEPGELVWIAVDKHPATLQPGQSVQVYVRAVNDKPDNSVITPYRAVFAEIEREGFRWIVSVDQERFDQFRAHVTESLRLVNRGEALVGADGAPIPTIDLEATPGLAPDIPVTLRFEAEDIDWRILDQSQVQVARANVASADVSQPGWQEVELTAVAPWQAGKT,66.68297956107664
77
+ GWTLHPVSLYFSNHLGYVRLYQLWLTSVDKKSTNAFYHEVSDSQRKLVKRITRMELCFLGVMTLISLASLAIYAKFDQTSLPMLNKVFPRQNDIVTPVKFSLSASFFVFFLLLACFLSHAVNQVAKLASFCSALEDIQEFYVRIREELDSLRSYVENLEKRSAVSEEKLRLQASQTEMLLKRLPSFSSFCLLTLDRPILLSSHCPSLLPTVKGILNRGYKLSVYDPPPFQLGLCKDTHISDTQIYYNNGSRLEGATFHL,71.44388492712908
78
+ ALVPSDVSDQAEATLAFARQNLAKIEPEKIEIKQEPASGVNPADQPSQLDIYLTCTLKNEIRAPPGTTMPQLNFLRNQLEKNLLVPASQRDAYIQANPQQTLILDQPSPLTPEQKEDLAQLTITYGKNNLEVNTQRWPLPSLQVAMQTLESGEAHLEYRIHALPKAAGQPPVPVLKLVSKTTLPATAPVPNTASPTLSVRLPPRRTPPPPPIADEDLDDSPIVRDSRTLLKILLPTVLALVIALIAWRLWSSFTSHRIEAIATVPLPSATATPTP,72.99576740691371
79
+ MATPSFVSEPFAGLTPRQRQAIAAAMRSSLGYVQESVALNRMYSSALQGLVKPAGNAATIVASTGNVGTALSTLSGIQTAFSQYLKGKGSLVGSATNTLIAAQGKLISDLGSLITQEKTFMDSISKKLISDMDIAVSRTQTINSEVTKLTQERNALVAQLEAARKDADSAQKATITTELSNIIGTVAGAFLTAGFTAGIVLSIWELWAWGATLAAIAVGVGILLIIYATSRSSASNRKAELDAANSNLQNAQSTLKSDQQ,76.45022543295501
80
+ TPGLIDKLLGGGVQLPPGLLMALAVLAIQLGFIALIGKRVQFGAVARRYKIDAPETSLITAVLLGLAGYLAIFFAMRGMPWSATGELRWISGPHLNPLTFTAKFAMCALIVVPAAMRGWWAFSGPGADERSRHNARYAFWGSIVAVTALVVEGFLIMAPSLTEARFSPFYYARLLTYFVVTTALLVWTTVRESETPGRTLMGFALFSAAMVGLEMLSFTRFAVQFPTWWNVEVANLMYFGTMMIVLGLFFAMGGNIRWMVAA,77.96717901193621
81
+ MRYFKIRSTTLLIYLAAISVCALSICAPGFITPDEPAHFNYIRYLADHGQLPRIDPYAYASWGSTLSSLSYEFFAALFSWIPLETARSTVIFFAILNAVIIFATARRIAARYGSTGAFAAAAVFLLSPRVLAQSSFNNYDSLGIALMLAAWIFYEKVLTEKRLLPAVLSAVAVSIALLTNYQGYFIFAAVLLFSLPFPKLFFSRKNILFSAGVLSAAVIAAGLFAVFYKDLFLYSVFDVRLMSVFKMMTHQYPFSDAMTIYGGYFTVLF,81.23947975232642
82
+ LSPDLVAQLKAKTGVSYKEAKEALEATNGDIVAATIWLHEQARTSTFFFFFFFFNLVVGMGLFGPDRPLWLPGHALRLQPRHGLPGHRAAGLHRPRALPRLRLRPLPRLPRLRQGRPHLRHAPARPLRQPRLLPGDHHGRLLPRPRRLPLRLRLRHPRLLLRHLRLRPVLLPRRPLRALRLPRQPRRPLLPLLRRLLQGHPHRGARLPLRPVRRHLLHLRVRPLLRRLLGLLRARVRPQLLRPAPQRLHRLPRAQARPQGPRDGLLLPGQGPAQG,81.88453571822619
83
+ VGRINTAVTKVEGLKGVFDTASVFIIMRLILGALPGHDYFWHVATHKVLSTTWYELFSNVFLQVPSFITTFFMGAMLVQTMAQKSPEMQEFLKKGGIIFMTLAWFFFAPSGDYVVMRVISACTALVFIVTSMLEMNHVTPPPDTGLPRPIALCLRAFFYIGFLEWCVQQNFYAMCVLFFFMLGGVFTHYTALFVARYMKFFETFVPPIVHSGFSIAWMMWATQEGFITPMGQEPLLLTVLSVMVFFSVMSMC,86.07420147138896
84
+ MVYRITTIILVISTLTSFLIMFIPLTFRTFHYVMAFMVLLETSMFMWWYFDMSTSSYWNQERVHYEENGVPEFSLSFWSGLMFQMASVCYTYGKVYLSALRFGDMDHVQGQFIDLSNHFAMKTGLNPNDFKMRWPIQLMHNIINTMVEETEKLNAKQQREGITAEVEGEGRPQTFYEIQMLWHCITIILDELKRCTTVSNAIMTKETVDRMVHLCEKGIIPPDLEDFVFKLVFFTPPFEMILNFAI,92.19736060379442
85
+ MFSKLSLDAVPFARAPQWQRHLLRVACLISLFSLAYLAIVIAADTTNSIFTVGIGILLAAGVWFYWRDAVREELSHNPLGTRAAGIILGSGLVMLGLQLSAHLTGTWGYVTPTTFRWLAIMALAWPAAFLALRLTRDEEPVSEAMDNFDRAMAIMLVVSLVLWTFSPLLRGAVQHLHWLLFADYCFVVVDVVAVVMIYHMVRFLLAPLRETHPDAAQAIARKADAMVLWLFLWALYPIAYLVPAFVWGFHFPEGSIW,94.53532369154783
86
+ MVIVAIDRAIKATTILISPLIVIDLISAFIIGFRYQLVHDVIARICFAMVFVYYFALFFEIYYSRHFQGYQSALIKRCFLTLVPWLIYGPLLLLYRPVGDWYFPMTLLAFTIFALLAKRFVIEEETRDVMLEKERRMHFFAMVLFVGAMAIAFALSHFGVLEAFMPYRAFWMRGVTLIYFTSFYLVLLHHYGLREEIAFHKRGEVKPYPAYLAYTVINLTAWAVFFFFTHYAPTSAFARWWAWANFICIPFYAIG,95.44574656037318
87
+ MLLVFFAVMAGLLYGWWLRGSPKHARVTLGFYFITFFLLALLVWTHLGPSQAWSGFSVTLNRFYFWYLIITANAGAVLSAFGLVHRKTYVPEAERKRISLQFDAVFLILWLASALVCTFVMCEYLRWGWTGTDTLFGNHYLTPVLGPLLFWEWVTGLGLVVFAVLCWIYVRKFHYHDNLTARFAYSLLFVAPLIYLWMWVAGHPYQLAWTQDTAWLQSMGYWNGYPFMNPAHMIAFLGAGALFSLAMVAHGFRSERDGY,99.45883530953378
88
+ FGKDVVPVAATMVPFFGAIGFVLALRQPHFYPPAILIHGFIAAHFIGLYGENDFGEDFVPYFVAGLFVFWGFVAFILNVYFPPTPQNKRTLREEKYHEQVSVLTQAAIDGQEPQEIEVALGQVQANFDTAKSALEADRLIANQKLRAAVSTAATLIVMTAVIVGVHSQYDLLGLVLAMAAAISTLAGLYVFVGVSRAVLTFFTLRRGKTDEFLADADNFLKRNPVPVAALSALAKGHRDQAVAAAQSAIDNINPNPTTSSSSSTSASSSSSWAFDPLG,100.05271078832499
89
+ MRCGGTMPSTRSTTTGRCTRGWRRRICGTGTATSWTRPTGCTSRSRTSTTSWAGSPTCRTSRTGSSTPWTCPCTCTWRTRSSRSSCRTSPSWTSRRSTRSSRCTKTCWWTRSSRTTWCISGRCSWRSGSTCTRALSWSTWGRCATRRPTWSARSTSPTRRWSSACTARSGRRWCSTTPTSCTWGSCTPARTTGAWKPCSGSTSSPTPTSRPQARCSSATWSRACPTWRSTSSTRRRTSSAATPSSGPW,101.66820225573242
90
+ RIHDTILPFLMLGVGAFLSGIATLIEKSPNIMKCLPLLLTIGCCIPFLGWVSPIVLPFFSMKTQTTLSDGAIYGNSSISRVYENGIVEETQYVCGLNIFTSRIEVDGDFLFPKYYAPTNDTELQYVTEIPTSAHGTNPAELNATRKNLLNTLGPRYTLVLTDTDGVVRDYVVGNIPQGSPSPNLRYKGLRLELAVDQLPAYTISPPDGTSAFTFINKHWLIDIPTTLISETMVRKLVKAAGPLGPAYIVITEQSPNPIVATAGQAR,103.31987729217545
91
+ CRFGTCTVQKLAHQIYQFTDKDKDNVAPRSKISPQGYVNPNNEPTSYISPGHLRTKKSNMIPAKEVTRIDPNIVPNPNVQYPNLPAPYMYSGRAKRRRNLGLLMGRPNENPDNQHEMQDGYESAAYSNSYKGTYGKLTRWTSRWINNHYIDIERKVHFKDGRIFKTRAESSRINPKIGDFKTTKYITRGEKEALGFKIGGRLLLRPSSKLEKNFTVTETRTIRNGYTTTISRTIRWDDLEKCPLGNCAVGDLVTIDVTD,103.52201921223038
92
+ MWWWRLLVVALLRIGLALEDPARNPCSRVFFEGLTGCQQKVLRAVYPDPSRCLKACSEMKEAANSWGTRYAVATSVLGLEWLAYSWIQDKVACRCRGLSIPPTQKPSLFEKLLHSPLLLQGLQRAAEPVLGFFTQATQALKEAVWSALQWLGGQAGHILAFSRHFAFCLMAFSTLTLLSVCCNWWAIRRRFHQLESVTEQLLRCQQYVLQFRAVSRRHYISWALQLYFAHAFILRACAQLVSVLTTVSNMVSDSFSL,113.06700569292313
93
+ DPLSIILGILAGLFLIILIVLYFCGPYCTCIKRTGCCGNRWCYRWRCCCRRRRWRRRCCRWRTCYRYRWYSTRVRKCVKVPVVKTYKYRSKCGTCYGYVTRTRKVCCSYSSSSKKVCYTACRKKVYKTRYTYKVKVRNCKPCTKYRTKTVCSKCSYKTKIRTRTYKVRVAKCPRKSYKVVTMCKKKPSYRTCSRTSRRSRKVCLTCGSKAYRTKRTIKVPVKKTCSRKVSYKVCARTSHRTH,118.16791808043538
94
+ MQTYNNPEVTYFDRSQTDVEYGWWSGNSAWKNEQWLVMTKEEAKEFFRRSWIKLLDAFLPTTYIIVRWYQMYNYGCPLWCEKNDGKAHCKDWDYHPTCGKGPWWANNPLPTVKGQCEVYTTHRAGSSKECRSYYDLTSAQKAALQSSDCKATTGVYPFYSQAGTCRLNADYPLEKIPEGICNVHLNHKTRASHWGDPDIPTSQIWYFAAYDQAEKEWRTLSGTLEHTWVALSHEDYQRLVDIESKVPWSVSP,119.47290840438525
95
+ VQHFTGYVEDGRGIFYSLPMTNKGLDRIMLCIAVIVAFGMLLCPLASLYFSSEPVLVREDIFSALRTLSIFAAVWQIADVLRRTYVVVSKNPLLLLGLALELTFYTVYFGLDKLYPYPLAVFLPLQFGGILLRHITSIYLQAVSSRNESIIAQLRREREREARRTRERNIAQKRRIDAALWRQMSAVVIFLLLWLIAFTSSALALYNNLLASQQLSIAGLTPSQAASLNTGELLLRVIYGLVISCSAVLFTMTVEARDKIMHD,126.04829832003558
96
+ MLARFFRRQRTASFSLATVVALSALALHTSGMHRPALYASATAVHAITLITLGVMYARSMAPRAEGADHDLRHFMTAYLVTALAWPLAMVLTFALTHFLPGTDPLVPDRTLRLVTLINLAFMASATAHFAFALHTGWNVPRAIAATIVVFALVALTAWLIEIFTGGSTHWSFWAVLIASAAIWLGLALYFRRHAAAIAAFERRHNAQILARFIAAQDETHEQAGGGARSLAHNLDSPLTAAALFADDLSGKVDAPVREHLRLIRRSAND,146.856677830673
97
+ GWFDAMLASVSEFAPIFVVLIIFIVRVYKPFGSEWIVHVLHIADKRPGLNALIHRLLPRTAVHVPQAVKDKYVFLNSEHCIQFGCRHDPVPQYLELLSKGTYSLKVDVWFKHDRAREFYNMLLNEAQTASENHHASKIRHWTDEKMSELFAMAKKAYIPLNETREHSHDKAHSHNHAHSHNHSHDHGHSHEHTHGHDGVHAHDHSNTSDAHVHSHKHLHLHVHVHDKKNIIRRMNSALRKMKAAGVNTHEVAHVHDETTP,166.7331315912073
98
+ GAPPITGEALEKDISRREKGVGGFLSRLFSLVNSTNPFAVGVEGEKLLEEIENIRDSMGHQTAAQLYFAQQQSLLQAEYARWQESHNATLQATKDHIFNAQLGHILMLAGAVVCYTAGLRAWAXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXFLAITNTAINTYFGGSITQSLEQVSAAVEHAKKVGLLSQETVGQIEAAYNSATGKALSYNNIADTEAAFQEFSRNHVLRSQLDKENADAAWTRVQSEGASLRAYLDHASRAITSALNGAIFAFGGP,169.6224292611664
99
+ MSAMSVQIDRLQDQLNHLGELVAQNSKVIAALTQRIQVLERIVTERLRIPYIPLEKRTALMFPLHDEEKQSEITLFINAELHLGTAPGKHKVYYTTVEEMIQHFKEGKCLPQNWPQSDNPFWPCYRELADEMKSNTAAYNNFIKMQDEMRKLCIMLSSGVYHISRNPGGAKDLYTDPKLFIQIYTNECLRNAIPAEILDQMIIDLYANYTEADIHNMAEVRASRNFNHLEKQYMHKLLKLKKTLPFAIQASMDVVL,187.47480920419864
100
+ AAGVAAWLPFARAAAIGWMPVASTAPRAMTATASWPIWMIWAMAMPAMTGRRWRRSRWATAPMARSSGAAPMMARPVMTAMPVAAATIRPSALRSISATASATAGSASAMTRSAAMSPIRTIWRRCSRARRAVASGAWSAISATSSARRRTTSTARALSCAAMASGLPSASMKAAAGGGSSNTMPRCSGSSASRTRACGTASPSCCRAATAASASARAARARSCRASSRARSAIWRAMSVRSRTWSRSARLRRSTSRPSMRSATAA,216.8557544805602
101
+ EKKEVCSVFLTNRVPLDDKRFRRERVYLPGESPFIDPDLFLSREHPLRAQVRGTIIEWLRASIYGIYPYPEQRDPNLWCTERFKQEVMPDGHCEPTLGFVPLTFSTCLTRDMIAASSYNWRKTMEVPGAKMLLHVGPLGTGGHYDYAFTFLQPDNTFAYVKGNKLVRQTKIWNDAGFQLVTEEATLLDAQEYFGAANKLGVCIFCGNCVEYCPTNCLSMCEEVLPRGNALQESWTILERVFMPEDPEHENFKYRRLRTSDGAKFINYTS,520.388790480398
benchmarks/Generation/ProtGPT2/protgpt2_test.txt ADDED
The diff for this file is too large to render. See raw diff
 
benchmarks/Generation/ProtGPT2/protgpt2_train.txt ADDED
The diff for this file is too large to render. See raw diff
 
benchmarks/Generation/ProtGPT2/run_clm.py ADDED
@@ -0,0 +1,657 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # coding=utf-8
3
+ # Copyright 2020 The HuggingFace Inc. team. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+ """
17
+ Fine-tuning the library models for causal language modeling (GPT, GPT-2, CTRL, ...) on a text file or a dataset.
18
+
19
+ Here is the full list of checkpoints on the hub that can be fine-tuned by this script:
20
+ https://huggingface.co/models?filter=text-generation
21
+ """
22
+ # You can also adapt this script on your own causal language modeling task. Pointers for this are left as comments.
23
+
24
+ import logging
25
+ import math
26
+ import os
27
+ import sys
28
+ from dataclasses import dataclass, field
29
+ from itertools import chain
30
+ from typing import Optional
31
+
32
+ import datasets
33
+ import evaluate
34
+ import torch
35
+ from datasets import load_dataset
36
+
37
+ import transformers
38
+ from transformers import (
39
+ CONFIG_MAPPING,
40
+ MODEL_FOR_CAUSAL_LM_MAPPING,
41
+ AutoConfig,
42
+ AutoModelForCausalLM,
43
+ AutoTokenizer,
44
+ HfArgumentParser,
45
+ Trainer,
46
+ TrainingArguments,
47
+ default_data_collator,
48
+ is_torch_xla_available,
49
+ set_seed,
50
+ )
51
+ from transformers.testing_utils import CaptureLogger
52
+ from transformers.trainer_utils import get_last_checkpoint
53
+ from transformers.utils import check_min_version, send_example_telemetry
54
+ from transformers.utils.versions import require_version
55
+
56
+
57
# Fail fast when the installed Transformers release is older than the one this example was written for.
# Remove at your own risk.
check_min_version("4.45.0.dev0")

# The example also needs a recent `datasets`; the second argument is the hint shown if the check fails.
require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")

logger = logging.getLogger(__name__)


# Config classes registered for causal language modeling, and their `model_type` names.
# MODEL_TYPES is interpolated into the `--model_type` help text of ModelArguments.
MODEL_CONFIG_CLASSES = [*MODEL_FOR_CAUSAL_LM_MAPPING.keys()]
MODEL_TYPES = tuple(config_cls.model_type for config_cls in MODEL_CONFIG_CLASSES)
67
+
68
+
69
@dataclass
class ModelArguments:
    """
    Arguments pertaining to which model/config/tokenizer we are going to fine-tune, or train from scratch.

    Each field's ``metadata["help"]`` string is its command-line description; the class is handed to
    ``HfArgumentParser`` in ``main()``. Every field has a default, so the class can be built with no arguments.

    Raises:
        ValueError: from ``__post_init__`` when ``config_overrides`` is combined with ``config_name`` or
            ``model_name_or_path``.
    """

    model_name_or_path: Optional[str] = field(
        default=None,
        metadata={
            "help": (
                "The model checkpoint for weights initialization. Don't set if you want to train a model from scratch."
            )
        },
    )
    model_type: Optional[str] = field(
        default=None,
        metadata={"help": "If training from scratch, pass a model type from the list: " + ", ".join(MODEL_TYPES)},
    )
    config_overrides: Optional[str] = field(
        default=None,
        metadata={
            "help": (
                "Override some existing default config settings when a model is trained from scratch. Example: "
                "n_embd=10,resid_pdrop=0.2,scale_attn_weights=false,summary_type=cls_index"
            )
        },
    )
    config_name: Optional[str] = field(
        default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"}
    )
    tokenizer_name: Optional[str] = field(
        default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"}
    )
    cache_dir: Optional[str] = field(
        default=None,
        metadata={"help": "Where do you want to store the pretrained models downloaded from huggingface.co"},
    )
    use_fast_tokenizer: bool = field(
        default=True,
        metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."},
    )
    model_revision: str = field(
        default="main",
        metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
    )
    # Defaults to None, so the annotation is Optional like the other nullable string fields.
    token: Optional[str] = field(
        default=None,
        metadata={
            "help": (
                "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
                "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
            )
        },
    )
    trust_remote_code: bool = field(
        default=False,
        metadata={
            "help": (
                "Whether to trust the execution of code from datasets/models defined on the Hub."
                " This option should only be set to `True` for repositories you trust and in which you have read the"
                " code, as it will execute code present on the Hub on your local machine."
            )
        },
    )
    torch_dtype: Optional[str] = field(
        default=None,
        metadata={
            "help": (
                "Override the default `torch.dtype` and load the model under this dtype. If `auto` is passed, the "
                "dtype will be automatically derived from the model's weights."
            ),
            "choices": ["auto", "bfloat16", "float16", "float32"],
        },
    )
    low_cpu_mem_usage: bool = field(
        default=False,
        metadata={
            "help": (
                "It is an option to create the model as an empty shell, then only materialize its parameters when the pretrained weights are loaded. "
                "set True will benefit LLM loading time and RAM consumption."
            )
        },
    )

    def __post_init__(self):
        """Reject `config_overrides` when a config name or a model checkpoint is also given."""
        if self.config_overrides is None:
            return
        if self.config_name is not None or self.model_name_or_path is not None:
            raise ValueError(
                "--config_overrides can't be used in combination with --config_name or --model_name_or_path"
            )
158
+
159
+
160
@dataclass
class DataTrainingArguments:
    """
    Arguments pertaining to what data we are going to input our model for training and eval.

    Each field's ``metadata["help"]`` string is its command-line description; the class is handed to
    ``HfArgumentParser`` in ``main()``.

    Raises:
        ValueError: from ``__post_init__`` when none of ``dataset_name``, ``train_file`` and
            ``validation_file`` is given, or when a given file does not end in ``csv``, ``json`` or ``txt``.
    """

    dataset_name: Optional[str] = field(
        default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."}
    )
    dataset_config_name: Optional[str] = field(
        default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."}
    )
    train_file: Optional[str] = field(default=None, metadata={"help": "The input training data file (a text file)."})
    validation_file: Optional[str] = field(
        default=None,
        metadata={"help": "An optional input evaluation data file to evaluate the perplexity on (a text file)."},
    )
    max_train_samples: Optional[int] = field(
        default=None,
        metadata={
            "help": (
                "For debugging purposes or quicker training, truncate the number of training examples to this "
                "value if set."
            )
        },
    )
    max_eval_samples: Optional[int] = field(
        default=None,
        metadata={
            "help": (
                "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
                "value if set."
            )
        },
    )
    streaming: bool = field(default=False, metadata={"help": "Enable streaming mode"})
    block_size: Optional[int] = field(
        default=None,
        metadata={
            "help": (
                "Optional input sequence length after tokenization. "
                "The training dataset will be truncated in block of this size for training. "
                "Default to the model max input length for single sentence inputs (take into account special tokens)."
            )
        },
    )
    overwrite_cache: bool = field(
        default=False, metadata={"help": "Overwrite the cached training and evaluation sets"}
    )
    validation_split_percentage: Optional[int] = field(
        default=5,
        metadata={
            "help": "The percentage of the train set used as validation set in case there's no validation split"
        },
    )
    preprocessing_num_workers: Optional[int] = field(
        default=None,
        metadata={"help": "The number of processes to use for the preprocessing."},
    )
    keep_linebreaks: bool = field(
        default=True, metadata={"help": "Whether to keep line breaks when using TXT files or not."}
    )

    def __post_init__(self):
        """Validate the data-source arguments once the dataclass has been populated.

        Raises:
            ValueError: if no data source is given, or a data file has an unsupported extension.
        """
        if self.streaming:
            require_version("datasets>=2.0.0", "The streaming feature requires `datasets>=2.0.0`")

        if self.dataset_name is None and self.train_file is None and self.validation_file is None:
            raise ValueError("Need either a dataset name or a training/validation file.")

        # Explicit raises instead of `assert`: assertions are removed under `python -O`, which would
        # let an unsupported file through. The extension is compared case-sensitively because main()
        # derives the loader name from the same `split(".")[-1]` value.
        supported_extensions = ("csv", "json", "txt")
        for arg_name in ("train_file", "validation_file"):
            file_path = getattr(self, arg_name)
            if file_path is None:
                continue
            extension = file_path.split(".")[-1]
            if extension not in supported_extensions:
                raise ValueError(f"`{arg_name}` should be a csv, a json or a txt file.")
236
+
237
+
238
+ def main():
239
+ # See all possible arguments in src/transformers/training_args.py
240
+ # or by passing the --help flag to this script.
241
+ # We now keep distinct sets of args, for a cleaner separation of concerns.
242
+
243
+ parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments))
244
+ if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
245
+ # If we pass only one argument to the script and it's the path to a json file,
246
+ # let's parse it to get our arguments.
247
+ model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
248
+ else:
249
+ model_args, data_args, training_args = parser.parse_args_into_dataclasses()
250
+
251
+ # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
252
+ # information sent is the one passed as arguments along with your Python/PyTorch versions.
253
+ send_example_telemetry("run_clm", model_args, data_args)
254
+
255
+ # Setup logging
256
+ logging.basicConfig(
257
+ format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
258
+ datefmt="%m/%d/%Y %H:%M:%S",
259
+ handlers=[logging.StreamHandler(sys.stdout)],
260
+ )
261
+
262
+ if training_args.should_log:
263
+ # The default of training_args.log_level is passive, so we set log level at info here to have that default.
264
+ transformers.utils.logging.set_verbosity_info()
265
+
266
+ log_level = training_args.get_process_log_level()
267
+ logger.setLevel(log_level)
268
+ datasets.utils.logging.set_verbosity(log_level)
269
+ transformers.utils.logging.set_verbosity(log_level)
270
+ transformers.utils.logging.enable_default_handler()
271
+ transformers.utils.logging.enable_explicit_format()
272
+
273
+ # Log on each process the small summary:
274
+ logger.warning(
275
+ f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}, "
276
+ + f"distributed training: {training_args.parallel_mode.value == 'distributed'}, 16-bits training: {training_args.fp16}"
277
+ )
278
+ logger.info(f"Training/evaluation parameters {training_args}")
279
+
280
+ # Detecting last checkpoint.
281
+ last_checkpoint = None
282
+ if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir:
283
+ last_checkpoint = get_last_checkpoint(training_args.output_dir)
284
+ if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0:
285
+ raise ValueError(
286
+ f"Output directory ({training_args.output_dir}) already exists and is not empty. "
287
+ "Use --overwrite_output_dir to overcome."
288
+ )
289
+ elif last_checkpoint is not None and training_args.resume_from_checkpoint is None:
290
+ logger.info(
291
+ f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change "
292
+ "the `--output_dir` or add `--overwrite_output_dir` to train from scratch."
293
+ )
294
+
295
+ # Set seed before initializing model.
296
+ set_seed(training_args.seed)
297
+
298
+ # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
299
+ # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
300
+ # (the dataset will be downloaded automatically from the datasets Hub).
301
+ #
302
+ # For CSV/JSON files, this script will use the column called 'text' or the first column if no column called
303
+ # 'text' is found. You can easily tweak this behavior (see below).
304
+ #
305
+ # In distributed training, the load_dataset function guarantee that only one local process can concurrently
306
+ # download the dataset.
307
+ if data_args.dataset_name is not None:
308
+ # Downloading and loading a dataset from the hub.
309
+ raw_datasets = load_dataset(
310
+ data_args.dataset_name,
311
+ data_args.dataset_config_name,
312
+ cache_dir=model_args.cache_dir,
313
+ token=model_args.token,
314
+ streaming=data_args.streaming,
315
+ trust_remote_code=model_args.trust_remote_code,
316
+ )
317
+ if "validation" not in raw_datasets.keys():
318
+ raw_datasets["validation"] = load_dataset(
319
+ data_args.dataset_name,
320
+ data_args.dataset_config_name,
321
+ split=f"train[:{data_args.validation_split_percentage}%]",
322
+ cache_dir=model_args.cache_dir,
323
+ token=model_args.token,
324
+ streaming=data_args.streaming,
325
+ trust_remote_code=model_args.trust_remote_code,
326
+ )
327
+ raw_datasets["train"] = load_dataset(
328
+ data_args.dataset_name,
329
+ data_args.dataset_config_name,
330
+ split=f"train[{data_args.validation_split_percentage}%:]",
331
+ cache_dir=model_args.cache_dir,
332
+ token=model_args.token,
333
+ streaming=data_args.streaming,
334
+ trust_remote_code=model_args.trust_remote_code,
335
+ )
336
+ else:
337
+ data_files = {}
338
+ dataset_args = {}
339
+ if data_args.train_file is not None:
340
+ data_files["train"] = data_args.train_file
341
+ if data_args.validation_file is not None:
342
+ data_files["validation"] = data_args.validation_file
343
+ extension = (
344
+ data_args.train_file.split(".")[-1]
345
+ if data_args.train_file is not None
346
+ else data_args.validation_file.split(".")[-1]
347
+ )
348
+ if extension == "txt":
349
+ extension = "text"
350
+ dataset_args["keep_linebreaks"] = data_args.keep_linebreaks
351
+ raw_datasets = load_dataset(
352
+ extension,
353
+ data_files=data_files,
354
+ cache_dir=model_args.cache_dir,
355
+ token=model_args.token,
356
+ **dataset_args,
357
+ )
358
+ # If no validation data is there, validation_split_percentage will be used to divide the dataset.
359
+ if "validation" not in raw_datasets.keys():
360
+ raw_datasets["validation"] = load_dataset(
361
+ extension,
362
+ data_files=data_files,
363
+ split=f"train[:{data_args.validation_split_percentage}%]",
364
+ cache_dir=model_args.cache_dir,
365
+ token=model_args.token,
366
+ **dataset_args,
367
+ )
368
+ raw_datasets["train"] = load_dataset(
369
+ extension,
370
+ data_files=data_files,
371
+ split=f"train[{data_args.validation_split_percentage}%:]",
372
+ cache_dir=model_args.cache_dir,
373
+ token=model_args.token,
374
+ **dataset_args,
375
+ )
376
+
377
+ # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
378
+ # https://huggingface.co/docs/datasets/loading_datasets.
379
+
380
+ # Load pretrained model and tokenizer
381
+ #
382
+ # Distributed training:
383
+ # The .from_pretrained methods guarantee that only one local process can concurrently
384
+ # download model & vocab.
385
+
386
+ config_kwargs = {
387
+ "cache_dir": model_args.cache_dir,
388
+ "revision": model_args.model_revision,
389
+ "token": model_args.token,
390
+ "trust_remote_code": model_args.trust_remote_code,
391
+ }
392
+ if model_args.config_name:
393
+ config = AutoConfig.from_pretrained(model_args.config_name, **config_kwargs)
394
+ elif model_args.model_name_or_path:
395
+ config = AutoConfig.from_pretrained(model_args.model_name_or_path, **config_kwargs)
396
+ else:
397
+ config = CONFIG_MAPPING[model_args.model_type]()
398
+ logger.warning("You are instantiating a new config instance from scratch.")
399
+ if model_args.config_overrides is not None:
400
+ logger.info(f"Overriding config: {model_args.config_overrides}")
401
+ config.update_from_string(model_args.config_overrides)
402
+ logger.info(f"New config: {config}")
403
+
404
+ tokenizer_kwargs = {
405
+ "cache_dir": model_args.cache_dir,
406
+ "use_fast": model_args.use_fast_tokenizer,
407
+ "revision": model_args.model_revision,
408
+ "token": model_args.token,
409
+ "trust_remote_code": model_args.trust_remote_code,
410
+ }
411
+ if model_args.tokenizer_name:
412
+ tokenizer = AutoTokenizer.from_pretrained(model_args.tokenizer_name, **tokenizer_kwargs)
413
+ elif model_args.model_name_or_path:
414
+ tokenizer = AutoTokenizer.from_pretrained(model_args.model_name_or_path, **tokenizer_kwargs)
415
+ else:
416
+ raise ValueError(
417
+ "You are instantiating a new tokenizer from scratch. This is not supported by this script. "
418
+ "You can do it from another script, save it, and load it from here, using --tokenizer_name."
419
+ )
420
+
421
+ if model_args.model_name_or_path:
422
+ torch_dtype = (
423
+ model_args.torch_dtype
424
+ if model_args.torch_dtype in ["auto", None]
425
+ else getattr(torch, model_args.torch_dtype)
426
+ )
427
+ model = AutoModelForCausalLM.from_pretrained(
428
+ model_args.model_name_or_path,
429
+ from_tf=bool(".ckpt" in model_args.model_name_or_path),
430
+ config=config,
431
+ cache_dir=model_args.cache_dir,
432
+ revision=model_args.model_revision,
433
+ token=model_args.token,
434
+ trust_remote_code=model_args.trust_remote_code,
435
+ torch_dtype=torch_dtype,
436
+ low_cpu_mem_usage=model_args.low_cpu_mem_usage,
437
+ )
438
+ else:
439
+ model = AutoModelForCausalLM.from_config(config, trust_remote_code=model_args.trust_remote_code)
440
+ n_params = sum({p.data_ptr(): p.numel() for p in model.parameters()}.values())
441
+ logger.info(f"Training new model from scratch - Total size={n_params/2**20:.2f}M params")
442
+
443
+ # We resize the embeddings only when necessary to avoid index errors. If you are creating a model from scratch
444
+ # on a small vocab and want a smaller embedding size, remove this test.
445
+ embedding_size = model.get_input_embeddings().weight.shape[0]
446
+ if len(tokenizer) > embedding_size:
447
+ model.resize_token_embeddings(len(tokenizer))
448
+
449
+ # Preprocessing the datasets.
450
+ # First we tokenize all the texts.
451
+ if training_args.do_train:
452
+ column_names = list(raw_datasets["train"].features)
453
+ else:
454
+ column_names = list(raw_datasets["validation"].features)
455
+ text_column_name = "text" if "text" in column_names else column_names[0]
456
+
457
+ # since this will be pickled to avoid _LazyModule error in Hasher force logger loading before tokenize_function
458
+ tok_logger = transformers.utils.logging.get_logger("transformers.tokenization_utils_base")
459
+
460
+ def tokenize_function(examples):
461
+ with CaptureLogger(tok_logger) as cl:
462
+ output = tokenizer(examples[text_column_name])
463
+ # clm input could be much much longer than block_size
464
+ if "Token indices sequence length is longer than the" in cl.out:
465
+ tok_logger.warning(
466
+ "^^^^^^^^^^^^^^^^ Please ignore the warning above - this long input will be chunked into smaller bits"
467
+ " before being passed to the model."
468
+ )
469
+ return output
470
+
471
+ with training_args.main_process_first(desc="dataset map tokenization"):
472
+ if not data_args.streaming:
473
+ tokenized_datasets = raw_datasets.map(
474
+ tokenize_function,
475
+ batched=True,
476
+ num_proc=data_args.preprocessing_num_workers,
477
+ remove_columns=column_names,
478
+ load_from_cache_file=not data_args.overwrite_cache,
479
+ desc="Running tokenizer on dataset",
480
+ )
481
+ else:
482
+ tokenized_datasets = raw_datasets.map(
483
+ tokenize_function,
484
+ batched=True,
485
+ remove_columns=column_names,
486
+ )
487
+ if hasattr(config, "max_position_embeddings"):
488
+ max_pos_embeddings = config.max_position_embeddings
489
+ else:
490
+ # Define a default value if the attribute is missing in the config.
491
+ max_pos_embeddings = 1024
492
+
493
+ if data_args.block_size is None:
494
+ block_size = tokenizer.model_max_length
495
+ if block_size > max_pos_embeddings:
496
+ logger.warning(
497
+ f"The tokenizer picked seems to have a very large `model_max_length` ({tokenizer.model_max_length}). "
498
+ f"Using block_size={min(1024, max_pos_embeddings)} instead. You can change that default value by passing --block_size xxx."
499
+ )
500
+ if max_pos_embeddings > 0:
501
+ block_size = min(1024, max_pos_embeddings)
502
+ else:
503
+ block_size = 1024
504
+ else:
505
+ if data_args.block_size > tokenizer.model_max_length:
506
+ logger.warning(
507
+ f"The block_size passed ({data_args.block_size}) is larger than the maximum length for the model "
508
+ f"({tokenizer.model_max_length}). Using block_size={tokenizer.model_max_length}."
509
+ )
510
+ block_size = min(data_args.block_size, tokenizer.model_max_length)
511
+
512
+ # Main data processing function that will concatenate all texts from our dataset and generate chunks of block_size.
513
+ def group_texts(examples):
514
+ # Concatenate all texts.
515
+ concatenated_examples = {k: list(chain(*examples[k])) for k in examples.keys()}
516
+ total_length = len(concatenated_examples[list(examples.keys())[0]])
517
+ # We drop the small remainder, and if the total_length < block_size we exclude this batch and return an empty dict.
518
+ # We could add padding if the model supported it instead of this drop, you can customize this part to your needs.
519
+ total_length = (total_length // block_size) * block_size
520
+ # Split by chunks of max_len.
521
+ result = {
522
+ k: [t[i : i + block_size] for i in range(0, total_length, block_size)]
523
+ for k, t in concatenated_examples.items()
524
+ }
525
+ result["labels"] = result["input_ids"].copy()
526
+ return result
527
+
528
+ # Note that with `batched=True`, this map processes 1,000 texts together, so group_texts throws away a remainder
529
+ # for each of those groups of 1,000 texts. You can adjust that batch_size here but a higher value might be slower
530
+ # to preprocess.
531
+ #
532
+ # To speed up this part, we use multiprocessing. See the documentation of the map method for more information:
533
+ # https://huggingface.co/docs/datasets/process#map
534
+
535
+ with training_args.main_process_first(desc="grouping texts together"):
536
+ if not data_args.streaming:
537
+ lm_datasets = tokenized_datasets.map(
538
+ group_texts,
539
+ batched=True,
540
+ num_proc=data_args.preprocessing_num_workers,
541
+ load_from_cache_file=not data_args.overwrite_cache,
542
+ desc=f"Grouping texts in chunks of {block_size}",
543
+ )
544
+ else:
545
+ lm_datasets = tokenized_datasets.map(
546
+ group_texts,
547
+ batched=True,
548
+ )
549
+
550
+ if training_args.do_train:
551
+ if "train" not in tokenized_datasets:
552
+ raise ValueError("--do_train requires a train dataset")
553
+ train_dataset = lm_datasets["train"]
554
+ if data_args.max_train_samples is not None:
555
+ max_train_samples = min(len(train_dataset), data_args.max_train_samples)
556
+ train_dataset = train_dataset.select(range(max_train_samples))
557
+
558
+ if training_args.do_eval:
559
+ if "validation" not in tokenized_datasets:
560
+ raise ValueError("--do_eval requires a validation dataset")
561
+ eval_dataset = lm_datasets["validation"]
562
+ if data_args.max_eval_samples is not None:
563
+ max_eval_samples = min(len(eval_dataset), data_args.max_eval_samples)
564
+ eval_dataset = eval_dataset.select(range(max_eval_samples))
565
+
566
+ def preprocess_logits_for_metrics(logits, labels):
567
+ if isinstance(logits, tuple):
568
+ # Depending on the model and config, logits may contain extra tensors,
569
+ # like past_key_values, but logits always come first
570
+ logits = logits[0]
571
+ return logits.argmax(dim=-1)
572
+
573
+ metric = evaluate.load("accuracy", cache_dir=model_args.cache_dir)
574
+
575
+ def compute_metrics(eval_preds):
576
+ preds, labels = eval_preds
577
+ # preds have the same shape as the labels, after the argmax(-1) has been calculated
578
+ # by preprocess_logits_for_metrics but we need to shift the labels
579
+ labels = labels[:, 1:].reshape(-1)
580
+ preds = preds[:, :-1].reshape(-1)
581
+ return metric.compute(predictions=preds, references=labels)
582
+
583
+ # Initialize our Trainer
584
+ trainer = Trainer(
585
+ model=model,
586
+ args=training_args,
587
+ train_dataset=train_dataset if training_args.do_train else None,
588
+ eval_dataset=eval_dataset if training_args.do_eval else None,
589
+ tokenizer=tokenizer,
590
+ # Data collator will default to DataCollatorWithPadding, so we change it.
591
+ data_collator=default_data_collator,
592
+ compute_metrics=compute_metrics if training_args.do_eval and not is_torch_xla_available() else None,
593
+ preprocess_logits_for_metrics=preprocess_logits_for_metrics
594
+ if training_args.do_eval and not is_torch_xla_available()
595
+ else None,
596
+ )
597
+
598
+ # Training
599
+ if training_args.do_train:
600
+ checkpoint = None
601
+ if training_args.resume_from_checkpoint is not None:
602
+ checkpoint = training_args.resume_from_checkpoint
603
+ elif last_checkpoint is not None:
604
+ checkpoint = last_checkpoint
605
+ train_result = trainer.train(resume_from_checkpoint=checkpoint)
606
+ trainer.save_model() # Saves the tokenizer too for easy upload
607
+
608
+ metrics = train_result.metrics
609
+
610
+ max_train_samples = (
611
+ data_args.max_train_samples if data_args.max_train_samples is not None else len(train_dataset)
612
+ )
613
+ metrics["train_samples"] = min(max_train_samples, len(train_dataset))
614
+
615
+ trainer.log_metrics("train", metrics)
616
+ trainer.save_metrics("train", metrics)
617
+ trainer.save_state()
618
+
619
+ # Evaluation
620
+ if training_args.do_eval:
621
+ logger.info("*** Evaluate ***")
622
+
623
+ metrics = trainer.evaluate()
624
+
625
+ max_eval_samples = data_args.max_eval_samples if data_args.max_eval_samples is not None else len(eval_dataset)
626
+ metrics["eval_samples"] = min(max_eval_samples, len(eval_dataset))
627
+ try:
628
+ perplexity = math.exp(metrics["eval_loss"])
629
+ except OverflowError:
630
+ perplexity = float("inf")
631
+ metrics["perplexity"] = perplexity
632
+
633
+ trainer.log_metrics("eval", metrics)
634
+ trainer.save_metrics("eval", metrics)
635
+
636
+ kwargs = {"finetuned_from": model_args.model_name_or_path, "tasks": "text-generation"}
637
+ if data_args.dataset_name is not None:
638
+ kwargs["dataset_tags"] = data_args.dataset_name
639
+ if data_args.dataset_config_name is not None:
640
+ kwargs["dataset_args"] = data_args.dataset_config_name
641
+ kwargs["dataset"] = f"{data_args.dataset_name} {data_args.dataset_config_name}"
642
+ else:
643
+ kwargs["dataset"] = data_args.dataset_name
644
+
645
+ if training_args.push_to_hub:
646
+ trainer.push_to_hub(**kwargs)
647
+ else:
648
+ trainer.create_model_card(**kwargs)
649
+
650
+
651
def _mp_fn(index):
    """Entry point handed to ``xla_spawn`` when running on TPUs.

    ``index`` is accepted to satisfy the spawn callback signature; the body
    does not read it and simply delegates to :func:`main`.
    """
    main()


# Plain script execution goes straight to main().
if __name__ == "__main__":
    main()
benchmarks/Generation/Visualize/analyze_mdlm_denovo_gen.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+
3
# Directory that holds the generation benchmark outputs.
path = "/home/sg666/MDpLM/benchmarks/Generation"

# Read the de-novo generation results and print the mean of the
# 'Perplexity' column.
res = pd.read_csv(f"{path}/mdlm_de-novo_generation_results.csv")
perplexities = res['Perplexity']
average_ppl = perplexities.mean()
print(average_ppl)
benchmarks/Generation/Visualize/esm_umap.png ADDED
benchmarks/Generation/Visualize/esm_umap.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import pandas as pd
3
+ import seaborn as sns
4
+ import matplotlib.pyplot as plt
5
+ from umap import UMAP
6
+ from sklearn.manifold import TSNE
7
+ from sklearn.decomposition import PCA
8
+ from transformers import AutoModel, AutoTokenizer
9
+
10
# Root directory of the generation benchmark inputs/outputs read by this script.
path = "/workspace/sg666/MDpLM/benchmarks/Generation"
# Use CUDA when torch reports it as available, otherwise run on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# Model identifier passed to ``from_pretrained`` when loading the ESM-2 encoder.
esm_model_path = "facebook/esm2_t33_650M_UR50D"
13
+
14
def load_esm2_model(model_name):
    """Load the tokenizer and model used to embed sequences.

    Both objects are created with ``from_pretrained(model_name)``; the model
    is moved onto the module-level ``device`` before it is returned.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    esm_tokenizer = AutoTokenizer.from_pretrained(model_name)
    esm_model = AutoModel.from_pretrained(model_name)
    esm_model = esm_model.to(device)
    return esm_tokenizer, esm_model
19
+
20
def get_latents(model, tokenizer, sequence):
    """Embed ``sequence`` as the mean of the model's last hidden states.

    The sequence is tokenized into PyTorch tensors, moved to the module-level
    ``device`` and passed through ``model`` with gradients disabled. The last
    hidden state is averaged over dimension 1 and the leading dimension is
    squeezed away.
    NOTE(review): the mean runs over every position the tokenizer emits,
    which presumably includes any special tokens - confirm this is intended.

    Returns:
        The pooled embedding as a plain Python list of floats.
    """
    encoded = tokenizer(sequence, return_tensors="pt").to(device)
    with torch.no_grad():
        hidden_states = model(**encoded).last_hidden_state
    pooled = hidden_states.mean(dim=1).squeeze(0)
    return pooled.cpu().numpy().tolist()
26
+
27
def parse_fasta_file(file_path, n_samples=100, random_state=None):
    """Read a FASTA file and return a random sample of its sequences.

    Lines starting with ``>`` delimit records; the remaining lines of each
    record are concatenated into a single sequence string. Records that have
    a header but no sequence lines are skipped.

    Args:
        file_path: Path of the FASTA file to read.
        n_samples: Maximum number of sequences to return. Defaults to 100,
            the value that was previously hard-coded. When the file holds
            fewer records, all of them are returned (shuffled) instead of
            raising ``ValueError`` from ``DataFrame.sample``.
        random_state: Optional seed forwarded to ``DataFrame.sample`` so the
            draw can be reproduced. ``None`` keeps the previous unseeded
            behaviour.

    Returns:
        A DataFrame with the columns ``"Sequence"`` and ``"Sequence Source"``
        (always ``"UniProt"``) and a fresh ``0..n-1`` index.
    """
    source_label = "UniProt"
    records = []
    residues = []

    # Stream the file line by line instead of loading it whole.
    with open(file_path, 'r') as handle:
        for raw_line in handle:
            line = raw_line.strip()
            if not line:
                # Blank lines contribute nothing to a sequence.
                continue
            if line.startswith('>'):
                # A new header closes the record collected so far.
                if residues:
                    records.append(("".join(residues), source_label))
                    residues = []
            else:
                residues.append(line)
    # Flush the final record, which has no trailing header to close it.
    if residues:
        records.append(("".join(residues), source_label))

    frame = pd.DataFrame(records, columns=["Sequence", "Sequence Source"])
    if frame.empty:
        return frame
    # Never ask for more rows than exist; sampling without replacement
    # would otherwise raise.
    take = min(n_samples, len(frame))
    return frame.sample(take, random_state=random_state).reset_index(drop=True)
48
+
49
+
50
# Obtain/clean sequences generated from ProtGPT2 fine-tuned on membrane sequences
protgpt2_sequences = pd.read_csv(path + "/ProtGPT2/protgpt2_generated_sequences.csv")

# Strip generation artefacts (end-of-text marker, doubled quotes, newlines)
# and replace 'X' with 'G'. All replacements are literal, not regex.
for artefact, replacement in (('<|ENDOFTEXT|>', ''), ('""', ''), ('\n', ''), ('X', 'G')):
    protgpt2_sequences['Sequence'] = protgpt2_sequences['Sequence'].str.replace(
        artefact, replacement, regex=False
    )
protgpt2_sequences.drop(columns=['Perplexity'], inplace=True)
protgpt2_sequences['Sequence Source'] = "ProtGPT2"

# Drop every sequence that contains B, U, Z or O.
# NOTE(review): presumably these are excluded as non-standard residue codes
# for the downstream embedding model - confirm.
# A single vectorized mask replaces the per-residue Python loop, which
# recorded a sequence once per offending residue and raised on missing values.
has_bad_residue = protgpt2_sequences['Sequence'].str.contains('[BUZO]', regex=True, na=False)
protgpt2_sequences = protgpt2_sequences[~has_bad_residue]
64
+
65
+
66
# Load the MeMDLM generations: rename the sequence column to the shared
# 'Sequence' name, drop the perplexity column, tag the source and renumber rows.
memdlm_sequences = (
    pd.read_csv(path + "/mdlm_de-novo_generation_results.csv")
    .rename(columns={"Generated Sequence": "Sequence"})
    .drop(columns=['Perplexity'])
    .assign(**{'Sequence Source': "MeMDLM"})
    .reset_index(drop=True)
)
72
+
73
# Reference sequences. The UniProt alternative is kept here, disabled:
# fasta_file_path = path + "/uniprot_human_and_reviewed.fasta"
# other_sequences = parse_fasta_file(fasta_file_path)

# Tag the test-set rows with their source and draw 100 of them at random.
other_sequences = (
    pd.read_csv("/workspace/sg666/MDpLM/data/membrane/test.csv")
    .assign(**{'Sequence Source': "Test Set"})
    .sample(100)
)

# Stack the three sources into one frame (row labels are not renumbered here).
data = pd.concat([memdlm_sequences, protgpt2_sequences, other_sequences])
84
+
85
+
86
# Load the ESM tokenizer/model pair and make sure the model sits on `device`.
tokenizer, model = load_esm2_model(esm_model_path)
model = model.to(device)


def _embed(sequence):
    """Return the pooled embedding of one sequence."""
    return get_latents(model, tokenizer, sequence)


# Embed every sequence, then build the feature matrix: one row per sequence,
# one column per embedding dimension, indexed by the sequence source.
data['Embeddings'] = data['Sequence'].apply(_embed)
data = data.reset_index(drop=True)
umap_df = pd.DataFrame(data['Embeddings'].tolist(), index=data['Sequence Source'])
96
+
97
+
98
# Project the embeddings down to two dimensions with UMAP and store the two
# components as extra columns next to the features.
umap = UMAP(n_components=2)
umap_features = umap.fit_transform(umap_df)
umap_df['UMAP1'], umap_df['UMAP2'] = umap_features[:, 0], umap_features[:, 1]
103
+
104
# Scatter the two UMAP components coloured by sequence source, then write the
# figure to disk and display it.
source_palette = ['#297272', '#ff7477', "#9A77D0"]
plt.figure(figsize=(8, 5), dpi=300)
sns.scatterplot(
    data=umap_df,
    x='UMAP1',
    y='UMAP2',
    hue='Sequence Source',
    palette=source_palette,
    s=100,
)
plt.xlabel('UMAP1')
plt.ylabel('UMAP2')
plt.title('ESM-650M Embeddings of Membrane Protein Sequences')
plt.savefig('esm_umap.png')
plt.show()
benchmarks/Generation/Visualize/mdlm_de-novo_generation_results.csv ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Generated Sequence,Perplexity
2
+ GEGQPTLDAEGMPKADEGKMMTFKSENFTDDSVENLVLTSYGVYNPVIFTDLVIRTPKEGAVVPPTVVLMNGEWTEVMPNLTGAETFDTQSKYLVNGLKRYGVSKKKHVQVYQMARRTKDLLTMIPDGMASADFSFEAPGRANTMPAVGLSMDSAVGQPNLSRLRGVDVFFRYIVYTADPFGSETQNLEVQASERTNILFLNQQKKKVKSGIVVQMQKGILFERFGEVMDGQRPSNQRVGSQDMLIGVGALVKLNQKKIRTRIIQLFNLGYDDSEAIDWLPTTVAYLDSTYYVAMTTIQSIWVTDYYGLQGLFPFNQNKIGKHGVEVKHVQYFLEFVEAYVDQLEDLFTEYNERNSKLSNSNAIQAITIAEYQQLKDQLQLLTTENPIVDSSMIALRIKKLDNSATRELVSQFNRDVERATPNITAAQISVLKDNMTILLQDELMHMSDLNGEAADATYTLQAARESLEQLTTAAEFAPEYLTIEEQDISDFKARMELLKEIVGSLSNRIESAVKNKQDKEGIQYAMYKRPNRIDILIKNINLKFKGIQFQIDSIVAKVRNMEAFIKALVYRLDNVRISLVQRVGNRRHLAKKEKEPETVLIVNLRDYRSTLILFDIMTNLRITDEGQPENILRMKPVLDNADIPTENERIPSLSMPLMVRYTTVVINLPELDEHKAPLGINIVVAKDAVVSRLEWEWEGDVFKNKPYRIKRAGYGPDYVRAGALAQVFIARSDTATQSIAVRKTANEKFLLRLPRLPGSLMGEVVLKSFATFHQAFGTGRNNVYQRDEDSDKKYNQTLIDYWFDLNRFFGLSQREEGVQMMLLVEEPFTAGILSKAIVFDDDKKSAFLMMARAFLVYLPLHHSPDAPLEVANNSPKNIRLNLQATIAARG,18.2131
3
+ WTTGWGVSQDLIDSASMSPGMIWILLVDSYKERWFGTYWWGTSTCKEGAFPFEDVMQRIELRILKKYFYYLAIISSVLTLLMIIAKLVTNCLSFANIHKSHRYFFCVNCFWFISQLCNDLSAFPVLKKLESATRFVIYPSPVKAVQLDTMPDKIVLYLIFLNIFSTHTVLVFQSMSLGLITGIIDIPTAKRIIVPNLGILVIKTFSSKNCKLSLLAPEMWPKCMYDYVAFKNIEAQIVITSTSVGAVLCLLLILKGSVFVSSSYMFVGGKPANPGTGTRMLLPKDDHFEHKFCHNFSNVEKISASSYAASPEESILLLVNKEEHNKLRVLAVVPKGARNVLVIEIMKLKPFQTTYNDLYLPRDENNQLQKNKKVVSVGKIVLKDPASWVYLPQGRLKMNFKKAYIKSGAAPILLSFGQRLISVDNAVPLAKMRTTGITVLEMAPRGSRVQAIVVLPGQLKCGKSETVYWFTVSSIDNNRRGIAKYMGGVTYRGRAFIDMDKNLAGPPLVSDAYQMLFNDWLEMLCGAMKATESEKVKSRKGASELRVIHRSHHGCIVAILDDLYRLRFDLVDIERIGMINEEGRINGKIRSFEFQNFMLTSKNDMKTGFVNMPESFKPRTILTGDLIDNDWAPSFDLAAIRHGNIQVLVDGNDLEGSEEATNCHHGNAFSLGPQGRKVVVGAVVAPKTATPKCNSISINLQELPANYVVLGAKALTAQHVANFSVNGTKHDKKTCKQHEEMWKMYIQKSGQISKQASIKQCAILLSLGKLRATVKWFLGKYLERIFHVLLKGCKTVETIIDKGRMHKIKLVKFRFGIWIFLSVVCLAELFCIMIFVAPEIVRGLHTLILFLLFMMLLLLNYADTGHEVTGVPYY,18.1991
4
+ GIISVIDLGAKLIVPGDLFFVLCGKNRPPTSGALQYTILHTKKKLFCCGPTHAKHICLINGECIRDGREKLQNLCKTSGKWTEVRRPKSTYSCSLVYRRVQRRFCGPQKARAPVWILYIVLLSAIGVIIAVTINWVLQVCIILGAVVANGFLIRVLSIVDTRNQIITRGLRRYGIYRNSVKVACTSGSVVIVRIKFMEDQISGGWRPASFIRTSFKEFLPASATAFSRLADCNEKLIEALV,17.785
5
+ RARILNRSLESARKYLDFLKIDKVEFYENEMTFRVFIEEAPEFKRMEITEDKIRLRLKPDKIRKFVELGNLFKYTDALQLAVQLEKQNKEELVIAEPEVIHALHKHHNHMPIFHLLEAFNDETVAEIILANIGKMPAFLFWVWNRMSDPTEDRGKGFNEKVKRKNPTIVSILDKQVYTIKGGFGAALTKSILGPLLATQNIKIKADYDESLADVFVGLHFVDGSIILRPWPITGKEVALSEEVTATDKVISASEVGSEEDKFQLTSIENNFTKTLLEIKKRRENAFEGAYTETGSKVSDKPVRELKANLKLIPEYQAERIDQTYWRKILRLSRSLISEPRGARAYLLRIGERVDPHRYRPAIGAEMLMGIPNSITTGFKISKTLGLQAAGLDLIQTFRSLSIRRMITKNFMAILIDKPGLKAVFWFIPLVPFIAVQLLIYGVLVGRAKPGNLVEIIDSMVDGKFETRPNGSPAASHKMVIGVKYSLIYPGNQAKKISLVRWNTALSKDVRGQAKEIDLWQLISYYLEKMRIGPSAVSNVFQSVHDGLKRNELAVLLIMDPKTRDDSMILDIMNLRIERGFVSLIKSYIHDYDEEVYMTYREILNQNVFLMKYEEYATMSADLEAYWLKSIEETNLRALRLPGAMRKQLFLANLCRISEHLDTPTEQDAFSNPEGITIDEGFTDEARAQGIGFVAGFVDEKEFEQRYALLAKVYIASLKALAAALADSGVKTGIKIGLGTVERIEVHKDGMMIDHVKMEGPGRFPIIVGGEVSPIVNGATIIPSFIKILADGKVDEGKSPNKTPTEKGEITPQSLYRGMGKTVVLNDNGGLQAHALTWAINDEYSYFVAMGTSNKSDEKQLAALSNSLDMTTFEDSAGRLFSSIVKAKTLSENGVITENGAEFL,17.7595
6
+ DIMLPKSPLFEEMATLGFIGHTLLAPIKPWTKATATMVGITGIGVTIYWGVPDMFPFSPTNHWWVKGMKAIVPSIIALQIIDLFYVVLTGLTSRFVYPIVATFYDHYFVNVQILVTGIACTLVYPETHGDVVSVDLLQCRTDGKLTQIPMEEALELINFIDQIMEKTKCKFNISEGYYEVLMTKKFIVKGGKGVGPDTDPEPCEKYATWGRLRDPTGPIRPNRAEKSQIAIYAVCGGAVLQKLGVPLLDNEAPIPSQLLIFAIGTVGIAAITIVALIFGGIDIAMSNPVYRPILAYSPHNKLLYPLSTWDVGYYNVPNVTSTYVVVVVPPALTIYSVANEAIKVETTPIPVKFAEILPTGETALLSSYPLTIAQTDLNARYERREADIFTKHEGMQQTFKGEVVPLVSNNRLKSPSGVQIGCAASLMTVPEEDGMTPPRIATTWFEGPYSAPASDPLMRMPGDFYGYGKGTMDGTVSNEMNGISHRPVTLATGGNVKFSPVMLPWYIGARYGLDIQHSGNRRMAHWLTSRAVMGFFKKNVARLADRVNAITLQVPSKDPDLNNRPNHAVMSRQWITAVEAIKELAICSLLNQFREGLGHKNDTIEADLNPFSGVIAQSSMAILKTAMGATRSSVAQLSMGMALEAFRHQMTGTAGIHYLMSVTGNGPGRISTALNKLDSPLPPAIAED,18.2088
7
+ MASLAAANKRRSITSIAVFLGIAAAVVVLGISGKVTDKNTVLDFSYGKNRGFKPQHLSEYVRSRYPAVVIAKGENLLRGGRFLPWVRPPGLRYVAYYDAIESWDIEDSLKELVDLRAQGLYADVETAAALKKASVSRSTITEMVGMHTNQSEPQFRTEQTKVAKSNGTMFVFATMSFTRGRYEIGSLMLSAIGKRWVEEPTPDTKMRYMKLRPSIQLLCRTGGIYRPIFQLGGPEGIFYHDGYGENSYQVDDFIWKHLERRA,17.9845
8
+ VYGLLTTGSASSAMATTTLMIGVLFGLFSSFALPLWSWLQYLTTTGITTATGARYFKNIMIEAFSSYSAAIIGTVSIVPLGSSIPAAASASAVGAFGAITGFALGIYSMLFKKMDSFTHWLFSAGAGLSAGLAGLISGIGAIQIGNAATATSGTAIPLFGLVLRIIVSSVQGLLGTIAAILLISN,14.5579
9
+ IGHVFHLLHMALPIWRPPLPVTPGEPHPRPIADLVTPAFEYKTLLRCPHPHVSPIFLSVVLWMIVALVLAGVMVAQALPAPTGARLLACYP,17.2759
10
+ SIIFIFMYLTNCLQWRQNGHLQMSGLLFATLGVSTVTHTMMLASRGLCQAQKQRIRRECTLFGLAVHFCLAVGLFIASVSFAVWSSLEGLDDDANTVAVMRWWGWTFSFERYATVKVLFDQGIQSTYIMSWLLLMTKREDFRLLLFFMLTMFASILVPFSRGAHFYSLSVAFSNFATVILPGVNGVGNEIVQQIIFVLLFTFPMFLLIVDIAFFVNLIFKAAHP,17.039
11
+ LPDVLYHYEERKFVITRSEVVLEPNELFIGKIAVDVANYNIKVKVDLRISSKYVVFNNSQLTWNDHFLQPLISDRLRWVIFRVCVGTSSPQLLIHIDMIENFLQQLLSFLKGIVVQQFVGKVTLIQEDMKIEEETALLIEIETYPEAELLKLVRNLIIKVEDRTIGSIRHEAQLAKINDWSAKRIISDLNIGDIDNGEHVLVSAQEELESSIMERLAGHLRKFVNVGTWTESAAIDIVARIYGSLSVELHEEWLVMLEYLFTYPNDYFPGMYTVQYYQNADPGELLKNHALIIDELQRLELYEG,17.8596
12
+ SQGLDDLIMTTVADSEKDTDLTTTADLNMVPIYVGSNETATSQLGMIVKRKRPEKPIYVPVHSCSKDDRACAFVNFFNLARDLGYIEQDEQRVTPDWRAIILTMAEDHIHLWSHPNVAILKLRVIGEKLADQRETMKDPLNTRVEQVALVQAPKIDLIRASYGSLLEYQGEQKKYRINNTLSRCESLCADAGVGYMVATDAKVCQVEGETVDNNTGKDGDKTEILRAHFKQPSAFNKGSGVLRGHIMMTLGIGLLILLLYVIIVFFLHKIQNATFKFRIPRVAIATSLVADACMSVLAAGIAYALANFPVFTAKIYAETAVVLVLLVKGRLFIGKNKEIPMTARITIIRLAGAVLGFAATIIGIVILDPVLSIDGVSLAPGKVPQLLCLAQTAVREGGHQTWDLQLFELSKLSGMKIQPGKNIVRDPDKSNTAEEPTVALWDMVDVPGDIDSALQKDPVVKAMFKPHTGETMLMRQDIWAVQRWVMNSLGKLRLGKEVAILRKYVDTTHPAGCCDTGDAAIRQAQTEGKTVHSDGVRVLEDSVRMVGLDGDGKTCVGQAEEQLIQKFPCEKKMADDVFTSARALALNASTLIEQTDNGGDEWGENDTIKQVIKTGRKVEGDAAEIATPDASWNDGVYRKSAVFSSVTSDCITDIAGRTNTIVTIKELKGPRSLPMITNLRKRTALILAEMKTLIGPGGLYIDKTGIDCKVKDEGKIDSQIKYEIRGIELYGNMTPAPGIKPVAFTGKGGSGKSTIIRVKGSVVPSFVPINKFGKGRGERRTEKNADALLIPFSNAKKLEGETKVLIPDFLFKITTRNVVTVGRIVVGGVLDNSDVFEGFDSIKLVQAAMVEGQKQVTIVGINRKEGPVYGDNLLLSADIETEEYMTYGADQALAKAAILRRSGAVLFALVFGGNPNPRIFKGTEIDDVWLKIKPRAQMASVKFDEYIKQGTIAVHGGGINNGKYLVEGEDDPCDPNDQPLP,17.9744
13
+ MDILKKLIGLSSLLALFLLTPDLLAEIVRDIVGVSIGEMPEIYIYLLAFYLLGLMLASMTTSPPGFSFLTTRIIYCIFYAWYYILLTLIVTILLIIGKTEGNYKISKQFGVTENGVIMNMIQKAWNFLSDISTGNYLITMWPLNHFGVVPWFNKAAGIAWFIGPYFTYRLSQRPVNFIFSALSFVIKKWLSKIWGKFVRMACAFTSWVFLMGVATTLVLVIFNEMKWIKCSILNSKQWFGKLLGYMRNSLTVLCQKTSIGINMMLVSVLILILIGTIGNGDTAIYWHILFLIYSAIGIFAFVFVVQIVVCNKDRGKKTDLSPAVYAGLAELLPSLSTSYDNVNLAPQDYLTALNVIDSLLIKLVLEIIVAGFLSPLLYDFRLSSDTKLIFCFISLILFGYVFLGFEKDKAESEIGHSRVPSIPRNIHNHTAATVVRLREVLYELFTSQDEAHLGAHEKQNVSKVLLFALFFLFVSTYLTISTPVSNVNCTSYRLDTPFSKRKRQLSLALFSIGCCLDGFSTMQHMVCGEEFQLDSFFKFFVRFGKVIGKRVAMFFFWTLAKALASYSDAIIAPGYSEKMAHFPPDQFNGRAVDFIDVDEDLFANGFSEGKTRVAGPGEIVIFYQIGGNKFEAMFTVSEPIKLLYYDKGIIQAEYKSEFGCELITVGFTTPTVYDYLSPVPAYYFSLVKDPTGTFFDLISVPQGSIGYVNAKFQAYGEFWFGRIVHTGQNRNYITSLPLLIHLKAGNILFWLICVVDLTTTSILGKGNKRAVEVYGLSLLSQCDSFHTEVKIIEEVKRFFSLKQRKYLSSIFYTSYMNIFLALQYKAFAMPINAGVFVTDLDEQAGILIQAKKTRRRIPPRLIFVRDRVSDPNIKIENSSPLFNVYLLSCGTDYTSKKIISIDNRIWALLDGIHKEELSYEFNYE,17.5258
14
+ FRFCYTWVVLILVPSIFIRSFLRWESRFYFLKELERKMSGGDDLVQRSEQVETSCPVSSRCNQISEKILNWIKCEHKRVLVGGDVEQIIFPYTSSPTQSAEFQKMHQFQFLDDSGTQEANYVYQRIDETGYFRFADAAEEFTGALDVEGMCENWNVFLCMNVSTQISLILNQAKAYMFTQVLLQDGTPLVQFLDPDDQRLLVNCEDNEASNEMQDANRYQQILDVDYLLLEIQNQYYPAYFLVNLNHADCFKGTPLFTPKILEGVQDVVTCRRLVWLKFALNRYDPYDSVGSLCNTPRYMRLTRRLMENWDLSGVFWTSLTFLLGRCW,19.2309
15
+ TPGGFIDQNREISHATRNADVNYSLLLLGHYTALAGMHAMYLGPDNVVILTEGGDFATLPYTAAPTFTAQFWQMKILAAGFSVVIAFGHFVFGVSRFWHGMLDVTMGHSSALSTLFAGHIGHLICHAGGSLIFFNFDSEPVVGTTVTIVLPLWFTHHNLVLIPWHEWTLNHSQLVQVLFKPNMSFIFAFGGHFRGMHWGIGGVNEFADGHNTGLQYHHSFFILLSLLALAVHALQISRGIIWPARNWNRAKDFWNSDTVPWISYVVYTDAFWALLAYTLGAYWAFASSGNLWTSIHQRYLQSEQVTVTTHATAMFGNMAWGGFITPVHSWIFNQGKLWSLSQISHVSRGWRFVLNSFHHGLWFIALVGIATWFYWRLHFRWGDTSLAVEAGFWNIHWVTSNAAPLMFFILAYILVATETSNKY,18.3108
16
+ RVFFVQNLAMMLFLLLILIPLFGDKYDVLVSCTDEAFVELNYLIVLAKQWEIGCSERVVPMAAILAFLINFGLICITLVVGGNVIQWYSPKLHEKNHFIWSCLETIMVITVVLIFLQVLVCLGFTNLITALCWLPGWKLVAPWQSCALRQRITLALLLLKVPAFLGILLHVFSKQGWLIISGVQQLSYILSSLTMIAVDLWGGSIAIQDCRGKHSLIVLKVRVLMLYAPLLSSYVYWFEYAVGTCSRYFLEIVDFLVLAFMIVVLLILYGREYVERLDNIYSLVDGANVAESLTHTYILILIAYPRTNPNRIAVHIKLISFYVWMIVIMVFARKALRRLIPCSHPFGPKPTVSDKINAVQSGSTKAEWESIEYFVLFVFVILVLLLVGLGCIKLYDTFWNPQEDLDTDIKTDPFNNSLMTIIGVPYLVTVVIRKTMSLLPLTTMFKIVGVLIAGRLNLTIGWAMAYVTAFWLVTIECIRFFHPQPGVSAKPECLRFALTTATVLVASATIVFSADINRNHKGEISKAAGFAWLFAYVLLMFLAVAFVIWKIAERGVLGQSATVSRSESEEAVVYQTAKDARADIFPLVDDIRVTEPKDSAGRVINLGMPLTPSGQSKNLLPAPGANPVPTRPCVFNACWTVPDTVLVFCTVAAQMTLTFRELVLERVKDAIHTGRCVHFWPDLAICADVFDKTGLDNISGVRMNIAHGESELSAIEFNIKFQ,17.5525
17
+ RWSLATLFMVISLLPVAFVNFFSFKEGFHFFGFVFAIITLGLSGIAWITKLRDPVDKMFFFRVRMLGWLRPTLVYFIMYGILGIISRLTALVRFKAMPIFMQLGHFFPVVNGILIFVANRPMKAVRLQARILNRIARGRWTGASYPEDKPGETMTDEEFICLTQSGMNLGDSFQIIENGTLIPNWLTSSPLKVEVLLYTYFLLGLFGLCVSLRLAGCGCLPEVIRRMWCWIWFALFFFSSFWQVFRQLSALRIALGRARWKKFDFGPVSFGLVVLFLVAVFLSQVLLVAILDIDEMRQKFAEVQTALTIPRNLPELKDSIKSTLLPFQGELQWYSQWTSLIVYLTHLIMTGMGKAMELSWQLFNLIWAFLCSNNGFLCFFQEYFLKLFLWGSAASILLFLPSILNLVQRLVPFTILIFFCVPPLIAVHSLYNRGLGVFENDVGTLKAKAVQTSAAPDWQITETNGPDEDYESHIMAIVTFVNLLCLHIIALLMTGTNSAQPLLFELKFDAKVFMAVFNGLIRTMSVLYSRGCETYALLNLLASILLKLALWFWAEIEEDEFASNISLGTLLREATRHIPVLITPVAMVNGAAGAGLTLLWPTRGHVYLRGAGNKRPAGRTSLGYMAGPSGEQFYIRGAFWMASIEISAGTGDVEINGSHIAFFALKGKMIKLTLDGKPASKIKDLVTRYADVAVDDKIDFEWDALAERSIWDKKQLKKGMQLNGSWPKTNVMPLITGQDMSEKYLYARVDLQNFNYNVKGASNKEGIHIIDPFGSLLPLVVFSSFGIIGLGFLYKYNVWQDTSTLQEHFQKRKTTISSSKPVKFTMDEPKLLGPMVFLTFQIVVMFLLGFHKFPWLYFAYIYKN,17.5249
18
+ VYPAGAALAKAAQKALENAIQEHYEVAMREELEANPERRLIAILKGLARVRSAYAEIDIMRDQAQNAIEESIEYANDMYKEGSYAIVTPTVRIHHSIAPVEMQQAIELMADLAALGYGDAGPVVAQVIQLPGLKFRGQTHPGASPYKIDVDVAIAAINLAVERLLDALQEVYQQPPKTVKVHRVSASHDVPLVFQVQVFVQFKINGAREGDFIYPGRDVSPQNRKVEKFDERRKSTRIIPIYRLRLQNPGAAFALKDHEAITLGFAQHYLLGNWPIEVGETPRTTQDGLPSMAEKAADSTNYLLAAANFMHGKPDLEVQMILKANLCSTEKKVTKLDRLALAMNYAVYLVMLAQDLSLFLKVPQNVKVHDGMGGDYQIMMSTLPSSVTEAEIEKGGDMHVQLKALLPVSEPFDAEDVIFGQTARVDEYLVDAKMAKLLRPPTNCGTNYKENSENAQYFPLGRNINLVPCTMEDGSLGLTGLEGFSLSQAGNQRMWAVNLIDRQGKVALLAEFVLNELISLSDNAEQIADSHTLKVVGVRGNVGNLWTTGTMSDKIEYTPVLMSGETKLHASLGHLIASTPDLTATMQEKKLTLLLSPPAYDETPPKIEKLVWPYEKNAEVTGPWRNITKAIGTSISDLLSLSNKMAKVEQERDAKMETSELQKYDNPEIRISRKLVSALVIIISLLIFALRHRFALATWRGCVVAIGTPSTPLKRLSGIVRQSADAGITTAGSKCSRIRIIIKRGIFMTTLGISSTIITLVFAYQ,17.9413
19
+ YMKRGMVHRTLTLLLIFMVLALGFAIDIRGWAMFLPEATLMVLSMLGFFRQGASDPNYDAVMPGVVCEIHMRTSMLFFSWSIALAYLAMLVNSAGQITQSPKIVDSFTKIVSAGKGLLALIINGITVAKPTDGDELFSQFSLTLTLTNIGSMSIVPQIRLQIYRWLMKPPHGFLGIFPVLSAGTSLAIALFNFWRNSLTEQYFKFLSDMTNSINAQVASMVAHRSMAFNWIGTIWQYCMITVFLLGFVYFYFLSETNGIQLRLDIDHSCGFSNVPIVFAHEFLTIACKAAAEILKSDSDNKVKVHVMSKAENIAGSGQLWLEVFESGSLPGANASIIQVIPNRKREADIVGPGTVDGLDGATLLLSPNNIFVPPGDLVAARGNKAAAGATLGANGTLTVDARKYGASKINVGYDTEAVGLAVCTLVIGTGDSVLPTAKKQMDNVVAFLIKEEDAWALQQPLKRSAERGYFALTMAMNPDTVAFATETGLRVCDLMNTLGDLFSIGPAGLDAEAVGAQGLSTTIDMNKHVFFGLEAKFSSSNINQPSSWLGALEAGLGNWLSLRNALRGDGPQPQGP,17.4011
20
+ AAYAQKNAKIKRKLEETVLCGGCDEGEYRRESSFGAISASDGFTPDWEHNLDGQPGLYVTKLIYKYIQHPQYLYEILAVALLGVIGAKTSLFEGLSHPKRRTESLAITFNSAHVSACLTVLTDYTRQLTYTLSACLVTLVSTLYAVNLIVRDKKIAADIQFFVEASDYLKMGLEVTRNENVTPVNDDDFFSHILWLIDHTKPTMIEGHFREYKLVNKFFILEEHGLVGKRGSMMDPINTFIKCEKLLQLIDTKYGGSVKKLKSSKVAFYNAVSEECAPVKITLPKTSDILAHRYVSVRDIPARGVPYTHSFSSNVVSAITDGRVMDKAGDFDEDLAIKIMGLKLDGFTVMVYRLDGFRMGETSVSKIATLEALIKDDIVTHELITKSSFTRDYRSMERHDFVLGSNFPYCSPAHEDTIEFKQKRQSYVGRAVADAKVEELELPATGDRGEVKDQVAKNMKVLTNQAMKVHVGTMLAPDGDIYSITKENVLPACYVDVKGYLTRRNILGKLKKFMDVFEDFAKVINILDDTGSGNHRFNRYWTRRDSRLGKPLLLTHEDDLETNVADNRRIRTNKQRERCLVRVLNLECEKCHLPEMVVLGIFIGSSAILFTLFTLMSINGVNVLLDQVPPSGFGASIEGAMREAKVLVRLGEFVANKANMFSQERGGDVPAIVPMTEEQRSDLNKPCKEERKISKCFTRMHGSWGGVKRMDPPFTRGGYLMMRQTRMGIWISFDKRKFGKTQKFKYLDCGMKDPNVWKRNINVGCHLVNTYADTNFNCCTQTIQAVVESHWTEPLFVARTFQPVSICLIGMLQFSYGPVMAGLKTPKPHPGTLRVVNVSTVNLMLFVLFNYLRPAAYNGFYGKYTKPFTLGVSQKPRAWSHKIITPPGPKQDLFISFFSHLVVLIFMVVMWIYFAGTVTPFDFQYYRQVSLDVV,18.5249
21
+ MKYNNYALLSTVTILGLVFTIFNWANDWNLHLNFGLTTYLFVGGTFLILTTFGVGQDDPSYLKGFTINLAGKMIIGTHLPPLVPTPFSPFIDKISKHLAGAHVTISAVTVDNIIGTLLKLLESGDNREGNWRAHRLAFHAIGATIFLWEIVISIYIGFNDGFNVDNGEKVSKTAELITTPSGTLDHTGSAESWSFDDDSSPLKWYNGFLISKEIKYKHRPLFSLTTQLSILMYNRIFIVLQLVIVHSERMKLSLFDLFFQNFFFFSTIPLDLEGLSGEYRSIGGKSDIRTLIVSCFGSLLHG,17.1279
22
+ KNEKTNSSSKKVQVADEIYAPGKPVAYVNTGTTQETIASDAILWLASEFSAIIEIKVVLFGPVTNDVYSCNIANYSPIPQGLEVVHKKYTNKNNLWLFTTGYDLNITFLNTDMLNLESSFLIIEGAISTSRMTSDKEITNFEVPGNAVVLCTYNAPSITSKGAKAHEASGGLAANLPREEQLQAILRSHEQYVSRKMKADCFPTTKAVNDGRILLFYLSAKNLVDSLPMERGDFNLIYQKMEVKIYLDDLLKTREEIQAARAFMTEFIVRQNGDIKLLGLSEISDTSDGRAEVLDLPLESGNSLSSEVDAVLVVGQLRAMIHGTGTFTGVFIPHDLISSGIDPESDREGIGNFSRFDRESLVLFGIGVYIDGVNEIGWFKKTPIAIGIGNASYRRSNCLQISFYCDVDANTHEDTGTSKGKTLIMATNEYSIAGAICEACGYDVEGDDKTDRQIVVNQPSAVTVAGMPGLVAKTHNGFRNFEKNFEYLNFPVSKVLAEEGGLDYFWTIPPGNYQNNVPWNPVRAQMTSWGVSTTATLVFGVTYSRTLLVSLRVNATLTTNSLFAFFASKLSHINTFRTGGIISGGLCSVLILNFVVAIYGVSLRAFGGALLTYAMVMIVVLFCREVWKVMYYADIYGKQDLIIFELLNFFVNFGFILTIPLLSTASPPGSIDIKLPGILRTLSLYNDNQQRRTFIGKLLWDPESKVYNLKSGEAKLLGANASGLMAGGSEGAVHEVETDTSNLVFRSDVSSP,17.7156
23
+ EGEVNRIVLDLSGTSDGSVIIEANKVTRDNVSDALLKGKNFNAPAKTSSYPAYVAASLERQDPKKTFISFFKHAHNNAHGGQGRIINLAFAHATQTKRFNVFFEAYKKHGLKFDQNTFKFHVPEDMSRKGTIAFKGNDGEITLVDVFTSSFRQQISQITIRQGLWDWKSTRKNELGYFNNTIQFQGSKTTGSADLIFALSLLGAIRTIREYYPFKEQYVLLHRTWTNLQKKNKASWEWASARDKGQLNTGTKQTFATSLITELPIKSFELMTSARSLPEMEVNQQYEHYRIREYYKCRGAGEDTLMKGITGSGADATKVLMISYMLNEGLVLILDYSQQNIKTGNTIAILKEQGLAIKTSPSYSIQRLTKLHIYAMENLEIFPLHREQVNVMNAVLLGELGVAADEVSKANFNNMPLPSRQATVLSINLQDKDKNRKVLLRALGDQNSFIPPFDKSDVQNTVNLMESITKNQAITFDLRQGNGRSNQLIDVDI,18.2889
24
+ NTTRNPTENMPTPRSWLTEGRPYIAYACAKCKSETDKANKGLLFVTKDKIIIKSVPGIADQIAREVKEFFNVQTPAEGWDLVVGDVTADASAGVRGVLGGIVFTQKGSVLQALAVAVTSIGTMILFLNLFSWGGGWVTMFGAAENIITSLAMIAKLVLENKVLLLNIGMSGAGICLMMTTDVSPSVLTANFLAYAMIDTIAFAGDAITYPFTIDIGDAFFKFYGGAEIESVYNKQSKPWPSWVAEQLSFASASNTGAGVTWTFSLTSINKSYTLQFLQAVGLLLSAQSRPPDLLGSEAALTFNVTYVPLGFEVNAARIKTMLSPTKNVNQIGNLKPFLKHLTESLGNLKTLLKQRTQITEDDVDVRKLATSIEVPEKNLLNNELNKIRYANFVSKRLAVALNDEIPDLYKVNDLKSSHIFLKSNANLGNGIERLNTGIDIVSNQEPQMLTMLFLKGRLIKNNGTTAPLTLWLKYISNLTTLTNIGISVEESANRFRNLQNSFFKNNNILEINIQVIANPTDAEKELNLVGY,17.3426
25
+ GAQNTDVILGALSNFILGAFGLYYWFQWGNVILHQAVIMSFIHLVLSPDWTIWFYPYFVSEGCLYRVVLAIVQRTAMTLHISPEVSKYGIRAALSSPQEMYSLSRGDLRWFFKDLAIQKVRWRRMPPAVMILVLFIAYQLLQTKTITPTQLLLIQGLLFRVYGNLMITITILGTVMGVSPFTVIYNGWGKPKGITYCEFPSAFLFLLEDYGSGEEMTSIALPASLFVEYTEKASVIRAGYILSQVDEFSIKNMITRERNLPKSELLYVAADSGVNHTLNICQFPVSDTYLIKYSFIPYKLYIEDGKKVEMPPNKVWDAIVIGHYSQDDYWQLAAFCNQEWDFANFEKMLARPQRLVDTCGMALAATYWALLVQVLGAPILDNCLWINTFAILFAAGILWQIPPLRQDMRIDLSARFKHSVIVVAAYPYVLRLTWSGQSQQKFDLFIYFFLAIFTLSFNSVHYTADPAREQFEWRDSTGKDIPCVFLFGLTVTYWYGALHTGHDPESNTGLSTAKTSFDWKSQFQPFDNQYTRQATELLGIIPCATLHRKCRETWTRQRVFNVMVDMQQGSARFIFLIQDTAFNRNFKGGLIQDRQDLRKMLAISPGEALRAVIHRREHAAIEKQLNDVRADELVVAAQTAPGERVQELLRGSGVSYSLTNFVTFKKNISDDERRVPAPELVFQIVIVCCWDSRIVKALLAIITITSLAVGDLSGVFILFRS,18.2048
26
+ GELPALAGNRCGEAKLFDILARPDLPRRWYIHLGSVFTLMLVLTFLGAFIGTGCWVDGGGFGKFIDRGLSQAPTFGPQVLTHLYPEAWAHFFGIADPAGGYWLYHIILFSGAHGVFIFAGGALARTLRLGRLLGMARALGMRPKHCAVGAVGVILFLTAFYYLPDGNPTFTPDQGYESGSTGTIMVIDNGAVGLLFHPLFGAGLTGTFHTLTLAHEGTASGEGLSNLSEGGTESETYAAARLNALFRLVANQGRAWRALHIYTLPFLSLGVCAALGLTVAHAWTAFDYNNFVAAARADSFKFGANNWVLAANDIRAGAGKFVHAGDEVLPGELIR,16.0959
27
+ ADFFVRRQSTKKLYGLPLDGSVNDSVACIWGFAVFWNGLVFPWVFAFVGLIGWRLQIRFVPGSVIGLFKFELILSLIPDALAHFGVEDIYANPEYVFNFPRGVLTFASTHGIRTLRALRFAYPFVALFGRKAAGLFRRMGVVCLMAMVIGVGFAVAAFFFGELMPTMRWTFGEGGIIQTPVFAAGFRSSDVPATALEAAHFLVFFLLGLIFMAIHTGAAIFYAGESAARKNEDSQTFSWSSASSARLTRQRDREILVRRNGTSGESPGLA,16.437
28
+ MSYLYLVFFMILILFHLNLLTYNIVKKKPPFNGKYKKWEFKRAFDRYPVGYIYYGHGQWKDERNKTEKHPRDQ,29.4763
29
+ KVAAIGVPFFGLLIALLLNITMVFLSQTTLSKYWFAWHIFAIILILLGLLVNVLVNQGSSGSTTSNFDSGMLAMISVGKALGWNIMARYTPWQTGTLNSISWFNIGGAVTVAVMGKMAGIELIERENSRTPEGFSSPWPVGQTPAWMGAGPIGGVIAIVGISVSAVAVSALANISVVDVSNISLLLEIPVNSIIMGEGVGFYYLIMVLIMGMITLAYSGGFFSAKFGGYSERLGADLAGARTPLNVYGENIPKVMRATASVPALFRRPVANLALSLWILASLGVMVTYFESVAIFNRTIENIGKVAITNGQSVDVMGFTDVYPLDVDESNFIAWRTAIPPGVLVGVTPPIFGRIELVAVNAGLLKLERKGVAQVIDTGPESFELEAKMLAPSMTGSFSTQAAIGGSAFAAMFQSSTGANSVFVSFSKGSVAFSIMAGVFIGLVVALMLAGLNWNPGTVMKKLMMSMTVVSAGVSSIFAMVKPLALTTSSFLLVESVVIFSNSIGASEFIGFAGGAAFMVNKQFVRALASGTGALVIGGPVFAIGYIAAGLGSVTAAADVGRAAFIMAGIAGVLTGVSMLTGSLVGSAKFPDRSEGKMKVLRNWWPGYSIARLAGRFETSNLLMFFTYVADQLGLLSKDLVRNAHNFAN,16.5548
30
+ NWYNIRAHNYVAGTTMVDAATKPALATSIATQLLGTSDYDTISKLEHNAKEGGKINLIMTNQFPASGKMVIQQGYFGRGSAVPYTNRLPLIQLLSLVDSAATADKEQVLSVGWAIDAIVERRASKMVLYNASKSFLLGKISNIMGSMLVNIQISAAGQYTILTSYDSILTSKFLSYNRPVVDQGAGMINMATGTTVGANGQLLLRKVKEYITKVQGIDASLLAFAQRGLGSVTQASISARRPTRNRMEENAQKGAPGEFSKVTDAGGGHLPGSKMVFKRILIPVFMRYAIMDVRVKMAKTTYCPQTQTPFDKWYYTLNFTLRGTGYTTVVANPDKTGKDVMRTTMHRADCTGFEVAGSVDLGLQDIQVLEMGQFKNFDVYLFLGQGEGSDKYAVAKLTNAPPIAILNGFSSTMTLKAIWYTWRWPTMTRFSLAVLYFAAGHIMTRKFQNTAFMRDGQARQV,18.0029
31
+ DFDMPDGGVVTPLKAGETVGNLSAKGTLFNPPDDLHMRGDHNETLKYHSVTAVVIAGLQHEEIIGTAQDESCGYSAEQNTHCVAIHAAHKGDHDSSIALETEKVAVLCGDTEEGGYIWKERRHLSDSLLARIKAMFDVRFYDSHYGDKPGMSWPALRPWMKRGDLRGAWPVFLGAGGFAFNLGSMLGDGYTWNIYAILPALNGLQRLLFALGRPIRAVKYVKDTFDGTATTSFLLFYPAPSVFFLIAFFFGAISALAAGYMFLLEGRASLPQAITASIVAVSVCWQYNALFVGLMLVGEFCPRFAGTPAGVMAILGQMHDVLPHLLMVNEAVLAFIKTILYLLSGSGEPPLEASQMEYSAIVGGLVRITPAKDLDDPADYAVTGYAMITLVGFAIVLAMQVHLDGMCGDFSGVRFANPLHVGVKVIFNVDPDILCGPDTVTVGTLLFWAGGRFVFFRAASRILLPVFLSPVYKRWGSRVSVVATAFLTCTIIVGVRIRYQNDEVYANGAIYSRSDCAPGMFEEDKRFRNLLPTLEYLNINCYFYKLKGHNQINVHTFNWASMVFALYKKKEFIKQALLGWLNGDKIDLERQKEKSPNSENHDSDDWRGDVTVSGFTRPNCGHQRTTTLLQKVRFRTRCMMSRLLHVPFRRVAVHFFSFVFIMRLFSK,17.9999
32
+ GRFRTYVKFYLRFGACHLPVTVFVFVNVAALVPFILIARLKFTSDPVHVTVEMFVEGMTFLTGSASIMLFGILMAFTDRRSELMSWWFESEGATSAGLYNEIGFWLFITIEFGTGLIGFGLRTVEIARALGFKPVINFMYFAPLMGLVSVLASIRLGMALSLALDMSPVVIVLTGLSGRDDGTNFAWLYGGIGGSGTYGTGLGDSPGGSSFLAVMFARGVAKLGSKVPEIAWAIIYALLPAVLGLGVNALPKYYLGELRVTGIRGIPFGDPAIVTRSLTKLLRQEAPVDLLVEPLLIRHAILVRSVRTMKIGELVQIRVDVPLESFEDSKIRSVDDPLLDGDDVISTTGQ,16.3933
33
+ GVSKWFDPSKVNEAYSLSLRGDKYETTKANKTELFGEISLRVKEYANLSSIYYSSTSGYKDGFKWSDNSSKNKKVKLFNHFNAGDYQAMWEASRYIHLNQAKDCTLSYSAWNGTDAVSVTQAAGDSSLTLYRTINSTNDTTYFLLGSMNGGFSHQEQTDCSTSIPNCSAQFPAANVPTQRATYCVVCSLHNDHCKSTDVSEGCAGKNLLKESCQASFTNYKN,20.0769
34
+ YLLWMHDKSAYMQKSRTPSVQWGYGVAAVEKLAQWWASAKGRGGWFVDPPSPKVQAIPNGCLRNIASGFWKPPVNYSHETSKWKFIYVTLAFENLYSAFWRFFPGFMGFLSPEWNRKANKWNVVGKYDYLAAFVLKFGASYTDQTHIITWARGVRDRISNISLTVYVGANKLGNVLLSLGGGLSFLRGEFQPYNHYRAKFQAVALYDWRMSMTYSAKYLQVLSGQSGLKETVMTSGFHFFRLTAPASVFRTSQRTEVYTLFLGGLGEAQKDKEVYYITLPTLGITYYSATLTGSFDFSFHVGLKEDWRSIRRGHITLHFGAGSHDGKLTLRNVVDITRGIPLKYVDFRGLEFKWRDKAFYIHAKPDPQAFWVGIAPSDGVKSKIGPLPTITRLTPQLLVAIDINYPMFPKDGVDGYGAVEGESRSYYVHVFTAFDMQSLFNGQVHANYQKNKPKKDVIVTAATTPSSEELIKQLTQKCGKRATFMSIDMQDK,18.4373
35
+ MNPLPYKVRFLEWTNDSAPDTCSEAATAEPALRCSNIVGVKNPREFDTLWEKRKTRLESGTLTTKLESPSRMAILKRSIFRIFINFVVALGALVLLVISVSLNVRNNLLDPAYRIGVSQNKIARIGIDLFNGPKLQVAEFKICLGQTVFHLNVLHTILGLLVFYFTLGGADEDSARYDHDQYLPFSFVTNYTFHFEVAHYAMEQFGVGALANLLFLILVAHTIFVVSEEIRRGMANRVNLKKTSKLNPSGPARIIEEFQYCAYFVNQVLKIGKWAEPAAAQFIGRHDMIARELGQKLFDDNPSQSEVNEGVTAARVKVINGCSKEPCGKPPVMAQDLASKILDQFGTYSDTPIIGRINTIMLNGNTENGQTVIDGWLHHLQQRLEVHHIPLAESYDNFIFGLDNTETTLFHPFWTDMEEGEYGNPNYITSGERLINYRRALHNTWGSVFLPLYVFFWNWSILRPPPDAETLLKYQISMPSSIRATAVIHYHIHWLTDEEKHYVQGKITQCQGATIICESTATEDLIEFVTLDPAWSHLTGGRN,18.4536
36
+ MRRANLTRADSIADGEVDSLVRASPSLPRTEDDAVYLDGFERRAPDFEAIAQLSKMRYAGMSGLMDELKKLHDATDLNELISMGEMALVESENRTNAIVRQGLSEVLAAEDLSICDIQIAGESGSVGFGRGLRNLTNYVIDVEVRPNGHLIIQAQCFHTEDKSYEKADSKPLDSVQYDDRKVGYQGDSVNAGIPEVAAAGAGRKVLYAEIAVGGDRGDTGWKLAPIGSVLGGGDGAGIRGWATAAAQIYNWTRLAEGIASIDRGLAINGGARLDGTQYALGVGDANQASPVLFTGGLTGAGPAHVRQFERLVPDHPLSKTLVVLSSINGTVLADNSAVGHVVARGNTGLEILTADTAKVANGYTLPVRGEFDVSSAGNITAVTAIAGPGEDISRQAP,17.1274
37
+ GSTKDQKQTFTSFVGWIIFCSVATLSSFVYQQVLLKGLSQVLDYLAVTGSFGGIGSILCFFISTIGSGSGTVRTNNLYQHAASIFWTIIGFFGIAEAAGLVASLVFYFFQ,15.303
38
+ SGLPAFLAGIYPVIGGSLAVSIAKIGPTVPILQAGQAACHSKLLPSNEKPVTIPVILSLAYGVLGWTLGGLGEDLLGELGQVIGIGGPKL,14.4576
39
+ RTSQIFEAFLLRTKALKWCWIVLHLVTLLLLTSLACAYYQVESAHSQQPVLDCAYHYKRLGDGWWVGYSQGVIGFGVTAFILLISHQEASGVQDETGKFARYWKLNCTIFLTFLVTWIGLHFMIEGIDTFIGYILMVAVASALLGQVLISINEVAKTTLLGNNLDGITLSYGASPEPVSKNLEGDPAVYAQIANSGISIRLWWIIWALFAALGILLFVMLTDRHPTPQPFVEAGYLEKGIMTVLLLALSNYPILPAVFLIVLTSADIRTHRNKVVYSCNDSKFISKLSAYFEQTNKEVTVMMETAEPIVHVGNYSSPVGAIITISAIIVSTLGSLGKRKSAFPVTLTFVVVLITVIAIANNVISPSDQPVGDNSFFLFEITIALGVDFSSFILAICSFLKLELNTIFGSFPKCCYFLLSFVIMLFSSETFIAEPLFSQILLALISVITLPETTSYFGQKAVSFIKFPCIKDGFSILPTLLAVLELFGIVRNLRLLRLLRSFRAFRIVSEAKVFCITKTVLAHFYGPLRHRLLMHTVKGRKEKLMQALMCLGILAFLVSAIVEAIVLLFASYYLSTCYLLPAFSFSTVTLSLLHVYLSYIHVNTILVALVVSIFVIGILMSLILRIHKNMKAQANN,16.9812
40
+ VGSETIGAPIENLPDPLQAPAITAKIPTGATVQYLAQEPGIVGVWLQPRMVAFKVNRAIGSISFLIFFFLTTFAWLYITPGQINVVGTCVGVSVGGVLIGWGILIPGDPAKASFKADKYRWVESLALKFGETAARACYGYLFLSVAAGLEYLNLFIF,16.1419
41
+ MNAMHLVRLNSAGRGSSVAILNDNLATGAAGVSSHMSEDDRIDVIVDFSRGGGGMQQEALAQYLTARLSSDGFLLADINKPNVNIQSVATSSQFEVQPRIQSNMDVLVINWLIQGKDSDFSTLIIQRGKTPYINSAHREKILLSLNSINVADKDIELDFDGYQTGPTQQLPPNVFAASIGTSLAIFFAKGEIPLRYMINSETNGIKLLQYISQSSPADMEREVVLVNHEKEIQQSLNTEKLADSELFLEGWSEKIDNSVYVANLFEDCFHRAVVGCVATARLDDMMGTVEFAAWLNVDSQGKLLISEIYTSFTPELVAGQAVVGGKFSTVDISTGEYEIFEKRAAFGINTQTASALIYLPMPRALAPRVEFWQLIEKLMKASNQSVMISAGVAGTFSGGRGLLYVNGLNAQLVGMLDALLKLQKIFAANLGANPNLSNVLIIGDTDSVLALSQGIKLPNGMSLELKEVNKLNNTFLDELSEIIGDFSGSSEVRSKIWTSTQEVKLGDLTEPLFVGVSSDIVALVANGNIELIIANAGVSPRANLDTAQVFQRGKQVIKSRTGPSLNAKGLYLVLSDQESIRSCQLTGAQNLLAMNIQINLKVVVRDVLSAAAMAFLAKECAIVDIGGCEVSAPAYPEVVTLRYDTQTSRSFGQRIIQKQTLGNAAVNCSVSDAGQSAPGSSGHAKGNNTAYISVIAARVGGGIGDLAIVLAGLIAGATAATAPNLAYKWKGNIAPQAKDVLSSVKNGDRSLNTRDLSVEPVKNELAGTTTLNWHTTFAMNSDSGWRNVHPYPSNGNFP,17.5854
42
+ AKAPGLVGLGIGSVSGLVVGLALSFLLGCVCTDHRWAKYDGAGLAILEGMALNDALLWVYPLQWTLIGGVSLDSSSVSLVLVIVACTAALAGVGRVLRAILRFFAPRTRSQRLLLALVLSEVAVQLVVFFAQPLATALPLITAFTDHTLQVCYGGYTTLSPMDTLGQWVTYVKANSTGGTSLRDPYRALSILLVSFGLVTVAVGVTLKRFTASAGDCQ,15.0093
43
+ ALAQCVLLALASGVSAVLAIIPRKETYIRAKIVSIKKAKYGLSMYERGGRLKGLGIPPWSKAPRSNHHLGVYADEIGILGTIFGYTVPMGALVIAILITFAHLMPSYIKKYVYLTQVEIENYSPVPHQVPAE,16.8979
44
+ KECARRIKGCLNFTGSASWLSFVNLFVKQIYTGYVFAHASLMTLLVWQAAMHHIVNMNLCDEYHWTFTTATSGPLGYRNFTTLAWIDSMANFVALHRHFLVYGGLYHVASASLFTAFVAHFIRRRSPPTSFWAYLNFEQKKFLSAYSHGHHLILGSFLAFLTHLDYFFDKFSVHTNAFSQSWVFRGELTPELAVNLGLMFHVKHFSLFHFSNSVLILALHFSHSGAFIDEMRSLTALESAYGTMRWVREGMGWHRGVVERWYHGSFQVKHTEEGSMQFAKNFLLYLPELPRAECYAAFYLRTDFKGNLALRRHAEYRRKFYMMEGKTLFWRATQKGLECQKTWGAGFARTAALTSSTHAVAHVANVTTGFVLGFFFVWRQVHK,18.4019
45
+ FIATDRKWIPLWILNMMYTLSGMGGVVAFSAFLAITRMGYDIKWTGALFVAGSVIEYVEKLFPQAGPAGTLVVLLIPAAATGHGMLPMICVVCCMRIGFIGWGAKILILPLLGNGKLLAIYGIRSPWGVAFTVPAVLILVAAGLVFEHTWKLVVHAYDIGFLLTALAVALLALSKLLWYKEPILYALLAFSVTALVGFIASAAGSFFVGRGCTTCQHPPSTIFSDNGRGKSVWTFFLAIGVTIQLLALFAFLPKVGTHQTVKDLFNIIGTGDITIMLEQAAKAKRRGVYVLNLFNDKCPKSPAVLDRTVSYLPPNVSCAVKATKDTNMPLVTLTDDMHFHLEDYGHRALQEFPTLPFNHTKCYLSQADLYLTGTDMSGIILVSLNNWMGEIGHLAAHTLECPDERAVFSLPIGDDTFKYLLYHEQTLKGIRLFSHLLSQSSRQVTEGAGGRDILMTHQTALITLRSLIAAEVFVMTNGTLKLIPIGRTDVLWEYRATAYHDCTMVGIPSTNHLTWDGQVVESEPLWPLSGYMSLKTGSVILVMIDTVGRTTYMLQNCIIYLGLLTVRLPASVATMEDRDCVLMGYLAFLVKTLLTEKGYCFLRRACELIAIGVFAAWFMSIKYIAVGAFTGGILPWVLSYAVLGMMFIGILYCLIMFRMGQMVERGIVVYGRSDDVSMQNRLPDVADPMPATSLVLSHEMFSGCLPNDVHFEIREPVGVPMIRFFDWYGERVLPCQQPFKEVSKLIALVLQQLAHMHEKNLDPPIWNVLRIHVTPARPFRGLGAMGVNVIISYMILILVKFLGITISEKWL,17.7597
46
+ LFKSSKINSRNPISMLNIKMNLGSRYQVLAQIQLSPNKINSDDDTISFCINTENLLSWFLPGDHFNFADLRVMWALLIVTIICGGILFATLSMLYGIAPTRSTTKMIKINDQPAFKLGLIRTHVTFSSAGILLVGVSYNLSEELVKVPYIRGGNLYFQFSTPFALEFTSICFHNSYEPLYNWLAGYDPYTGTEVFFNFGPFLAAWGAGVAGTIALVAHACVAVELFKQLKFKIKISKICSTRIILPVALTGALIAWIVPLISSPDILKTTGKNIHDGDTLIMIPVLLKRIFAQMGKPESHSIEHALAHNHAAPSEAQFRLAIDDSYYNQAISTCTSRELKPLLNRNVVRLLCADGKKTIRDPKRILESYCEAINRVFGGTFKDFLFGVVENSKLTKFFKYFLGVLDIADLSNYSNGALTTEADQFLVEFLDIYPEYHKFSQNKTYIRK,17.6359
47
+ FWNWRRRFLGFLIGVVVTLFFVEATGTFVDNWSTIRAMHKMTGMTFGDWLGTIEALLTFGFLIAHLTGGGTPFGWVDDVFVVVTIALFARQRIFRLALVGLRGFRLERAGSTLKAVGALRPLSSTRKLAAWLMGWLSMLAFFGLVTGVLVYVDVRGNWFETAPYTFETVTVVYNFYQEHGYGDDALRYGLALSVLAVSPFIIGILGISFNWLVVPLSGWDYD,15.5532
48
+ TLLICYGASASNYSDSTRANAYLNMPITLSDVVVGLIYAISLGSVFQVDAILLAVILGNIVLGAVAFVVASAVATALERLVGRVSLIPAFETAVSGSISGDLSSYPDLYKRTAQSVIAMAIVGEVEEQVRGAENAGEGILDVLDWQEGGGEARTTLNQIGDGVLQGVNIGEEELRSLKPLEVGNLDVASDVTDYDKAVKIDIQFALRRARAGGDVVLLDARNKSSIDFGTDIIVGTAGFGPVGTAPFIELAAKAGFNVMVRGGIEDGIALTDIEVVKHARIKGVAISGGTTASIGSAARRIGRARISVSVGKARFQSLKKVCDVAALDIQETFAVEQILLLATGGKQIRSVSSAIGKPYIQGPDGSLGDLLASIENTVTVVSVKQNKAAIINDLGPSDLASIEDRTPEEFLETTEDDVAEQNDCVLMNALGLNIVFEDNVVLIMDIELGEFIPAGREVQLLNSRLEKKQSKLRIAEVLLTLSSRALPGITRGNYDIEYHDLAAFALGFQPVFIGSAAREGTTREALIAAIILLLESLILAGMAILAVGVRKLVGFQVQPFSGLFRAIFSVVIGTAIVGLGLAWAYGPLHRLGEEEEVAQTKVGWGGSFALIISIVNAVLYLTAAIFLIILLVALALFFVVETIDHIFTFEIENTVSAVDTFLAGFGCMTPQLQKFNRQLHKIPNFFHTLDEFKGLMAHQDIIADFNRSIASLFLDYINAVMIFLDGEIATKILRALDAILGGVVFSAIIIGAQASVADSIITGRDITLELAVALLLGAGLAASVLALGVGVTIAGVGGAEKTASNEGQARNCRILFYFCVGCTSVIVTGVAVAKSIEIL,16.4406
49
+ TLTFMMEGTQAWIPWYIIMMVYHLLTQTYNMAGLLLFGLLFAGIIGLLASRPRLFDLEQRERINWTMQPLPRALTLVIYMLLPFSSLVVIFAIAEATTYSPPKQDEHPHRLTTAINVVVAPPYNFDAGVSWIPLALLGLAVALLQKVQLTPTNRYNRLFKLVQFSQININLYSGKAPITIAMDSKDTYPIDETMRTFAVLRETSKKDTVYIPVEVENCLKGTESLYPAADTSVNLYLVHGGQNHFATKATMHSIFIVPVGIVGPFLAVVHLIILGIAEAGKREEYYLLYLFGYLSVLTLKLNGTAIIDALIRDGIHCARLPGRYNVLNYVVPKVSAEMKIIQDTLIYWEPAATQWETKLFDKSRILRNSPGYKFAKLLSVHLITMAAYCTLILVLPTVLSEYGQRNSGPEKRVLFSCRLLKSRVKGKSRICFHQVPGRKMTDTAKKLTSGVKNIFRNPGYKYMESNEILLIYTINLDYKNNALYENGPAIQTAAVINNNHAGTLFLQDIDVIPNLMALSPFVFLVCGYAPEDTELVFCWVLNKCGGKEVYIAFSINRNQIEDPLSKLEVIANNIIRVIKDDYNHRKAAAKEYAEWIAVAEASIGTLPLSIAKGFFASEETPKELRMSFILRAMKQWLVVRRKHKHDCKNMDFKQRCKSVATIKRKPMEQSLCVPIEKHQPAMRKYLIITLLEQNLDRWAHEAERVTSHFLPFFNNNSETNCHICECLNEYYQDAELLLQNAKISSGCNEYGAIYYSGIPISGAVAQKMTNIFISGSSVVLLITSYGGDE,18.2811
50
+ VAQMEFPEGTTSSCIWKQGYHNPAVVIQQLTLHRCSASSDTICTLMTSQSNSTQLMDDLLASVLKILVGLISSDYTLIDVGGFTVDVDSLSLVYRNFHTNISPCNIDTVTKTPDDTLTFEQYRDDMRAQVEQTYKAYVSADPPETRSVKASSYTHVYRPIGMPHNIIQPIMMIVEDTPQTESGTGIKMCSNQRQDVVTGNPVEAFQTLAQGDHYKLMDSSTNKSILAVSNGWNLCLGSFDSLENNPTITDKEGEKHYKFMKNADDTLNSYLYLNAVYINDPTLPVTILSSNAGCKGLLEAIYKNNIRILYDSYPAPNAEASGNNVKSIGTITVASCMGKGSMCPCGDDYQTLAVAVSLVNYEYWDLNGSKSNNNVIRTSGTFSIAILTDRGNYTANRSALLKAYINLLENYAEERKKQIKATIWLYQRDGRSSGKKEMSCNDDPSDTGYVAAEYPGAAQVLDTDDLETMPGSILPSFQNFAQVKLFKQQYKGKVPVKWMHGYVRHNLKANYFANGYYFAPSEGSIINPVLGGATE,18.9275
51
+ TGPYKKLADWNERVPTPSITQYIASHYNYPDLVAVRRVLRVPVAVDATGKEVTVDKQDCFFKSDGVVYTTNYKSYPKTIISESYFAYAIPGDVQQKMHTIPLTSNVYKDDREFFQYKVSFQFTPPTPPDIQYPARADNDSEGVLDWTKEVPWYAKDCNGPLAKCYARVNTDEFYESSAARLHPWDFPWASRLHIPAGIYYR,19.3561
52
+ SRKILPIVGLIIGIFSVIAMIFYVLLKDKNHATNIETTPADVETIWNMTGLLSQSIEKAYTNPTREYITHADVLEQLKKTFNFDSEILNKAMNTVTQYMSENQGDAAVKLTEDFFQTCAIETQTRNPGQFASSYGPDHKLAKDQATDETIGEDNKSPFNDPTVFGIMKALLASMTNIIKIAMETLNLFTIESNVIQLLPLVHAMNPRSIEELRLTLSYFHKNLNVTLEEDRQKLASILEILRHLLQLFFLYLFSVQDTLQNWLLNIHFNPPLETIVPTIPPNDNEIAQMLIQLNTDSSSHLITILDKASPKMHMIVLGEQILNQSLKDFTDGLHSVKDWAEPTDVLTRLGISPIDNPMSELSKLWQNVLLYIKHQFTSISNSSTLIGQLKTLAHVRYQLLEIKPALQSLASYYLNIDTTMIMSYNLYAFDELAIKENLEEEEIHSKIPEEQDYLDIIAQDDLDLYLKNLIEYNGSIDQQARNRIGFSVISFVHNKLFEILPWLFGKDQRTIKIGLVIKNIKGYIPGLLAGKMEQIRNLSTDENLQLNDKLVVFSGMKQTNGFAKLTLLNMSPLISYYFSSKAAGLTWTSDFIPVLKISQLIALLQVYFLVMKSKTITGKYMLRYTDTAVKKNHVFSFHEVAGHFEGQYSSPLNAFFNHISRNTIPGNRKIIQTYPLLFGSLLAVIILLLILKLSLYPVKLNLATLFALNIVLVTAFLVKTGKDRLKATALLLLGLAYAADLLLGFKSFGGQGESSSREHKLANLIIFPLLMIKTIFVIVSIFALYYIG,17.5915
53
+ TPDSEYMSQTQNRYSENTCNHQYPTEWSEVIDHTSVGILVVSINRFWHQDKCHQKASFLREGAFFRAGILLGALSVLLCFSKWSVPPIPLTLLINVYVSEQWIFLGLFVGDNNEIANHYQIEVLLDFARPYKRYAYEILILFSTHVIIAVVFRNLVVYSPDSLLISISQTDRLQHGFCNLSAVLETVGILDIIVSLFLYSLAETSVALIIGLVVIGSAFAVHQAWAGMWIPGRNTSRVLREVKWFVIIFVAGRCTLWFYLFSCSQNNLIRQTSMHVFVTGLQICHLFQASAPHLVSYLVHLVVRFTQISSVLRRNNVYYFLGAPFTSSSSLIGGIAICVFPDYSGFEKLIFLSENAILMIASNLLLRDGPRRAFLVAREHQEVTLNWLSRSLIWRKEVDIVLMGLLMLLLIVATGNIFTIGEIARVVSSSITLNSVLSLIYWFIFNGEHVKPFTWVSLREMVNLQLNLSVMRSKRGATNRLQKMNAIQEIMAVDLSGGQRRAVLIARELAIAPRLVILDEHTATIDTVETSVLALLSPLLRKGTTAIVILAMHGRDLLHQLIGLIYLNMRVLDYLRHKKWNDMKLFKAMAELLKKYMTEPGFLRWMDRLLLYQLRNQTVEDLKFTFVAQQPA,17.687
54
+ PRSRLRLFMLRLTGMSAKGASPTMLLGLGLLLPPVTLFYGGGVAEHGLPDPYALGNVTIVFATPSVLQHGVHWPIPELGIMALLSFIPIFAPEWRAPTMMAYGLLTGFLLGAIYGPPVVVLPLLWGKIKMWWKLAQALLGASQLYFTIQTAIPLLVTTESETYNPDSRFVMQLLWSHIHTFIPILFIIKAFTIGLQPLQMQHQPGIWALFALTMFLVSWTLARDPYITPDGYFADQKAMGDLLTFNLLQRIPVGNHPALSPPGPYSLLGHISTQIIVAPFIWYWRSA,17.041
55
+ NGYVEQISYHETITSDKLRIDCLLDLNLRFLAMVLKLDIKPLKGELFAAFDCAKMWNCPVERSKDGEPVNQDFVAEAQLRGKVFSCVIIEESQSEYIPCSAPSTVSLEICDNLGKMMPVLRATFQLQLNLGTIMKSTVKESDCRLPAYHLKLECPNENELVGVPQPGPVRKAIDPISLFEELAAHIKFDKNGERKFVQILSYSKKPIKYSVKFDFCNSAREERLEVASYKLEIVSLQEMRKDSTKERSLRTMILLQSSTISFQRLDMYLYKILYLCFLDTKYADVMFRFGVLADISRLCSMMPELKGTWCTGTFVWFIKWAFKVPTLNLGGQDEQMSQFLRYMFKAKKMVIHDPPDWKMACKDSFMPKRNVRLTLCNQKETSTRQALIETKLLEEICTDVDVVMRGEENTVEKSNLFWLVGDSKTVPRNDRLLVGLGQTRNLNASKFEVSHVNIPPGATAVETHPTRIVKLPQIQGALLFYLLYNCWALTPWFRLSKLETVTAITFFSRALYAKLYVTNAHCAQDMLKMCTAVRQLFSGSNFGYLMFHKLITTKQTMKKHFNVQQILSIAVTNVALVVTVGQTECVRPSIFSYVQTVNQAKAIQGVLVSILSPDQAGLAILFIEEGLTRFVLVDYLKMLGKSRQSKPFTLIVGTKETLEEWALYLTGEYVPT,18.533
56
+ SGSFNLTNAIWVRKYKWYELAPLRYLGRCMVMDKSGNKHVILSQVGLLDSPGDELIQGANLPLRITIVDGDDNDFFDQFGEVFELMNLGERAEFNNFVQPADIATTISVQDFIRYSRYLGKGGTFVNEFRDRYLEDGRITEASVGGGFIANLLDVEDLSLVPEREMVFGEKERGFSEAFGSLNRLVENNFSQENGRAEYLAGSNGSSYTTGKIGYVTNWQSQFVITRDLVSMGDFTQKLFSYHQGNIGSYRPGFEKGARVKFGDPIQDWTNGSDPVTSDWSDNYGFKYFVDTPTSTLWRGVVVSNPAIFNMDEIGKNLKVTSLTSYYIKADGNIGRGTKVLAGNNYQVNFELIYFGTTWTLFDANLYYDNGDEWGLSDWTDVVYNSMVAAYQAVDDGHMTISVLGTIYVLMMLVSISFGTIYVYDLYTAMALAASGYLFTRRGLS,17.983
57
+ NDIRSTTEDVLMPVPKDLAGKFFIIEESVVIATETLQKDSMISFHEFGMGSADVYYTVASRPQYISDSTLSLNDTAISDDVTVIKSIGLTVILDLTAYDVSRVTMADRQSYEDREKVSYREIDLFTILVAEAFSCGILTPTYLAERLEQLGRIDIHDGGWNKTINAYELVISASTFADGKNFSTAVTIVPNLPAVGSEIGRIKANDGDIRDALGWVFGETTEQSISPVEYGMILITHGSPGGLLTAKPPLDNSVQEKLFQVLASGWKRGLYLQGGTLVSRAAYLGLDHWLKLPRGLSIIDVSMIDNSSLGIPLYIRYQVSVDQTDKIYEGGKPIPERMDQNRRSFFGTLNLPLAITQAKNITNKSAHNIGQEWWLKIFFTLIRVTVCMCILGFPHTGIEASFMFYLCSQYYSHWFVKWGLEVLSWENVMSVAGMNKKPGFEFALFSDGVILGVVLFTAYIVILFVIMLKRPLMLPIKRMKFIGALLVWSLSVVVGFLQGSPRDKKKFLIKSAIWAVFFSLVAFPNVFLWFFTWKIARLSANASVFYSGTTTMFLSLLVTTATEFSVVQYTVFLEFFIMLTSGILVLVVWLISSQKTSSVSIT,17.4521
58
+ KARYVRLVVAVCLCPFVRYLEIQLQDELEAEAAKKMQLVGREKFNAEKLTTEDLIAVDAVGRAMAEAQMDPATIQRKIPGEVPANLLEEQLKSFLLAQEAKLEARRRRKKLQASGSKSNRVMARERQYLKRCDCSIDEAKRNLLDTTVDALAARSTIREDILLADSKISQLADTSPGIEYPNAFPEQLPYLKEEYYFIRTSRFAFDERVHALQSNLSLLGFDDDLTDATKAYTEFGEAFGMCLEKLDISGILDFLKFIPASSKWNPI,17.6976
59
+ SLIGDLMSDFSGYAEIVTEEYMMKHWMPLGLIDSENTFKYSYQAKMGLAGIENTGIDTSYVRSPAAGKVPVLPARDAGQRLGFTQLLMDLYLNSPGILQTLVYSWMEVQASWMRETRFGSLSNEFETTEQYLPGFKKARAPLEAEQIPKNGGRVPGGDRIVGVFEDSPVSGRSPEEHFQSFSILYIKWNAFWFFSVQCILTLILIIGFLLTVDGLHPCMQPMRYLSLTEFLMEFEGWVVSRRVVYIRDYDFTLTFMEIGNVAGVRLESYHWFLFWTAGLILGSIFFETLRHYIGAMGVVFPTDPPPSEKSDTFSGVTFIAGFSGAMRVALVYTAPQCCRYGEIAADVGHILAGGGGGYDQSCDEYLVIYPLSGGGWALERARKRGVIVFPYNATPWAGILERFLPLVGTARYIAFLVWLISLAVIIYGVYAYALIARKNPKGLMNEKGIKTARLATGWSWFILIKGLINMLPLRGVGTKVFLSQIVRWLPEYALGK,17.7286
60
+ LSFKIFIKLLIYLILIILFILSLFCKTTQTIGMPDLFVKKVDWIYTTYYTFYNDYWIVSVKGVSVEEAIRDLETSFELSKRNVMQLVDAVVWTEASDINPGTDFYHWQWKKLLEEDPLFAKTERLTLVTTFNCMFLAWFANVYALAITTMPTGLFIFVLRFFLLIYALFASISGSGYKDTWLVPFGGAPIRGNLAAPTGRKAFLDCLEYDIVVTNLGAATRATASVLITLFTILRLFTGKWNMIVDVTVRRVSMDCDEELAGATSTTSNMREIERATDVFASVCQLIRSFLDGRNYSQAVANMEYLLRMPESKIMLAWKWNEQAQYPVFRYFVLDAMNEMRVMNQQWMSELDGLFVQGPLRNVFDYLQEQVQQLRVAKQNSFMRFKTKFARGKELWNVWLSKSNNLCQCSDEGLTTLEVAAILLAVCWMVYGFTGTIRIITEDATPKSFTGHLYYQRLHYLRPMMQKIDNNPLVSLLPPRIILDDSTNWKNLVPELINVYIEPLTIPASQQVYELLVVLHHISPSFSGWRRETMVRPNFATDDVGWMKMEVSFINYDQVFYLTEMYPFSQAPFFKLLSQLRIMSQANFRVTIADLSNEIFNYQSLDFEAMKALDHLYQDLGPIDFLFVNTVLVRILNVLRYIRFLRVSRFILPYLRKIARGVFNIFDWYNIVRILFYAFGVSNLLSTIMCSSEPNEDTCDIMQPLDMYLTIFVLDLMLFLSYPQYGFIKALHGFLYHLNTLGTTMFGLAKNNLIYFTLVFSILLILFLGKILAFYAKRNNLEELIR,18.0488
61
+ ASIHFVVASLVATGLVIGTLIGNLIHSAGVAPVIAIALLIIFFCYIFHVMTTSYMNSMSSQGPVDAWTCVGQAIAIGISGFIAAVEGLVATIFFAGLAGAIISPISIYLIATIAIPLTIGLVLASLLVIVLKHICKAALPSVSVVKGISLALTLLVASSLIWRAADSAKCSNCLTASSFVHATFDAISYGAMIEVMAAAASLGEGVIVTAFAVIIALVFVEGLAFALTNIFCGLFDG,15.3219
62
+ KWLKSKEATKRARDKVYVKIMNRETPMAIYTGHHTYWELATNPVVPDKRFVLGEVCENRDDLPYYHWIEHFRSAIDKGARSKEDEGKDRKTSGIYTFRPLTQYQREEDMPTARVQLVCKGVTVEGMSINEIYFHIIHFAADDLNDMAAEVDWGVTEVQLDALVDEPSECEIVTDKKSRPKHIVITTNDKDLPTVRALVDKICLAEVGDHEIQMTRCVTEEQESYIKYLSRHKSDAVLMAGGAISDIQNCSEGRFPITYTDVCLKDDSKWSSANIISHFRGFEEILAEYINEQRWLNGVALRRGFTVQGVSDENPILLITDIQLDDLELAFRQNSINQTSLVSIGSHVLRDIGYFSRGQWGHEGPNQYRTRRHASCWVLNVRHNAILPTEVVIEDGWIHSVFTLYPSAPPHPLGYVQAQWRGFNKENVKDIREAFLKVRDLEWKRHEEVGKLINDIFNTMGYAGNTFWEAHFKRPLFSGLGRIKEAIRRKLIFIRTVISFENLKALVIAAVVTAYLIMAFIILEGKAWGRFEDYGNTTSGWFNLTGHVPRYRYFINEFCLSWHKQTRCREGNFITQIEDKLQACLKFFGDIINSYKGSLFKHSVWGLCPDIICLDKGISRWNVDWSPERTKEICGINPEPRASKSTWRSIQSICDLFNLDGDEYGTYDIDMRVRLTHNSTTPCFPISIGLPCKFDSAGYWTLARIIFEKYSLAFLRRIQIFSPGVAEPLVMVTKGLNTAFAILTLGLAGGLITALYLTFGKPMEGWIESIRILVLVLSLFLVALVLSGVTHGVQYRTFKDDRIKISVRLWVFTRRIE,18.7487
63
+ QEETPSSDRFICKNIVVLSGVAAILIGLGNILICVTTKYVKVLRYPNLRSVLTVVALAGFVANGLLVFIATNGSPTIGVSWLSMAVEVGFAVGLLMCLATTNILADNQNGETGPSDNDFLGSTQAELVMKGNKVAWWPMGFFVVDVYYAKLFAGVNNRILMGKIKGNTWEKNSWNKPGQIMAQVFYIIMTIAIFLSPLLVLVPMHRFPLNVVATSVSVSVLLGAAFTGLPDVMNWCTASFGIRYLGFTSGLAVKIISLILRISGRLGSIQLNFAEKLGVLLVSIAAGLISGIVWIGGLLVQLFTFIVDSFLNTKAASDQPLDIIIMLWFGITWHVLVLASCTGIFYMYNIFILGQSKNYGSISAVSTGLITANQGVELKGYPLASCCVFQITNMKVHEDIKECWTLIENCKDERNVHDIFTITVMHGKKILLTGGNTYRGVEIRVNEAGQVVPNHELYVLAKEVVYSNPRTLTSVRVAKNVELCGLTVRVADKAAMNSMLDKQISNLVHLGMSLHKMEKNVVLSGGQRKRFAIARAMISNNFLVLLDEPTSALSTSGENALFTDLPVKENGTTLVVVSHRITLLKFGDVVIMLAHGEVRVHHLCIYTKLDEFSLKIATYFMRHIGYFLDIVWAFILACIFGLAIFNLSVFGYNPSASVKLVPVITLFITSFLVAINQFFGQSAKGKLAYMHHMVRRDLFGKCH,17.4792
64
+ WNRAAHFLCMMLTFGVTTVSITTRDTYYTGLTKAVKDMSYTNWLIVFQFKMDTPSRTPWWRFENRWLNVPVLASVIWRGIPELFYGSTLVDHFSGVWNIVHWKHRLPTFKRLRGINSDYKPPLRIIIFLSLTFIPDLPRVLIVLGNIPKVTVRFFMLVMTCQPQTDLKQQDGFGFWRYKKPTTANEHNWKELRAADRPLMTYPTTAPKHMHPFGSLLFYGLRVATDQARVYMNEHPTSRAFLNLILALLELIPDPSG,18.5118
65
+ KDFATYDPTALSPGANRRHDTWKPMGTATKVERLLRWGYLTFAMLTTLTHIVILVFVPFSWSVWGNMRYGVEPPEMKDQGVVKFFLLVFSFFLHTYVLFTALR,17.4756
66
+ AFVPITKFYNLRREGTIFKTPELRKMGIKVWLVDLAVVPVAKPGVRASARRIIAYILEFNKKASKLIIRVDASTGFFLTDNLIGFAFKQGIRKVRFITDAPKSGSIIQALFGQHDVVISGADIVGTEFEVGHELEELDIAIGIGAREATRVFAAISACVPSQKIIGGGGTVLEYCATTGSATKGIFLIRGWMEYVNLLPELVRLSAVFSMARLMSTSIHIRRGQGSTPSYAILVGCFVLWIGILAWLVGFFDLSEQEWLFTLPILQLGLAAFAGLGLAIIAKELANITTAFGVLAADLVGGAFCIGGFNAMVHKLPFLYNLTVGIIGLISLAGYIHIIIGGSWWPGPRDREGVLANFFWTPTSNENEDFDILPLEAEDEKTSIDNPSKGGEVNRINLFDDQVLVKQSMTPCGSNWPHLPFVFPDWIMNNLFKAIFWKIVEGSINGAAIIAEDAIARMKVHVKPISYVNESRRFLKLAAFMVHVILEIYVFFCITLEFEQNLFGKSNGAGVPKLLILLMIALFLLAAVGGDITTKWATDVVQQLALYLTPDLLPMWWFETALGDAFECERPGIGTSVQYEKTAFEDKGEPTFDDTLARLIPEVLNVVFPETSPNAVILHWVNFLTFMLALAQSGICVLTGSFFFNQARSLRLCQFQKVTLLATSQDQSADRIVAVLKKWPPEKSGRVAYVNRIFVQLLVDPNKMRVLSGLAFTASVEVLSSVHAKRGAFITKPFTVIFVLLLVLVPLVGGYAFRILQNNFHYLQLLCFIDRDPNLNY,17.2646
67
+ SVSDVDVVKDKGITHHNTIVAAQSKIEIRVMSVAPVTVNQKGTLILDFSNREPNEISVTDKSTAGNCIYAEKHYKKDCVLAEEGGKVRLAGVGKSSSQSVKSKAAIAVQPQAGFACGQNGAGAFQREDELWDELITRNKIAATVALLLGGVMGKTVEKVNSIILLREKESQYIKSIAVQIKGGDKYQICVALVLEQDVLFHGVDKQAPLRNLIIKMKVCNTREMIPKIYETCKDAGKTDVATEVAQSHVLREELVAYTEIIVGVYSPNLLEVVYKMIPDDSVKLELPFDVNGSKIMAVDGKRVLKEKFMFGWAIGNRFGCIMDGKHEKVEKDVVAATLMGIDEPGTNVSELLKYLSYPRAVAAENTLSVEDVNMTMISNHINLGDVSKFKRECLALMDYLRSISSLVTLYNSENVSWQAKTKTRTFGFSFNGNGFPSSLQLVKLVTIIVANYDVQYAWYTGAEESEVNSPERFGCCYKCVRKPIRAGCKTSKMSPTFIILPEKTWEIDDHNLEYRCMGKPALSITLKYDRDDDNSKEDNKLYALAVGLVDSAVTVHGWETFQVSCWIPIPDKKTVKMPGFSDLYLAVSLCFPMDEEKKLRLHAPTLPEIVFVTHASTYIGDEAELVLHILKRNGVCKSLGFEDNHEIWSFIAWISQYHSTNWRHSGSVVCGKIRQLLQDLIPSADQDTQVQAYCEECKNQENANIDDSTLMLVIAYKNLQYLRAGILPDYYTFDNIIQVGSNNVISGAAMHFLDQIEPFFVANTDPQKNIQLIRNKEDEFYWRFWAFDAYANEERTSNNRDIAKFEIATKTIPRLQYERKYSEALEVIKGFSIAYNEKY,18.7321
68
+ GENFEELFARVRRQHPEAFFLYLPVILIIGTGVAMELFPFAKKYWRFSSALGRAFLILLLSILVLKLLLGKLDEFRIESWALADFLHVVQAKTAPISPTIAVLRYFRVFRALRDNMLDRTHDLSKPVIGYLFMAGLPILLILSTAIELGVIQLDGLTILPLLTSAAFWGILPQRVTSGGDGSLLAVLTGFAPSLKEGFRYGFIIGLFMALLGFIYTIAAENDNEALRQG,15.1469
69
+ PYVRSGNVLMAMPQWLQDMKKTLSSKRSQKELVKDGDRIGQKLVKERKMSTVAMDLYWMSLMLAHPYAHPVLSGQATVYHAVGDGAVVKVHDGDTLFGVALYFSENMWFAFFNFAPGMQAPNVSSRDGSIGLWGHLLPAPNFSFAQLMMIWFVVIDFLAGLSRLLMLYYYNLAKTFRFHLLFASTVSFLEVQAVATSFWAYSGNSALLVMVSFLITYTGTTFLAGSMHATGFYVIHLTDILSQHVTFLMTLIEAMNSHQSATSMSGSRADGKTTVNIMLYSASLLNFTTFKGGFAKFMCYISALLWLVILLYAFVDGAACIGFGGRLRRFSHMAIVKDQSWKFYVTQKGPGIIQAAEYMMNGPNIAWSFVIHVTASHHRGDIIVSGGWIGSLLPMAMQFAGQWAPLIVRAPKNPRVLKLYLLVTMLPAGILIAITVYTLWQPVKKRPQRPSNESNMLIVIVGMALGAGTACLPFVLGEYNSNIVVAWAVSPVLVRNCFIIFMTVPQMACMQDTICSVDRGEHVTGLNSLTVVSVTVMSMPSYVIAVQTVSVSKSMLGIPFPFVELSLKADASLEQLAGPINIKDTVLKQCGAVVIILMLVFGILRLTFGGVGVVDLYSPKLLSIAEAKVVFLFMTITWGSGITNSTKVFD,17.7269
70
+ KWAQGYAVLEVVFTVPFVFILLFMFITTCILYDAKTDFVEFVLSIAFFLTNSGIEWKVCAVTASSDSQLLAVMCLVGLAYYKIYDYDCCDCPFSIDPKKREKTVNLKQCSQLIAFELPPKFVAREEVVVSQVPHRFKNSEIEDLTFELEGLIYDHNYPIEDGFEAWRVHFLVDVGGGEIGELAYPVYHAPVMSIGYISQRPIGIKAIVVRNQKDQMAELINEKDVLSISYSVGLSYELNEYQLTTIRNLRNSAAGLSGLKIAVDSIIGLCRTPGLFPFNLSHATSQAITVVLGKSKRFNLDLSKIKGVPALKSFARYAVKSVKRLIADADSLPPTLVAVFYKTGNVSTVKGLSPPLIKLNVLSDASVVPLGKKINGIGSTGAVCTIFNGVCWATTIVSQDDVPTVVVQITQFRLGSQLDRWGKRAQTSDDMFWDTAAGVRLIVQMGVGSPIATIIVAVRPADMYNNMVENSEEKLKLRNNRNRQADDEIYVAIRMTGGNARQVRLGLSEVQQKQRFVLDIPTAGLIFIGKEFTSVIAKVAGVYPTILLAERTPEDNSVSIYLRNVNYIKGRPTSFLGTGFNNSKGEFLDPFFTLDPGPQDAVNGLRIAKEPESHKILEEQHGPPCLTYNQHESMLQILKRARLSIAVPDNRVAD,17.9063
71
+ KSVVILVGCASSDPSDSIEFHFFGDNTAITKGRIGRRRFVVIGGPSADLDDEEGEYGATHVTVFDIAGSIMPIGFTRGMTRLYGISHLTEKPLPGGFVMVLPPGGWRELQNLQFYEAEKYIRLSESVMQDVNGGLTILEDLGDEIIKTSFPFGQPPDKG,17.3791
72
+ ESNISKIFKDPICAEFKKVLVSKIRPIKTTAVLAGLAGFFCGGGFFLGAITADVFMIGTIVMVYFAAVLKMSDARGYAWFFSFFSRFLIGATNFADFGELIRAFLKDVNLRKEKVHKGNYLALFGVFGITWIVLIMTSLLALGEFIFMVGDIFKQSGKKAKAKLNAEETIAIANPVIYALIMIISFLLSVATILTSSTGARAKRIQSKRRNLGVVLVGIFALIVVAILFILIVELCTSIGIQASYSLLAERLIGTSEYMEGIPNTNEYWNAQGVKQMLGVASLWHTKIYEWWNAIFGFFVIKLIKFISDQAFRDWKNGLHSLQIFVGLSVLSAGAGSISSILVLSDIIKNANTGSFIIVPVVFFIGLMINVAIFAIYYGD,16.4909
73
+ KTYSEDMTFLNTPIDRYDKPLIDRVPPEHHTYVRKIITVFLVSGILAVLLLMWATPQMHSKVRWLEAGNSPGVGRIKLDVRVPEIHPQTMHILNAILRFTKKQKDGPVLVEAKSDGDTIGTADEFAPLAAIARIEQDLKASLVVRQIVPHACSVPYPMWITEGDKAWDGVFYKVEECDTMDDFVRILNFMIGAEYLPGSNTTNEYCGASRKIVCFEPVMIRNGDDAEWKASVVVTMEILELVMQQVITCTDSAEDGFLISQKGQFVGEGELGILSVNLEKQLYKAVEIRSQDDRLKTLIMLIVSFIAVAVGAAMVSGYIPRRRYQVTVNKVPYRLQDDAPVEGDVFEHGLTEMRIPVLFSLVDKLECVAAIDRQFKLRRAERALVSFGTYLQQGKSLAQSWAPVFFGFAEYLKTIGVCIIDNVEGKYLKNTVAATMTLFLLFLCWLTMCFPLQHPPRLPLADFRYLRNLPGGSTTMLTVFIYAHGLDETLDTEKGFWFWSDTGLLGTNGNMSGCYFVSFAVETQAFVAMMLGRSHSLSHRFHGLLQYTTLGWAFFTSFIPFVRDRNFTERHYVIPQHTKASTITCQNKSVRTTDKPQARKYQEDLSHETTCHCQTVEKTFNHPRVKLTTVKACGEVWTECPVDIAYTLASELFYSFDLQGGTLLRPQFEHPWGRSNLFAAFFHMDEGFSCHLGRDPDMQEYINSSSYLLNSTNVSEVVLLEFCDNIPPNILLITATFFYGNMMGKDAIAIPYDEYVGKHAYELAPEAAVIVLIVFAVKFLLRPLLLKCLWAAEFADHALNRRSINTPTAFPVIHLFFDVSTVAAIIGKQKNYDRFFPQLAVDLEALVDEEGKEEISFLIREMQRFIDGVMVMLFMRKWRKRRTLAQLRPPAVSSPDAPRNCLNVPDREKACSLNNELKTNLAIAADY,18.5435
74
+ TDAIKVLVVVIVGIITYLLVMWYWSGVVFEYGPVFIFFLLITPFTGEEYNFVAIFDAIAK,13.894
75
+ LGRRRIYGGLFHVLFFFTAFIFLVQGLRDGGRLGVPPHTLVALFEVCVILVLSASYASTFYCDYLSTTVHIMHIIQQLLDCYYYHNTVLTDNNPWAIDPVTLSLDKTYEEEEDERVEDVTLNILKKQYWHQAFQFEKCRMNMRQEEEESWLLLGNPQKQCESCKVRVSDPIGPDSPRPDVTIRISSVDISRVLLILRGSESLACEDKVRFSLYRAYQEPSPLFAEEFTEDLRCIVHTVRVEENALDTLDASPELFYLPSCNAQLIACSSFLRLNFLLKWTERLALNDHFWANKGNLYHKSWQSVKEHEYVHFPYCLVRLGGQDLERVEAHEKKKPLNICLSITVPIYWGDPRRKEFCKICLETDYTGYDMFKKRRLAMIFFLMFLFFWVFSLYLVEHAGQAMKNKGKAVKLKEKLDRLLDCSEMKPKPIRDGNLKMLQMPGTFIDCSSDKGVLSEFVAGYLLIGVFFLGFLVYVSCSCTILLWLYWCIVIVWIIIMFYWWVSAVTVDLQMAKFARQRERKVVAQIELAWRLSLFWVLYPFLLLLFLLSYFGVSPLPKKVKGIGRAKDHPKLFYQLKILFNLTLLLFGSLGFNDSMKFPWDLVLFLFIEFVLMLYFFTVANSQGFSFLYRLTKPLRNVSALLIIHTFASFLSHVVKRIYESALFSMMLQTLVVSFFQQYVVIIYQVTATNFISRLIKTLKLNLPTFVITMVFAFLANFVCKLLMRVNNENYTFILMAVPLVVPSLFVPFTTLGLNSVAMGYFLRGFFCETLQEKAKQSVVKFKKDEPTSRYELTPTR,18.1273
76
+ SEKSISRALVTTMRYKVKFTHGQAVFERQYRHVLDGPFGERDVTGKLRLPPDPDRQLSLKNVYIVSAHFDPKGVEAKNDYVSVSDSIAKRAACVIADLRHQGCRIMYPGTAKGVEIMADSDGVHLQLLLQATKGPGGKKTAIADLRVPLIDYDPMATIIHAHVQGGPVFLREWTVIPVYVQLKFKNDNKVKINFIVPGDIQPPLIQDNDHWDTNRYKDDGQYKFDLLPEQIEIRGGYEDIVIDPGFTNSFGRVLTHCNSDAVERRTLPLPLWKGLYTRHKARSQDICEVPVLDQRVVLPSSRVRSIPELAIEDMWTPSLSDSVDKNVKHAGNKCNMTNMKREFTGIRPGDFKQALLGLTCHTGREMNINCLKSVLKGNKAKTMVFLHGPMTNTLGALEKLNKENPKRKYMVFKAFHIDADGLSISLMISSSGANLPYSTGLHLLNPDGQIVALNVIAPISIGGQSVEHLQENLLQKVLRNNKAKYSINTCVLSIDWMADLTSRPTKLLPRLYGSGYSISDIVTSPSDALFDIGAQEFVAAPLMQGPIDWLRAIAIDEAEHEGMTMSNVVAEAIENARREYTLGLSGVDTSGIAIGHARETVGREGTASMPTSSAAAAQGFWWATSILNLPPTMTALSDMIGGQVVNGGSIVVVGDGVSNIDEEQRIMAQQMVEQIIHLMSVNQVSALALKNLIEQENNTVGQDMLMRPDQTAVSLILKTSAISELCELTDYVYQHSVVQSQRILVGPSGTAIEQRTSKAALLDKQLTMPALYMEGDAG,18.4702
77
+ GPVSNYTYTRRDGLRAWFAQTEPRTIAKPDPADYLPTHLPAREKLATYKRVVIERITNSMGYIEYLDARTFNYISSPDANHIIIETVKMIACMLFAIGIIFSVHDQVTATQRTAVALLVAIEVLPNGVCPSGTNHPSVFQKILTGEGIFCADVASVGAARAVFITPQVQGGSLLATKVIMHEQPRPTEIVRDPILNQAGVHALCGTKVEGDVRQSANFTISFYSYSSTEGINYESTQSDIYHDKSPSNPITLLCVRYSANGKRDLEDGEVLSRPVTTQVTHESDGGTEKVRKDNITDIVIVLEKAFPAAIDEFRITTIKILIDHPITEYIVVCNDPNPVGTFRLAKYILNTYPDGSVVVRHENTFMKMLSSIFIHVNPDPSRLLNVIPVTNSLVKSGRYVMGDSDVVEKDNMKAVLKPLFEKVVGSWMGNSTFAMVAGFASLASFVFAFGHVSTQYAGGIHSEPLSILFGVNFSLSTTWVSAFYLLAIILMGILAGLETLLEGEQA,17.9375
78
+ IGFNTTTLCVCLMVALAITLTYFIKNKKSAYTIRLECTQASNCEVINFPRGMTTLNSLPSDDIQLFHTYSAVRLALCLGGSLILGAVLKIIFTNTELGRVLHAKMLKNGSLSALAIVTIFVVFFIAMSNLALIHAKGSNKAAASIPCGFLTYSVLALLGVFMSKCWNRQMPMLYGLSKGHC,17.0854
79
+ NGQVRERMVVLALKDPANSDRINDHSMHIESYTFVYYPAQGGCIHVGIIRLRKPQKLSLQEVLAANGDSVILAGLGVDACSFPDHIMENFFEWGDTQQKMDPRVGPNAFVYDQAFLDEIEGHDLFFLREILDSVTIGNRLQNPLQIFRPYAQARNKITRSHLGCSVTKMMLRNLETVCTASFQTREPELEPFLQEMRADNVSPVLDLLEEFSFKIPPGANIVWLAPVAWPILQMVKRQLRGTGRCPEVNHVSPGDVPKGATQGTWDALDAAQLFIGGDSNIRGSRFFFQLCGLVRLFRTARVASVFTKADPLSRTALPEQGAGMERLVADIFESVHENERVANLADLDRELCDYPARVQAEEWARACGRAKSHAAYLQSGTVDTNVKTHAAHYVLKKEELDFAMGFQGKTLALSGHRCLVRKRVASTKPEIFTQLEQLRVNNPGIARAEYTELFAQGSFVVVVALAEFRNAVRQVQDDSLCQELIEKAQMFPRVLQEVRKDPTPKRLSFTLTLIVVGLPRSDFALLNDAFLACFIPNPRGVANVILDTNAYDLEDLTSKPEVIEFEGICDLRGVAAVAPKTPIPAPRPMEGGKMFLISDMEEDGKGYDVPLPYALGAAVVVAACDQCGHPTFGTNGIAERIVIYVYLHFPAVVALFNPIGWACFIAHSCDPAFNSSFCRLGLPMLIMVVAAIVSGTLVFTLMIVTETECLDSGESKQKALAGDMLIAFPLMGLLELVLMELAILKGSAPSSSRHVHKDDGFVAMSPLNGLALFIVLLIGTFHGSTTVSGQVRSGRRTDMGLQGITGRAAGVRGHHFIMLSRADNIVTKLVPPWATALMLLLLPFLLIAEIEMGAGPLTLMDGVRSWISCLLVAIATAAFFFLIGTFQWVLGYWHRSNDSFISILTALYLLINIAKLGFGFYLH,17.9518
80
+ VPISYDIKVPTGWFIDGDKVRYKPCQGLKVITLVNDWWIILEVFACVTLPSTLILEKQDYFHKRRCTTIFESVAEFTQAYQVSIQESTQQHLTMAVQQVLGIRNRVDRKYVILVANDSPVVCYLEGSKVLFTLLQGPKPSSTVLLGRNGKTGLLLRDVTYMKTNGFDVVGGDLSVGVKNAVKAGFYPVPLTQIVQLSPVLTQAFFDDESVTVLDGPIGGHMSNKVNSQAQQNWKINNDNGFMVKREARYLGVTVIKNDLVRGFEDLTDVLGGCSKSILGALEMRDASELNHKGISVVAETISNAMTLGVSELTCCTKHRKEVWLQRRLRGKWLRLMLNIVFAWQNDLFFTITFVPNIVHFQRKIFMPAEAVFNFLIASLLFVLIGEFGFFDVEWRRWHWRRFNMIFYVERYFLFKAFLANEVKRGIEEAKKLLSLAFTFVLISALHLVNRIVNLLADCSHSRFLVDNLIDLRFIDIYCTKKYNHMTFMLLLLAATIITFLLIGINAAMVCCARNDQVLQLIQSLESLFNLIAHLNYMTVNKFTFGLMLRMNAYSLLIIVNSLTGYAELRIASAIMLRLEQAFYDLMSRFDVTLNGVIADRVGVASYSELAVAILQLLELLVMEIYEYSIILGLVAIYFMVTIGCCVKTLKFQGLDAFNP,17.7417
81
+ IFFLLSNSQDAYADKFKILVPLLWFLLSVGFAVLLHWKQSIMKAIMFNLSLVAFLCWLVWAVSNFLYKLSDIKMVFCLFIVVIWVTALWSLSTPISYRYTVKNFVVNERITGMFPNLILFAEIVPAITYIYFLFTFLDFTYRLQALNDVTVLGTKPMRLIQVLLHLRVAIGFLVVNLVGTFYDETMEGGEVWELFNSLTPDINSSVTVIIAVLFVFFNFFFVVLTPQHTKPCKKADNSSKPLAILVNGVTLILVLSLGYFFGSLGICAFSANSASMLQAFSMHTTIIMLFKIGVASAWGQVWNQRTDLEVTDHDPPLFILTLLLAGWVIESTTNFGGDSEIMNLLGFLAGVLTSIEIFGLQINLPTFGSFVQDWGAIATTGFQSQEQFFWITYGLIFVLPVAMLFFKIFHEIIEFNTMLIICTALNASFSLLHLSRALKFETKMVGKKRCSADEEFGARMEDAMDGAYAFFSKLLNTLFVAVFRVIVIYLVAFFIFKMLWIFIPTIVDKTNEWSVGGFVCELLFSIAGNLMGIILLAAPNFKMWFLLDVNTVFLFVGLLGLINEISGLRLWEMRFSNWFIKFHMWFFLPLILLLPSAIITFSGFIGAFRIEIVYVFSLIGLYSVPLILSALRQR,16.9356
82
+ AVITQRVIGIVAVTLLLLIALAGGILTPMGETGSFRFPEVSWSVLTLLKETGWGSAEDGPTLQLGRLVTRAIVVMVFAALIGGWIGAILAWLVGRRS,12.8882
83
+ FISRALFETAVILILVLSGILQGMVLHSSAPITDDMELHKLLAKEIYTAFCIVAAYLVVCVGYPLEREDRLMIGVYGSCSAGPLDNVKEWGYRTIAPLLVAYFGLFTLWYYRVFGEDAEKIWIQTAIFVAAVLGMSILNLLVYLGRFPDAKARLSILLKDLAPNVLFHFVFFLINAVALMPFIYALIESIIQGVGIKKFIIEQNGVNVPLTIARALKIRIIDGVQITTPNQNIRRVSPTVDDRMGPETSRFHNTLEQDDSIVFIFLFVQDLKILGLGSQYFTLFINVPAFYYVIDIINVGFTVVIIFVVIELIKGRFRSLVGLFWVGSSIKSSDFLAGIKNFVIFNIAEVPGVLITTIVEIAWGADFNSAKITIMDAVLIVFWFPILDFAWTNIAFATTGNFYFLIIVAGLGMKKADPMLLATLIYAVLSGACTPMLEVIMESVYVVGAVDAIALFIAPMFLRFNLPILVTYETRRPNLIWLMALIYFVDSYHLYFKSWWFFILSIWGGVIGMDVVGLVWILGNYSTVSIIGMG,16.8602
84
+ KATQSDKTFPLEVSFGFTASSGNIVDAHAASMATYITLQAVADLVDSPTECPISKDISTEQKIWDCLPTVNTFTARTGRQAEIKSTSLGQNLGLPYFDSSKSDKLHVDMNADGRTISDTLFLRDTQKQMHDSRKIFLPNTAAPNTGTKDLIDEYGEGLIVNHDTNDASGYLLTDELDCHRPTMKAGSLNPDYPSILRGLKVNIEDIIQDENKVSGFYQVLSYLLSKGSRLKRTIKFCHERDRYIHSDNHKFVFSGIGDQAKMEKELDVAKATGVTIGLEDSMVKKPSTKGNLVGIIPMNGTFLLVPQDPLSGQYGSIIFAHCIGKLDEDTAANIHTYYKAWVMMGNLSLYDKFMSAKSFSALKVQLRVLIARAGYIPVLQVQTNILDLSITEDANIFEEVLSSVSNPFFGAKKTIRQIDHDRNVGFSVNPRGIDSWRNDMPIVLADVAGSLTLTTLGLGFRAGLSDADLCYYHDLLLEGRVDNEKNAVPEAKARKVSQTRAGLFKRLLLQMNGTDFSIRGCQGQQLDLIASNGYQSLKNLQACKKNGSVTLSISVVMRMYRFVADFAKNNEALNINKYDFLKHTDVWYYPGEHNRDIGDLNLREIKFKPFFTCDNSPRNTVAVFKQLYTLPLDGRYFVSTKRETSKLITGEIYSISEFGEHKGWVALANENPGLVSTIRQTGRVVVQFMAGKSVKDAPISKRIMLVPCDKVIVMYKSLFYRLPEQIDTCYESEEFYDTKKDCIKTALLLARKIGYGMNALGHDERLSALQDVLRTLKPCDVLTLKLVTDLGQATGNMILLIISVHRSKLRKVISILGNCEAIARVLQTMKVGGVDAVILLGNNINDKDIEPTVYVGYSIMKEGINVPFTGGINNGAVLACMIKILEPIPIFVAPARPFRYYLRALLGFLDLGVGYLSNADEKATFASSYRPGMTAKELLAQLAGELNIPSDLPRTMEIVKDL,18.0526
85
+ RPKTAEAIGLLRATMLFALILAFIALAELLAGAMGLPGSWVGLPVPLIVLAILLVLIGFFLGLWAIGGFER,11.2268
86
+ AGVCRVPEHHEMPTIFCDRDGNNGPILADNAQVMAKLEERQQGRKIRAYSSSHFPTRSVFLRMGSGALAAIQANEGNVRLLEFDSSEPRAPLSKESDLFGYGGIGQPNSPVKNDYKQKIFGGMIITGPPSKVSLSLSGYAQAQPTTSLGTLYGDAAPAENSKVKSLLAEGNTRWRFVDNTDDATIMAYGMRSIRFERPFGMATLNAPVNTDLAPEGRAIIKCPTAFTFKFTGDDEYYTHSPPAYVGAIEDMKKEKYRPPVSLSGKRPLVIEDKRAGVMDIYSFTPKITVVPGGRNDVYYDIDIRISGGIEAVDRQPMFMFAVAIALCAALAVGFVMAAACELMGRSPRKAGTQYRKKRILVLTSNTLICYLLSPLLLEATLIDDSSCDCDLDLEGNRFDVTYGLVSCDDLQFQTLFSFFFFNLIASVIFVELRHARRIYIIEYPLFESPMSLVGCVFLALFAILLPDTVADGSGDQPMTFYTDIYYGLTSLGEMVTAYRLINLATGFLIGIIVELAATLFIMLAAEFFRSSQHNSADPRKFQREQRSFLRLRRELLPTTSGVVPTNW,17.8225
87
+ NGKNLAERFVWQQLSAAPFTMIFTMVQIGSPDVTEYGWNFDKRALAGVYISGQDVRLIGGARNKNVTVTIESVVIGGYGRPSSTDLAGHEVLAYFSQSPARSRFTNIYARLQNTRGGYTAKFTSSFRPEAQTYETGALSIRFQGTADAPSHLQERTIGQLESDGTLTGDEKSYRTGINRGLIGKWEDAMAHTLGAVGTSGSALVLISGNHFGNGYAFYGAGNKSLTSKLVYDNPFTQVNTQERFAKDRYPDLTGLELLPENVQVTAVGNTSDWLKGSIMFAAGALAGLGSGQIIAGFAAVRNAAEGLGVALLIAGGTVVGSKAN,17.1103
88
+ AGLFPNELNELRRRLASDATTFIAPINFKVMLTREFQLLHLVFGFAVGLAWNLLMGQNWPFFPLIHGSADDLPKLTSFGVIVHMHEAVEPIWAWYLSLISVQIHSGKFLQSIANTRLVGSLNGMFPAWQGGKMIGRLIPRHKIAATLAIPSLPVLWGVTHIDLMPESLEWSMNLVEGDIALFQETGTFVDIFLLAGGPRYLFQVTFDEKINRLMRKRPILIVSQKIGSHHFKDVEEYAIAMRQGIHLEEAEINIPGGKVTYTPNYLAPSYREGRNRTVGIWQTFLDEAWESEAELAILHKDLVISGEPVLYPHQFRQGGRELVGKFFRLVTVDPRAFVAMQNGAISKEELVAPLITARERTSWYIFGIGAVSATLVATGPVNLNCTQIAYGPSLSAGLAHGLIFLLIAFHLYHVLKCSQAFQGLAAIKALNLIKPTEQALPERIDLDPLVLFNVGHTLIVTLFLYLSILGRGDVGLNLTMAGVVGVMTVLTYAKFRHCPIQSQSNLNMDRADQYRVIDGLQYVLKEIEKDFETATGL,17.607
89
+ WAQKLIILMLSVILGGLFYFSLLPLLHPSTTLRNAPIIMPLLVLSSIIFYWFIHDNMIHHFDWNFKEFRLISQAISLNAFAFGLMLGTPDTLKPGCQHIRNPSFILYYLVYFTWYTGLLAKQQKELLKSLWSFLIIDSPFNLSVMRITRANLIENFSISQGNYLGQILLILLTHHSPLTILSWRGSPDRINDHASKNVPIKVDNETDNGELDKLACGALWSYYSQLWIETMLYRPTNGTEKNQYKDFINLVNLESYSTNVTSDVREGSPKAALLVNELHTYVILNASVVLFVTSRRDYNSLKKRNEA,17.6565
90
+ KGYDIRSNASWLVTRADGKRANAVAEPSALKPGPQSGVGNILPKSRASYFILNNIGAKIIYLLDILDTATVGALAKAPPANTNRDNQAKFKFTATATGGASFSGTVPTDIVGIKVGPTAPILWGQVVGGGQAAAGGTKGVTVEGGSGYFVAGFVLDDKENSLLPNSENVATIYIIPRGNIIVNNISEKTGPGVIIAAEGLAAKGGEMLVARGNSQSSTVVDVSKKAESKSIITEELLKTAQGNNFRADINQLVTSLVDSWDLGTEFTVGLNNATPAGGIFASGGTAVNGKQSNAAVAYGGVQIPQNGKAYGTMVIYKGSSQSKLDAVKVRFSTTNAPEYWFLVGAQDQLAGNNTGYFTGKNALAFAAALAQTEANKVVLAALTNKDNPVPQNKSGVVAKGIAEAFTEKFTVDAVGTDSIVANFNTKLAPGQIVFIGPDLEITIAYNGTVLSDAVGNNAGEAAILNPNISKRIQEKVEIGFSPKKNIGEEYISANGSIDSIGKCAADETAEFSALASTFTSVDFLVKSEYSSYSSNNTFDRFSLRDTSFTDDSNSKNSTRLKASDSSKFYDNYKMVKTAVFNGVATPAAAALGSLSQFTGTTTLRLEFDKGAASLKGEKFSDSKGNSVTEKKFQDTLRVNTLGRGVAFIGVKVDSPKALVIAVAGALGIGGNVLLKGGKIVASSKLKALSAKQARNQLPLFGEYNFLSGLFSNGATVAFIDPLGIPATKAIIYPGESIPIEVFTKTPTRVKFLEKGAKLGNTLSALFVFTETANLTSSLLVRANPGVAGNTKPKNLSDTSAGTSPQFAVTAISHRLALTA,16.9213
91
+ VLGVAEKKDNDDQAQSNSSDDIKKADRESTLYGQISAGVQVGAVGTAQQQVTFQLGYASLWGNSKWYHGLNKRDAVASGYESLMGSMTQAGNGISVRGQNSSSDHMSSLNNNSNNQYAGDNLLFSGGNVIQDMGMAQSLSYQGPFSGIQYSSQSYTNTNIFWWSGGDNASDIKAKLVYKAVGYDNYLGEVPGEATIQVRNLKFANNGTLAYAVHSQILLNGGKVAYNGRCMVSNNSTVYYSKTLQSALAQTWYEQGLVDANTLLVSAQGKKSDLYSLAKQNIAGNRRASFAYGASANPSAQVNASLSNTFTDTYTYFSGTPTYSRSSFP,18.6621
92
+ VGIIMPDSAAAFSVAGSLDPLQQVADAIEEFAEKKLSHFDIDDTSFINIVSYTKQVVRALFVQPCRQMIQPFRDPYSEIKFVNLDISKQLMPSPRRNSSVVKQQLIPFGKVWNILHKVGLNIIFKDVTVVSIALALAANLIKKSEFLSLKMAGRSVGTEEKLGFMGFIFMNTENYTKGNIPGKEIVAMYFLYLQNILFHPPEIGSPEPAKYSEQSGTYPCADAAKKYKKYPLQEKFMFIHASIGVGDVGKKVFSQRPEKGHLAEMLGAAVLFFGENFPQADFNYLPSKEAEYNLSLALFKFGTQFVVNNQPAFCYNEEGNGWLPVNKLESNEILDCDTPTKGTKVSGHLTPVTAGWLHLLQNLGMMCGSRQAQWQVYPFHNATCANLQHTKISPMAGLGEAISTGGRIRPTYRKILLGIPEDHYNPSSDLNMIESQLVQLDKLKEYEAYHFGPQMYAQNFPEKTQLTRLMQLSILLSDDRSLARLNNKSLQKMTESPEKNVTKSVPSTITFGYASELRRKKDRTKNTWVNLRRKENGAFDELFVSQESIHSNLSVAEFTIDFKRNDGVEIRACGVLCVTDAFRVKSQFKAIHVVGMSSTTLAKVNCQVLSPNNVTFDVNNPQYLHRQQSVLANASTWPQFRWQGEVSGPTLYQLDSIGVLPKADPQKPKWQAGIMFVLKLCLYLILFCYLPFIGVMYLKPAIFEQSTPPYKQAEAMHLLICCIVIMLFYSLSPVNLQASKQVEGSGVNLLVLFMTLLSYWSNLFRWFGHLFLMLTIASSLIYEAVAKLKTITPKNLKSIWHIETWQVFEPFMVFYIVVYSVALMSLKVFVDTWWVTFFYSGPVIITGSILGHGVNLLDRKMPYSSNIKLALHNVHQLLLNINVMVDEFTGQPTSPFVTNVAEPAKRAASLIIIAVGDALLAYMLGYTVPLVPRPK,18.4627
93
+ KILLGSSITQSWLTYIPFVFLLVIPLFMIRHYGILMTNVLTILILCVGAKVLANSKGDDPTSVRNLKDVWQKAFATALQITIKYYFGKSTKDFLQSVSMIKADSTIFVRKAPSWPFNLSVATVRGATTNGMSFTLPCTGEGNGGFCLLSQEAYAVTGPLLEDVGVLAPEGAGKLTQAPELVVGKVGDVDSKALLSLMIHLLAKIGVATVALSLIKGELEQLRVEGTDIARELATKDSKDDNKGSSLATIINPPMNIIVTVSATKPNNTVGGRASRALTQYLMDAGTKVVISTSTYRDLVPKARNDSSLVKTILAFKEDRVPLEGIISKRKDTVELRVVLIRLIDRGRIAEWLDKDVKAIDSSDDVTEDLIVDSKPMVHPMRVGGTIRSDYVRNQTIIHIYEKEWDDLSRVIEEKQEMKEVPFMWIQSGKNMEDEILP,17.3175
94
+ MFFYSLIRTAPGTLPLRQSLIIFVSGGGGDGSVAEAGTSLGAPAAEVFHVTIAARFSHELFATILLAYCVASGDTLSKVVSDRAHLVQLVTHHVGLARLRMLVTVIHFTALCPFGGAILFTAPLDINTRQPDPDPVALWWYYIAPVTGQMTREFGGTIINPNIANSYHLVYFKVLFRHFVAEYVGWLHGPGMHPTDVLDIKAALKKSPTHGPEIPHYYSPPRVPRAIPPIFSVFNEIGDARYTTIYDGSVMGLLEKARTYDMEEVYTPRQVGYIFVHSKGHNVFRLVAELESAIGDLFTAYFDSLTSEDGKQQNMISAYLKGLVASHGCGLASAFSFGEQEKWRNAFNYLLWGRYQIESWRTVEAIGPDLLSFWRKRFNELKQAGVWITTSPTCWEAGSVKDNGLFIINSMKYALGRDAVWSANMPRVNKHITIEVKGAAEHQQIVDALIALVKDYDNLGFYSAEERADHRFFVAMVKEKGSAGSSKIDTARVWDVHLIRSRYFYYDASAWYHSAQRMTDPIVNRGYNIGLFAAIVAAGMLLLVLRVDRRKITCPFRISCPDERFSLKSHEIPLDGNLRVYGELKSELDHEDPFGDLTVFRGTDTELSSGGFPLHWEFVKEPEIGMLETLIQAVVGVYFTTSLYPGYADEPGRTEMGLYNMGPFGWWLVKYSDR,18.1742
95
+ SNTARSTQMVGTGIDINSTQMYPYNIMLTGFEVLIRLTPSAIENWQIRGEEALDSFFTSLSNAIGNACVTVFLMRILLAVYTTKSSAESDRAIGYATAGLPNNITAIVAQINAVVATAVVNSMNACLDLAPILYWETLQKIDNISNYYPPFDRDCLKARAMTYQPQEVRMDMPITVACQSGRLMNTAVRKETVIYAIIKEEPKNNFYLLTDPVYQRADTVVQAQYGHEPEFDTEDNLYPNKYGWIQYHEEYYEPIWWRWKIRSYFRTTQLETKTSLLARDSWEPFYASPFSRHIPISITDRPGMDHFMDDLYQSTPSFLTNAICCHNTNGHFPTELLGTIDTLRSALGGLDLHQSSHKSHLLLLRSTIRDLCEASGSGMTTQFTYLLLGNVYIARSDNLKHAFDANAKKGFKLRVIKGAIPPHVKMQVVIGASAGRILLMKTSKLKFVFSDGNLQRPLSEYVELAGDDISEAVFHAGKDTFEFEYEVTDDQFFFHFRAELIEPWKRENLYDNSLYLFRIGDKKFVRTLFTTLSCNKSVMLYFQKFAKLKITASKKKGISFTPDRPACGISVVPHLDKQFVLQVVLQTLMKITWKPCKKNRFQSRFVDHGGFFDFVTYSEIYLKLFAGENVVFTRMSWYAKLSTPHDFQPRSLVGVSTMGIFDEADGKYHLIGTGNFGFKIWRFLYVLDSVFSIEGMFAATITEYILWSGIVRYFRTFFTLEAGIPSHSSGTEGVYVCFKELIFEWPKDTPSVQISLAESTDPSAGIWIREIENRNQFNKVSMLVKTAVDVAQLVFTLEAFAPFEQSLNVIFDNEVDVSLTKALGPTASNTYESSQLALGNRLVLSEAGDVTALDRMVTTITCNTLCFFRHYNGITVVNCIEKAAAVVCIHITNPMPGFVQTQLGIGGLGPYICKSCSLAELQCEDRKIRFYSQIPGSAGDFFEDIAWQKRLEELKNLPK,18.5522
96
+ LVKVETEVKVYVRPTKPLPYTVETAYGGSPEQQFYNIRKLEPGLFADMGGFFTPPMSTASLGSTYQIFRQIYDGTLLWKNNMSDTDPNNVYRALQMKDTVSSLMFVLVLPGSDVYIKLGLVHIEETTKIDGTPDDAQPSSTDLSPGRFVEEQEVMSEDDELALLEDLRSLKFVCQDVLKRQKRHIFNNASITESLTIAFALNNTDNRLSWLMYLWIFALFGLVLIILVLVAGFDIWWSPTKQYGMIIFNLIGNFSSYKTMSEASMKSGIANVGQAWTTIKTVTAVLNNKALVVVNAGEDALALLKSLPKQTDAMVGHIQMTDRAIGMKPNDLWTFIAPIGYVGKGTRMFIIFPKSILSPNPSIGRTVVVVAGIKDVLQDMIQISKTKTDKVSGVNKGDQVDFRYKVETSKNNSLVAGAITEALRGSSASGIKIGSFVQDPNPLFGDLENNFAYGASAMLFDAFLTRFNKENNLIVLVGNRSALNTDTRRLVQWVDALHFNTQLFIIAMEKNAQVQNALTTANKLGILKPTVNVVDQYFPQGLLISLDINRAIHASLKGLPPKGFVVTVIGEEPDSSQLVAKVKAFGLKLFVYAKSTSDVAKLNDLGFATLETGGSLEFFKMDQFKLEIANEVTKTMRSFALIVVLDDDMQQGTKILTDIHPHHPQFTPGPKEASLEKKLALVLGMAVVYRLRLTTVRLEVVTRIPAVIVVNDIQIFTDMAYTNVSGNLPRLPADKVKLGKYSYHAADADGINYKVTGDGSKLKGSIVAQIMVNDVVVLNTKWPIETSKWKIAVNAEVRLDIGPFSTNIPRTMTGYESYLGNRIVLIGRKNRVFGRSTIVEGLIGFLFDVFLHVWGYVFTWITLAIHYWAGPRITHILSRAGDILEIVMSAMRLEKFNSMTKDWLRLLEIPILAELAERIVEGDKRFGVIDKSGTPYELIKKTVEPENVPTALSKVESL,17.6058
97
+ EFRTALGTLAAFVAIFFISVQFLFRFYPETWLPIYHLAFKRLSVPPAAIVAIASVTIFCIGAIFGLFPGPALALRRLTGNVAFLIGIATAIGIGVTFLIKGISNSNNTSSMSIIIRTVAGAIAVVLLTLPALVVRIHGNFGRAAVGGGAAEGANAAIFQSLTGSNNAFRDALFNFGVKLLFGLAIETREIILLEYIYNLLLTVGYDLNFASRGRLQLNAITGLLVVSAAIVSGYRTVAAERKVFDFALLKARKSVYPALRELRLVPLITFIGVLFVTTK,15.2478
98
+ KGLQFTRKGWNHKGRRHWRDFDTVAGNALLGIEGQTGPRMVETGENVTTEPGNRRTRPTLLATGTEPADAGIEETRIEQDVILPLTTKANGGMIRVHHYDVRKIAGVEIDLESDILEARLTDGEDLHNCKFTTTVKAHIKTEPTPVSAADSEILLKGQYVSSDFEVLDSDLDANVSRDSRMWFEVAYICDILDKQTLLNEGMTFTVTSDGYSSGAADVWVLSTIKTQCRHAQGQQWLYRAGNLKPVVEMEIVYSAARDVTGGSLFGAVNSAAPFTVEMLFFPATIEQLRPGTPRAGKTITNPENATSGGNIEVFEEVKHFSDSQFRNEVRFITDDDSVYTATERRRIPNAPQNGIIRYWMKNGYASWNTEKVYARQPDGDITRQESFENAAQSMSTADNYYNHYYKEALRMHLAGGVEDDLEDDVTQEVRVSKDGEVEIDLDLNYTSKRYREGISWFLGCNAAHGIPINDAGVGFAFAAIPQYA,18.6838
99
+ FSSVATATTTAIAFAIAAGVGGAIGGAVVGSLVIASLRGTVTAASALKAPLVPLALTVGAASLGATIGLAASWGVNLTL,10.8479
100
+ SSITAAIQLYKPDSISILDDDSPDDLFETVEFLTEKQKNKQTSDNSYKLFADSFLSIVDSPNWTNMLLIAARVLLVLYTICPCCRADWVGAIGTDDVSYDIVCDLLGININFFKITKVLTAQYLPGRTKVGYMKHPLKTSYFVSIYVEDISDCARHPYGFSYAWQYVKKPYGTVSVDIYNGNPREKLFCLEGLNWATGLGLVVGAGAYKSLGTSVERVNTLVIFLETGELFVWAYAALWFRKRYTEDSEAKVNLYIAGLIVLFAVEKVSYAVPDIVFKEQILKSMVIFAKFSIINLYLDALFDFVFICIIILLLRVLSKDLEAVVGPTLSVFL,17.2645
101
+ LECYGQQSSLIEMYRDYTIKVRDRYANNERIILDHYLVLNGDFYVRLASNKIVLGPDDANSVVAILQIGDMGLFLANGKNVTEMKRMLEKLEILYFTGSEAAVGSVTGHVCLMITNIWKDNKKLVEMLEFLGTEIIYNSVGLVFMIGKMSDKQGVYAKNKFSDSILEIAVKLQNFTWRNHVLFIGAYLYQWELYAEPEVVINNNISVIRVLWDPDGKSLYIIRPEKPPNIFEYLMHGICTFGGVGAIAGGMGVPASHIGGLIYKADFSISSWCEPGSVNVGALPYGSNCVVVQEGGNVVTFSLPTGSDVPIFALEHFPEPGKWKWEGFYWINPTDYRIMISGLKYTLAANAIAGIGAYLESYNIKISTWQYLVNGNPYDSVGVYNQHEYPLYPSLPMSDFTIFPVLTFAP,18.4071
benchmarks/MLM/config.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Root of the project checkout; every path below is derived from it.
PATH = "/workspace/sg666/MDpLM"

# Membrane-protein CSV splits (each is read via its 'Sequence' column).
TRAIN_DATA = f"{PATH}/data/membrane/train.csv"
TEST_DATA = f"{PATH}/data/membrane/test.csv"
VAL_DATA = f"{PATH}/data/membrane/val.csv"

# Hugging Face hub id of the base ESM-2 model and local output locations.
ESM_MODEL_PATH = "facebook/esm2_t30_150M_UR50D"
MLM_MODEL_PATH = f"{PATH}/benchmarks/MLM"
CKPT_DIR = f"{PATH}/benchmarks/MLM/model_ckpts"

# Training hyperparameters.
ESM_LAYERS = 3         # number of trailing encoder layers left trainable
BATCH_SIZE = 8
NUM_EPOCHS = 10
LEARNING_RATE = 0.005
MASKING_RATE = 0.4
benchmarks/MLM/data_loader.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import torch
3
+ import config
4
+ import random
5
+ from torch.utils.data import Dataset, DataLoader
6
+ from torch.nn.utils.rnn import pad_sequence
7
+ from pretrained_models import load_esm2_model
8
+
9
class ProteinDataset(Dataset):
    """Masked-language-modelling dataset built from a CSV with a 'Sequence' column.

    Each item is a dict with ``input_ids`` (a randomly masked copy of the
    sequence), ``attention_mask`` and ``labels``. ``labels`` holds the original
    token id at every masked position and -100 everywhere else.

    Args:
        csv_file: path to a CSV file containing a 'Sequence' column.
        tokenizer: callable tokenizer exposing ``mask_token_id``.
        mask_rate: fraction of residues replaced by ``<mask>`` per item.
    """

    def __init__(self, csv_file, tokenizer, mask_rate=0.15):
        self.tokenizer = tokenizer
        self.data = pd.read_csv(csv_file)
        self.mask_rate = mask_rate
        # NOTE(review): max_len counts residues only. If the tokenizer adds
        # special tokens, the longest sequences are truncated - confirm intent.
        self.max_len = max(len(seq) for seq in self.data['Sequence'])

    def __len__(self):
        return len(self.data)

    def _encode(self, text):
        """Tokenize *text* padded/truncated to ``self.max_len`` as PyTorch tensors."""
        return self.tokenizer(text, padding="max_length", truncation=True,
                              max_length=self.max_len, return_tensors='pt')

    def __getitem__(self, idx):
        sequence = self.data.iloc[idx]['Sequence'].upper()

        # Pick the residues to hide; a set keeps the membership test O(1).
        num_masks = int(len(sequence) * self.mask_rate)
        mask_positions = set(random.sample(range(len(sequence)), num_masks))
        masked_sequence = ''.join(
            "<mask>" if i in mask_positions else residue
            for i, residue in enumerate(sequence)
        )

        inputs = self._encode(masked_sequence)
        # squeeze(0) drops only the batch axis, even for length-1 sequences.
        input_ids = inputs['input_ids'].squeeze(0)
        attention_mask = inputs['attention_mask'].squeeze(0)

        # Targets must come from the UNMASKED sequence. Tokenizing the masked
        # string would make every supervised label the <mask> id itself.
        target_ids = self._encode(sequence)['input_ids'].squeeze(0)
        is_masked = input_ids == self.tokenizer.mask_token_id
        labels = target_ids.masked_fill(~is_masked, -100)

        return {"input_ids": input_ids, "attention_mask": attention_mask, "labels": labels}
34
+
35
+
36
+
37
def get_dataloaders(config):
    """Build the train, validation and test DataLoaders described by *config*.

    Reads ``ESM_MODEL_PATH``, ``TRAIN_DATA``, ``VAL_DATA``, ``TEST_DATA`` and
    ``BATCH_SIZE`` from *config*. Only the training loader shuffles.

    Returns:
        (train_loader, val_loader, test_loader)
    """
    # Only the tokenizer is needed here; the model returned alongside it is discarded.
    tokenizer, _ = load_esm2_model(config.ESM_MODEL_PATH)

    # (csv path, shuffle?) for each split, in return order.
    splits = (
        (config.TRAIN_DATA, True),
        (config.VAL_DATA, False),
        (config.TEST_DATA, False),
    )
    datasets = [(ProteinDataset(csv_path, tokenizer), shuffle) for csv_path, shuffle in splits]
    loaders = [
        DataLoader(dataset, batch_size=config.BATCH_SIZE, shuffle=shuffle)
        for dataset, shuffle in datasets
    ]
    return tuple(loaders)
benchmarks/MLM/esm_utils.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from transformers import AutoTokenizer, AutoModel, AutoModelForMaskedLM
3
+
4
def load_esm2_model(model_name):
    """Load the tokenizer, masked-LM model and bare embedding model for *model_name*.

    Returns:
        (tokenizer, masked_model, embedding_model), each created with the
        corresponding ``from_pretrained(model_name)`` call.
    """
    factories = (AutoTokenizer, AutoModelForMaskedLM, AutoModel)
    return tuple(factory.from_pretrained(model_name) for factory in factories)
9
+
10
+
11
+
12
def get_latents(model, tokenizer, sequence):
    """Return ``last_hidden_state`` of *model* for *sequence* with the batch axis removed.

    The sequence is tokenized to PyTorch tensors, moved to ``model.device`` and
    run through the model without tracking gradients.
    """
    encoded = tokenizer(sequence, return_tensors="pt")
    encoded = encoded.to(model.device)
    with torch.no_grad():
        hidden_states = model(**encoded).last_hidden_state
    return hidden_states.squeeze(0)
benchmarks/MLM/mlm_generate_utils.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import math
3
+ import config
4
+ import sys
5
+ import pandas as pd
6
+ from esm_utils import get_latents
7
+ from transformers import AutoModelForMaskedLM, AutoModel, AutoTokenizer
8
+
9
+
10
def mask_for_de_novo(sequence_length):
    """Return a fully masked template: ``<mask>`` repeated *sequence_length* times."""
    return "".join("<mask>" for _ in range(sequence_length))
12
+
13
def generate_de_novo(sequence_length, tokenizer, model):
    """Generate a sequence from scratch by filling a fully masked template.

    Every position starts as ``<mask>``. For each one, a token is sampled from
    the softmax over the model's three highest logits.

    Args:
        sequence_length: number of ``<mask>`` tokens to fill.
        tokenizer: tokenizer exposing ``mask_token_id`` and ``decode``.
        model: masked-LM model exposing ``device`` and returning ``.logits``.

    Returns:
        (generated_sequence, perplexity)
    """
    masked_sequence = mask_for_de_novo(sequence_length)
    inputs = tokenizer(masked_sequence, return_tensors='pt').to(model.device)

    with torch.no_grad():
        logits = model(**inputs).logits
    mask_token_indices = (inputs["input_ids"] == tokenizer.mask_token_id).nonzero(as_tuple=True)[1]
    # Row k of logits_at_masks belongs to the k-th mask. It must be walked by
    # row, not indexed with the absolute token positions in mask_token_indices.
    logits_at_masks = logits[0, mask_token_indices]

    pred_tokens = []
    for position_logits in logits_at_masks:
        topk_logits, topk_indices = position_logits.topk(k=3, dim=-1)
        probabilities = torch.nn.functional.softmax(topk_logits, dim=-1)
        predicted_index = torch.distributions.categorical.Categorical(probabilities).sample()
        predicted_token_id = topk_indices[predicted_index].item()
        predicted_token = tokenizer.decode([predicted_token_id], skip_special_tokens=True)
        # A sampled special token decodes to ''. Fall back to 'G', as
        # generate_scaffold does, so the output keeps one residue per mask.
        pred_tokens.append('G' if predicted_token == '' else predicted_token)

    generated_sequence = ''.join(pred_tokens)
    # calculate_perplexity needs the positions to score as its fourth argument.
    perplexity = calculate_perplexity(model, tokenizer, generated_sequence, mask_token_indices)

    return (generated_sequence, perplexity)
35
+
36
+
37
def mask_for_scaffold(sequence, generate_type):
    """Replace residues of one letter case with ``<mask>``.

    * ``"uppercase"``: uppercase residues are masked; the remaining residues
      are converted to uppercase.
    * ``"lowercase"``: lowercase residues are masked; the rest are kept as-is.
    * any other value: *sequence* is returned unchanged.
    """
    if generate_type == "uppercase":
        pieces = []
        for residue in sequence:
            pieces.append("<mask>" if residue.isupper() else residue.upper())
        return ''.join(pieces)

    if generate_type == "lowercase":
        pieces = []
        for residue in sequence:
            pieces.append("<mask>" if residue.islower() else residue)
        return ''.join(pieces)

    return sequence
43
+
44
+
45
def generate_scaffold(sequence, generate_type, tokenizer, model):
    """Re-generate the masked part of *sequence* with a masked language model.

    The residues selected by *generate_type* (see ``mask_for_scaffold``) are
    masked. Each mask is then filled with a token sampled from the softmax over
    the model's three highest logits at that position.

    Returns:
        (generated_sequence, mask_token_indices), where the second item holds
        the token positions that were masked in the model input.
    """
    masked_sequence = mask_for_scaffold(sequence, generate_type)
    inputs = tokenizer(masked_sequence, return_tensors='pt').to(model.device)

    with torch.no_grad():
        logits = model(**inputs).logits
    is_mask = inputs["input_ids"] == tokenizer.mask_token_id
    mask_token_indices = is_mask.nonzero(as_tuple=True)[1]
    logits_at_masks = logits[0, mask_token_indices]

    softmax = torch.nn.functional.softmax
    Categorical = torch.distributions.categorical.Categorical

    generated_sequence = masked_sequence
    for position_logits in logits_at_masks:
        top_logits, top_ids = position_logits.topk(k=3, dim=-1)
        choice = Categorical(softmax(top_logits, dim=-1)).sample()
        token = tokenizer.decode([top_ids[choice].item()], skip_special_tokens=True)
        # An empty decode falls back to 'G' so every mask yields a residue.
        if token == '':
            token = 'G'
        # Fill the left-most remaining mask.
        generated_sequence = generated_sequence.replace("<mask>", token, 1)

    return generated_sequence, mask_token_indices
69
+
70
+
71
def calculate_perplexity(model, tokenizer, generated_sequence, mask_token_indices):
    """Score *generated_sequence* at the given token positions.

    For each position in *mask_token_indices*, that single token is replaced by
    the mask token, the model is run with a label at only that position, and
    the loss is recorded. The result is ``exp(mean loss)``.

    Returns:
        The perplexity, or the sentinel ``10000`` when *mask_token_indices* is empty.
    """
    token_ids = tokenizer.encode(generated_sequence, return_tensors='pt').to(model.device)

    losses = []
    for position in mask_token_indices:
        corrupted = token_ids.clone()
        corrupted[0, position] = tokenizer.mask_token_id

        # -100 marks positions the loss should ignore; only `position` is scored.
        targets = torch.full(token_ids.shape, -100).to(model.device)
        targets[0, position] = token_ids[0, position]

        with torch.no_grad():
            losses.append(model(corrupted, labels=targets).loss.item())

    scored_positions = len(mask_token_indices)
    if scored_positions == 0:
        return 10000
    return math.exp(sum(losses) / scored_positions)
94
+
95
+
96
def calculate_cosine_sim(original_sequence, generated_sequence, tokenizer, esm_model, device):
    """Mean per-position cosine similarity between the embeddings of two sequences.

    The original sequence is upper-cased before embedding; the generated one is
    embedded as given. Similarity is taken along the last (feature) axis and
    averaged into a Python float.
    """
    reference = get_latents(esm_model, tokenizer, original_sequence.upper()).to(device)
    candidate = get_latents(esm_model, tokenizer, generated_sequence).to(device)

    per_position = torch.nn.functional.cosine_similarity(reference, candidate, dim=-1)
    return torch.mean(per_position).item()
103
+
104
+
105
def calculate_hamming_dist(original_sequence, generated_sequence):
    """Count case-insensitive mismatches over the positions of *original_sequence*.

    Only indices of the original sequence are compared. A shorter generated
    sequence raises ``IndexError``; extra trailing residues are ignored.
    """
    reference = original_sequence.upper()
    candidate = generated_sequence.upper()

    mismatches = 0
    for idx, residue in enumerate(reference):
        if residue != candidate[idx]:
            mismatches += 1
    return mismatches
benchmarks/MLM/mlm_lowercase_results.csv ADDED
The diff for this file is too large to render. See raw diff
 
benchmarks/MLM/mlm_motif_benchmarking.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import config
3
+ import sys
4
+ import pandas as pd
5
+ from mlm_generate_utils import generate_scaffold, calculate_perplexity, calculate_cosine_sim, calculate_hamming_dist
6
+ from transformers import AutoModelForMaskedLM, AutoModel, AutoTokenizer
7
+
8
def motif_benchmarking():
    """Benchmark scaffold generation of the fine-tuned MLM on the membrane test set.

    For both masking modes ('uppercase', 'lowercase'), each test sequence is
    re-generated and scored by perplexity, ESM cosine similarity and Hamming
    distance. One CSV is written per mode.
    """
    path = "/workspace/sg666/MDpLM"

    test_sequences = pd.read_csv(path + "/data/membrane/test.csv")['Sequence'].tolist()

    best_ckpt = config.CKPT_DIR + "/best_model_epoch"
    tokenizer = AutoTokenizer.from_pretrained(best_ckpt)
    mlm_model = AutoModelForMaskedLM.from_pretrained(best_ckpt)
    esm_model = AutoModel.from_pretrained("facebook/esm2_t36_3B_UR50D")

    device = torch.device('cuda' if torch.cuda.is_available() else "cpu")
    for network in (mlm_model, esm_model):
        network.to(device)

    columns = ['Original Sequence', 'Generated Sequence', 'Perplexity', 'Cosine Similarity', 'Hamming Distance']

    for generate_case in ('uppercase', 'lowercase'):
        case_results = []
        for original_sequence in test_sequences:
            generated_sequence, mask_token_idx = generate_scaffold(
                original_sequence, generate_case, tokenizer, mlm_model)
            row = [
                original_sequence,
                generated_sequence,
                calculate_perplexity(mlm_model, tokenizer, generated_sequence, mask_token_idx),
                calculate_cosine_sim(original_sequence, generated_sequence, tokenizer, esm_model, device),
                calculate_hamming_dist(original_sequence, generated_sequence),
            ]
            case_results.append(row)

            # Progress output: all rows collected so far for this mode.
            print(case_results)
            sys.stdout.flush()

        results = pd.DataFrame(case_results, columns=columns)
        results.to_csv(path + f'/benchmarks/MLM/mlm_{generate_case}_results.csv', index=False)


if __name__ == "__main__":
    motif_benchmarking()
benchmarks/MLM/mlm_uppercase_results.csv ADDED
The diff for this file is too large to render. See raw diff
 
benchmarks/MLM/model.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import config
2
+ import torch
3
+ import torch.nn as nn
4
+ from pretrained_models import load_esm2_model
5
+ from transformers import AutoModelForMaskedLM, AutoTokenizer, AutoModel
6
+
7
class MembraneTokenizer:
    """Thin wrapper around a Hugging Face tokenizer.

    Calls and unknown attributes are forwarded to the wrapped tokenizer, so an
    instance can be used wherever the underlying tokenizer is expected.
    """

    def __init__(self, esm_model_path=config.ESM_MODEL_PATH):
        self.tokenizer = AutoTokenizer.from_pretrained(esm_model_path)

    def __getattr__(self, name):
        # __getattr__ runs only when normal lookup fails. If `tokenizer` itself
        # is missing (e.g. during unpickling), forwarding would recurse forever.
        if name == "tokenizer":
            raise AttributeError(name)
        return getattr(self.tokenizer, name)

    def __call__(self, *args, **kwargs):
        return self.tokenizer(*args, **kwargs)

    def save_tokenizer(self, save_dir):
        """Write the wrapped tokenizer's files to *save_dir*."""
        self.tokenizer.save_pretrained(save_dir)

    def load_tokenizer(self, load_dir):
        """Replace the wrapped tokenizer with the one stored in *load_dir*."""
        # Loading must read from disk; calling save_pretrained here would
        # overwrite the directory instead of loading from it.
        self.tokenizer = AutoTokenizer.from_pretrained(load_dir)
22
+
23
class MembraneMLM:
    """Wrapper bundling an ESM masked-LM model with its tokenizer.

    Calls and unknown attributes (``train``, ``eval``, ``to``, ``parameters``,
    ...) are forwarded to the wrapped Hugging Face model.
    """

    def __init__(self, esm_model_path=config.ESM_MODEL_PATH):
        self.model = AutoModelForMaskedLM.from_pretrained(esm_model_path)
        self.tokenizer = AutoTokenizer.from_pretrained(esm_model_path)

    def __getattr__(self, name):
        # __getattr__ runs only when normal lookup fails. If `model` or
        # `tokenizer` is missing, forwarding would recurse forever.
        if name in ("model", "tokenizer"):
            raise AttributeError(name)
        return getattr(self.model, name)

    def __call__(self, *args, **kwargs):
        return self.model(*args, **kwargs)

    def freeze_model(self):
        """Disable gradient updates for every parameter of the wrapped model."""
        for param in self.model.parameters():
            param.requires_grad = False

    def unfreeze_n_layers(self):
        """Re-enable gradients for the attention key/query/value projections
        of the last ``config.ESM_LAYERS`` encoder layers."""
        encoder_layers = self.model.esm.encoder.layer
        # max(..., 0) keeps the slice valid when ESM_LAYERS exceeds the layer count.
        first_trainable = max(len(encoder_layers) - config.ESM_LAYERS, 0)

        for layer in encoder_layers[first_trainable:]:
            attention = layer.attention.self
            for projection in (attention.key, attention.query, attention.value):
                for param in projection.parameters():
                    param.requires_grad = True

    def forward(self, **inputs):
        return self.model(**inputs)

    def save_model(self, save_dir):
        """Save the model weights and tokenizer files to *save_dir*."""
        self.model.save_pretrained(save_dir)
        self.tokenizer.save_pretrained(save_dir)

    def load_model(self, load_dir):
        """Reload the model and tokenizer from *load_dir*."""
        # Use the masked-LM class so the reloaded model keeps its LM head and
        # still yields .loss / .logits; plain AutoModel loads the encoder only.
        self.model = AutoModelForMaskedLM.from_pretrained(load_dir)
        self.tokenizer = AutoTokenizer.from_pretrained(load_dir)
benchmarks/MLM/pretrained_models.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from transformers import AutoTokenizer, AutoModel, EsmForMaskedLM, AutoModelForMaskedLM
3
+
4
def load_esm2_model(esm_model_path):
    """Return ``(tokenizer, model)`` for the masked-LM checkpoint at *esm_model_path*."""
    return (
        AutoTokenizer.from_pretrained(esm_model_path),
        AutoModelForMaskedLM.from_pretrained(esm_model_path),
    )
8
+
9
def load_mlm_model(esm_model_path, ckpt_path):
    """Return ``(tokenizer, model)`` for a fine-tuned checkpoint.

    The tokenizer is loaded from *esm_model_path* and the masked-LM weights
    from *ckpt_path*.
    """
    return (
        AutoTokenizer.from_pretrained(esm_model_path),
        AutoModelForMaskedLM.from_pretrained(ckpt_path),
    )
benchmarks/MLM/screen_mlm_cosine_hamming.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+
3
# Summarise the uppercase-scaffold benchmark: mean perplexity, mean cosine
# similarity and Hamming distance normalised by the total residue count.
path = "/home/sg666/MDpLM/benchmarks/MLM"

df = pd.read_csv(path + "/mlm_uppercase_results.csv")

all_sequences = df['Original Sequence'].tolist()
seq_len_sum = sum(map(len, all_sequences))

# 10000 is the sentinel perplexity recorded when no position was masked.
ppls = []
for ppl in df['Perplexity'].tolist():
    if ppl != 10000:
        ppls.append(ppl)

ppl_mean = sum(ppls) / len(ppls)
cos_mean = df.loc[:, 'Cosine Similarity'].mean()
hamming_mean = sum(df['Hamming Distance'].tolist()) / seq_len_sum

for summary_value in (ppl_mean, cos_mean, hamming_mean):
    print(summary_value)
benchmarks/MLM/train_and_test.py ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import config
3
+ import math
4
+ import sys
5
+ import os
6
+ from tqdm import tqdm
7
+ from torch.optim import Adam
8
+ from torch.optim.lr_scheduler import CosineAnnealingLR
9
+ from transformers import AutoModelForMaskedLM, AutoModel, AutoTokenizer, AutoConfig
10
+ from pretrained_models import load_esm2_model
11
+ from model import MembraneMLM, MembraneTokenizer
12
+ from data_loader import get_dataloaders
13
+
14
def save_hyperparams(ckpt_dir):
    """Write every upper-case attribute of the ``config`` module to
    ``<ckpt_dir>/hyperparameters.txt`` as ``NAME: value`` lines."""
    hyperparms_txt_file = os.path.join(ckpt_dir, "hyperparameters.txt")
    with open(hyperparms_txt_file, 'w') as f:
        f.writelines(
            f"{name}: {value}\n"
            for name, value in vars(config).items()
            if name.isupper()
        )
20
+
21
def _run_epoch(model, loader, device, mask_token_id, desc, optimizer=None):
    """Run one pass over *loader*; update weights only when *optimizer* is given.

    Returns:
        (mean per-batch loss, perplexity). Perplexity is exp of the batch
        losses weighted by each batch's number of mask tokens.
    """
    training = optimizer is not None
    total_loss = 0.0
    weighted_total_loss = 0.0
    total_masked_tokens = 0

    with torch.set_grad_enabled(training):
        # tqdm advances by itself while iterating; no manual update() is needed.
        for inputs in tqdm(loader, desc=desc, total=len(loader), leave=True, position=0, ncols=100):
            inputs = {k: v.to(device) for k, v in inputs.items()}
            if training:
                optimizer.zero_grad()
            loss = model(**inputs).loss
            if training:
                loss.backward()
                optimizer.step()

            num_mask_tokens = (inputs["input_ids"] == mask_token_id).sum().item()
            total_masked_tokens += num_mask_tokens
            total_loss += loss.item()
            weighted_total_loss += loss.item() * num_mask_tokens

    avg_loss = total_loss / len(loader)
    perplexity = math.exp(weighted_total_loss / total_masked_tokens)
    return avg_loss, perplexity


def train_and_validate(model, optimizer, device, train_loader, val_loader, num_epochs, ckpt_dir, tokenizer=None):
    """Train *model* for *num_epochs*, validating and checkpointing each epoch.

    Args:
        model: wrapper exposing ``train``/``eval``/``save_model`` whose call
            result carries ``.loss``.
        optimizer: optimizer over the trainable parameters.
        device: device that batches are moved to.
        train_loader: training DataLoader yielding dicts of tensors.
        val_loader: validation DataLoader; validation is skipped if falsy.
        num_epochs: number of epochs to run.
        ckpt_dir: directory that receives ``epoch<N>`` and ``best_model_epoch``.
        tokenizer: source of ``mask_token_id``; defaults to ``model.tokenizer``.

    Returns:
        (avg_train_loss, train_perplexity, avg_val_loss, val_perplexity) of the
        final epoch. The validation entries are ``None`` when it never ran.
    """
    if tokenizer is None:
        # Avoid depending on a module-level `tokenizer` that only exists when
        # the file is run as a script.
        tokenizer = model.tokenizer
    mask_token_id = tokenizer.mask_token_id

    best_val_loss = float('inf')
    avg_train_loss = train_perplexity = None
    avg_val_loss = val_perplexity = None

    for epoch in range(num_epochs):
        print(f"EPOCH {epoch+1}/{num_epochs}")
        sys.stdout.flush()

        model.train()
        avg_train_loss, train_perplexity = _run_epoch(
            model, train_loader, device, mask_token_id, "Training batch", optimizer=optimizer)

        # Save model every epoch
        train_ckpt_path = os.path.join(ckpt_dir, f'epoch{epoch+1}')
        model.save_model(train_ckpt_path)
        save_hyperparams(train_ckpt_path)

        # Validate model
        if val_loader:
            model.eval()
            avg_val_loss, val_perplexity = _run_epoch(
                model, val_loader, device, mask_token_id, 'Validiation batch')

            # Save the best model based on validation loss
            if avg_val_loss < best_val_loss:
                best_val_loss = avg_val_loss
                val_ckpt_path = os.path.join(ckpt_dir, "best_model_epoch")
                model.save_model(val_ckpt_path)
                save_hyperparams(val_ckpt_path)

        print(f"Average train loss: {avg_train_loss}")
        print(f"Average train perplexity: {train_perplexity}\n")
        sys.stdout.flush()

        if val_loader:
            print(f"Average validation loss: {avg_val_loss}")
            print(f"Average validation perplexity: {val_perplexity}\n")
            sys.stdout.flush()

    return avg_train_loss, train_perplexity, avg_val_loss, val_perplexity
107
+
108
+
109
def test(model, test_loader, device, tokenizer=None):
    """Evaluate *model* on *test_loader* without gradient tracking.

    Args:
        model: model whose call result carries ``.loss``.
        test_loader: iterable of dicts of tensors that include ``input_ids``.
        device: device that the model and batches are moved to.
        tokenizer: source of ``mask_token_id``; defaults to ``model.tokenizer``.

    Returns:
        (avg_test_loss, test_perplexity). The loss is the mean per-batch loss;
        perplexity is exp of the batch losses weighted by each batch's number
        of mask tokens.
    """
    if tokenizer is None:
        # Avoid depending on a module-level `tokenizer` that only exists when
        # the file is run as a script.
        tokenizer = model.tokenizer
    mask_token_id = tokenizer.mask_token_id

    model.to(device).eval()
    total_test_loss = 0.0
    weighted_total_test_loss = 0.0
    total_masked_test_tokens = 0

    with torch.no_grad():
        for inputs in test_loader:
            inputs = {k: v.to(device) for k, v in inputs.items()}
            test_loss = model(**inputs).loss.item()

            num_mask_tokens = (inputs["input_ids"] == mask_token_id).sum().item()
            total_masked_test_tokens += num_mask_tokens

            total_test_loss += test_loss
            weighted_total_test_loss += test_loss * num_mask_tokens

    avg_test_loss = total_test_loss / len(test_loader)
    avg_test_neg_log_likelihood = weighted_total_test_loss / total_masked_test_tokens
    test_perplexity = math.exp(avg_test_neg_log_likelihood)

    return avg_test_loss, test_perplexity
132
+
133
+
134
if __name__ == "__main__":
    # Entry point: fine-tune the partially unfrozen ESM masked-LM, evaluate it
    # on the test split, then print and save the summary metrics.
    device = torch.device('cuda' if torch.cuda.is_available() else "cpu")
    print(device)

    model = MembraneMLM()
    model.to(device)
    model.freeze_model()
    model.unfreeze_n_layers()
    # Module-level name; other code in this file may look it up globally.
    tokenizer = model.tokenizer

    train_loader, val_loader, test_loader = get_dataloaders(config)
    trainable_params = (p for p in model.parameters() if p.requires_grad)
    optimizer = Adam(trainable_params, lr=config.LEARNING_RATE)

    # Train and test the model
    train_results = train_and_validate(
        model, optimizer, device, train_loader, val_loader, config.NUM_EPOCHS, config.CKPT_DIR)
    avg_train_loss, train_ppl, avg_val_loss, val_ppl = train_results
    avg_test_loss, test_ppl = test(model, test_loader, device)

    results_dict = {}
    results_dict["Average train loss"] = avg_train_loss
    results_dict["Average train perplexity"] = train_ppl
    results_dict["Average val loss"] = avg_val_loss
    results_dict["Average val perplexity"] = val_ppl
    results_dict["Average test loss"] = avg_test_loss
    results_dict["Average test perplexity"] = test_ppl

    print("TRAIN AND TEST RESULTS")
    for metric_name, metric_value in results_dict.items():
        print(f"{metric_name}: {metric_value}\n")

    # Save training and test performance
    with open(config.CKPT_DIR + "/train_test_results.txt", 'w') as f:
        f.writelines(f'{metric_name}: {metric_value}\n'
                     for metric_name, metric_value in results_dict.items())
167
+
168
+
169
+ ### Get embeddings from model
170
+ # best_model_pth = config.MLM_MODEL_PATH + "/best_model"
171
+
172
+ # model = AutoModel.from_pretrained(best_model_pth)
173
+ # tokenizer = AutoTokenizer.from_pretrained(best_model_pth)
174
+ # model.eval().to(device)
175
+
176
+ # random_seq = "WPIQMVYSLGQHADYMQWFTIMPPPIEMIFVWHNCTQHDYSFRERAGEVDQARMKTEMAR"
177
+ # inputs = tokenizer(random_seq, return_tensors='pt')
178
+ # inputs = {k: v.to(device) for k, v in inputs.items()}
179
+ # inputs = inputs['input_ids']
180
+ # print(inputs)
181
+ # with torch.no_grad():
182
+ # outputs = model(inputs).last_hidden_state
183
+ # print(outputs)
184
+ # print(outputs.size())
benchmarks/Supervised/.DS_Store ADDED
Binary file (6.15 kB). View file
 
benchmarks/Supervised/Localization/cell_localization_predictor.py ADDED
@@ -0,0 +1,224 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.optim as optim
4
+ from torch.utils.data import DataLoader, Dataset
5
+ from transformers import AutoModel, AutoTokenizer
6
+ from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
7
+
8
+ from tqdm import tqdm
9
+ from datetime import datetime
10
+ import pandas as pd
11
+ import numpy as np
12
+ import pickle
13
+ import os
14
+
15
+ # Hyperparameters dictionary
16
# Root of the project checkout; the model and data locations below hang off it.
path = "/workspace/sg666/MDpLM"

# Training settings plus model and data locations for the localization benchmark.
hyperparams = dict(
    batch_size=1,
    learning_rate=0.0005,
    num_epochs=5,
    esm_model_path="facebook/esm2_t33_650M_UR50D",
    mlm_model_path=f"{path}/benchmarks/MLM/model_ckpts/best_model_epoch",
    mdlm_model_path=f"{path}/checkpoints/membrane_automodel/epochs30_lr3e-4_bsz16_gradclip1_beta-one0.9_beta-two0.999_bf16_all-params",
    train_data=f"{path}/benchmarks/Supervised/Localization/true_deeploc2.0_cell-local_train-val.csv",
    test_data=f"{path}/benchmarks/Supervised/Localization/true_deeploc2.0_cell-local_test.csv",
)
28
+
29
+ # Helper functions to obtain all embeddings for a sequence
30
def load_models(esm_model_path, mlm_model_path, mdlm_model_path, device=None):
    """Load the ESM tokenizer and the three embedding models onto *device*.

    Args:
        esm_model_path: hub id or path of the ESM model (also the tokenizer source).
        mlm_model_path: path of the fine-tuned MLM checkpoint.
        mdlm_model_path: path of the MDLM checkpoint.
        device: target device; defaults to CUDA when available, else CPU.

    Returns:
        (esm_tokenizer, esm_model, mlm_model, mdlm_model)
    """
    if device is None:
        # Same choice the script's entry point makes, resolved locally so the
        # function no longer depends on a module-level `device`.
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    esm_tokenizer = AutoTokenizer.from_pretrained(esm_model_path)
    esm_model = AutoModel.from_pretrained(esm_model_path).to(device)
    mlm_model = AutoModel.from_pretrained(mlm_model_path).to(device)
    mdlm_model = AutoModel.from_pretrained(mdlm_model_path).to(device)

    return esm_tokenizer, esm_model, mlm_model, mdlm_model
37
+
38
def get_latents(embedding_type, tokenizer, esm_model, mlm_model, mdlm_model, sequence, device):
    """Embed *sequence* with the model selected by *embedding_type*.

    Args:
        embedding_type: one of ``"esm"``, ``"mlm"`` or ``"mdlm"``.
        tokenizer: tokenizer shared by all three models.
        esm_model, mlm_model, mdlm_model: models returning ``last_hidden_state``.
        sequence: amino-acid sequence to embed.
        device: device the inputs are moved to.

    Returns:
        ``last_hidden_state`` of the selected model with the batch axis removed.

    Raises:
        ValueError: if *embedding_type* is not a recognised name.
    """
    if embedding_type == "esm":
        # The ESM model receives the full tokenizer output as keyword arguments.
        inputs = tokenizer(sequence, return_tensors='pt').to(device)
        with torch.no_grad():
            return esm_model(**inputs).last_hidden_state.squeeze(0)

    # The other two models are called with the token ids only.
    ids_only_models = {"mlm": mlm_model, "mdlm": mdlm_model}
    if embedding_type not in ids_only_models:
        raise ValueError(
            f"Unknown embedding_type {embedding_type!r}; expected 'esm', 'mlm' or 'mdlm'")

    input_ids = tokenizer(sequence, return_tensors='pt')['input_ids'].to(device)
    with torch.no_grad():
        return ids_only_models[embedding_type](input_ids).last_hidden_state.squeeze(0)
55
+
56
+
57
+ # Dataset class can load pickle file
58
class LocalizationDataset(Dataset):
    """Dataset of (per-residue embeddings, cell-membrane label) pairs.

    Rows whose sequence has 1024 or more residues are dropped. Embeddings are
    computed on demand in ``__getitem__`` by the model that *embedding_type*
    selects.
    """

    def __init__(self, embedding_type, csv_file, esm_model_path, mlm_model_path, mdlm_model_path, device):
        self.data = pd.read_csv(csv_file)
        self.data = self.data[self.data['Sequence'].apply(len) < 1024].reset_index(drop=True)
        self.embedding_type = embedding_type
        self.tokenizer, self.esm_model, self.mlm_model, self.mdlm_model = load_models(
            esm_model_path, mlm_model_path, mdlm_model_path)
        self.device = device

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        row = self.data.iloc[idx]
        # Keyword arguments keep each model bound to the right parameter; the
        # positional call handed the MLM model in as the ESM model and vice versa.
        embeddings = get_latents(
            self.embedding_type,
            self.tokenizer,
            esm_model=self.esm_model,
            mlm_model=self.mlm_model,
            mdlm_model=self.mdlm_model,
            sequence=row['Sequence'],
            device=self.device,
        )

        # Binary target: 0 when the 'Cell membrane' column is 0, otherwise 1.
        label = 0 if row['Cell membrane'] == 0 else 1
        labels = torch.tensor(label, dtype=torch.float32).view(1, 1).squeeze(-1)

        return embeddings, labels
78
+
79
+ # Predict localization with MLP head using pooled embeddings
80
class LocalizationPredictor(nn.Module):
    """Binary classifier head over per-residue embeddings.

    A two-layer MLP maps each position to one logit. The logits are averaged
    over dimension 1 and squashed to a probability.

    Args:
        input_dim: size of the last (feature) axis of the input embeddings.
    """

    def __init__(self, input_dim):
        super(LocalizationPredictor, self).__init__()
        self.classifier = nn.Sequential(
            nn.Linear(input_dim, 640),
            nn.ReLU(),
            nn.Linear(640, 1)
        )

    def forward(self, embeddings):
        """Return one probability in (0, 1) per batch element."""
        logits = self.classifier(embeddings)
        logits = torch.mean(logits, dim=1)
        # There is a single logit per sample, so softmax would always give 1.0
        # and no gradient; sigmoid is the matching activation for BCELoss.
        probs = torch.sigmoid(logits)
        return probs
94
+
95
+ # Training function
96
def train(model, dataloader, optimizer, criterion, device):
    """Run one training epoch and return the mean per-batch loss.

    Each batch is an ``(embeddings, labels)`` pair. Both tensors are moved to
    *device* before the forward pass, backward pass and optimizer step.
    """
    model.train()
    running_loss = 0
    for batch_embeddings, batch_labels in tqdm(dataloader):
        batch_embeddings = batch_embeddings.to(device)
        batch_labels = batch_labels.to(device)

        optimizer.zero_grad()
        batch_loss = criterion(model(batch_embeddings), batch_labels)
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()
    return running_loss / len(dataloader)
108
+
109
+ # Evaluation function
110
def evaluate(model, dataloader, device):
    """Collect model outputs and labels for every batch in *dataloader*.

    Returns:
        (preds, true_labels): two lists holding one NumPy array per batch.
    """
    model.eval()
    preds = []
    true_labels = []
    with torch.no_grad():
        for batch_embeddings, batch_labels in tqdm(dataloader):
            batch_embeddings = batch_embeddings.to(device)
            batch_labels = batch_labels.to(device)
            batch_outputs = model(batch_embeddings)
            preds.append(batch_outputs.cpu().numpy())
            true_labels.append(batch_labels.cpu().numpy())
    return preds, true_labels
120
+
121
+ # Metrics calculation
122
def calculate_metrics(preds, labels, threshold=0.5):
    """Compute classification metrics over the whole evaluation set.

    Args:
        preds: list of per-batch arrays of predicted probabilities.
        labels: list of per-batch arrays of true binary labels.
        threshold: probabilities strictly above this value count as class 1.

    Returns:
        NumPy array ``[accuracy, precision_macro, recall_macro, f1_macro, f1_micro]``.
    """
    # Pool every batch before scoring. Computing precision/recall/F1 per batch
    # and averaging is not the dataset-level metric; with batch size 1 each
    # "batch" metric is just a 0/1 hit for one sample.
    scores = np.concatenate([np.asarray(batch).reshape(-1) for batch in preds])
    targets = np.concatenate([np.asarray(batch).reshape(-1) for batch in labels]).astype(int)
    predicted = (scores > threshold).astype(int)

    avg_metrics = np.array([
        accuracy_score(targets, predicted),
        precision_score(targets, predicted, average='macro'),
        recall_score(targets, predicted, average='macro'),
        f1_score(targets, predicted, average='macro'),
        f1_score(targets, predicted, average='micro'),
    ])
    print(avg_metrics)
    return avg_metrics
138
+
139
+
140
+
141
if __name__ == "__main__":
    # Entry point: for each embedding source, train a localization head on the
    # train/val CSV, checkpoint every epoch, then evaluate on the test CSV.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    for embedding_type in ['mdlm', 'esm', 'mlm']:
        # Initialize datasets
        train_dataset = LocalizationDataset(embedding_type,
                                            hyperparams['train_data'],
                                            hyperparams['esm_model_path'],
                                            hyperparams['mlm_model_path'],
                                            hyperparams['mdlm_model_path'],
                                            device)
        test_dataset = LocalizationDataset(embedding_type,
                                           hyperparams['test_data'],
                                           hyperparams['esm_model_path'],
                                           hyperparams['mlm_model_path'],
                                           hyperparams['mdlm_model_path'],
                                           device)

        # Prepare dataloaders
        train_dataloader = DataLoader(train_dataset, batch_size=hyperparams["batch_size"], shuffle=True)
        test_dataloader = DataLoader(test_dataset, batch_size=hyperparams["batch_size"], shuffle=False)

        # Read the embedding width from one real sample rather than hard-coding
        # it per embedding type, so the head always matches the loaded model.
        sample_embeddings, _ = train_dataset[0]
        input_dim = sample_embeddings.shape[-1]

        # Initialize model, optimizer, and loss function
        model = LocalizationPredictor(input_dim=input_dim).to(device)
        optimizer = optim.Adam(model.parameters(), lr=hyperparams["learning_rate"])
        criterion = nn.BCELoss()

        # Checkpoint directory: one sub-directory per hyperparameter setting.
        base_checkpoint_dir = f"{path}/benchmarks/Supervised/Localization/model_checkpoints/{embedding_type}"
        hyperparam_str = f"batch_{hyperparams['batch_size']}_lr_{hyperparams['learning_rate']}_epochs_{hyperparams['num_epochs']}"
        model_checkpoint_dir = os.path.join(base_checkpoint_dir, hyperparam_str)
        os.makedirs(model_checkpoint_dir, exist_ok=True)

        # Training loop
        for epoch in range(hyperparams["num_epochs"]):
            # Train the model
            train_loss = train(model, train_dataloader, optimizer, criterion, device)
            print(f"EPOCH {epoch+1}/{hyperparams['num_epochs']}")
            print(f"TRAIN LOSS: {train_loss:.4f}")
            print("\n")

            # Save the model checkpoint for the current epoch
            checkpoint_path = os.path.join(model_checkpoint_dir, f"epoch{epoch + 1}.pth")
            torch.save({
                'epoch': epoch + 1,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'loss': train_loss,
            }, checkpoint_path)
            print(f"Checkpoint saved at {checkpoint_path}\n")

            # Hyperparameters don't change midway through training; save once.
            if epoch == 0:
                hyperparams_file = os.path.join(model_checkpoint_dir, "hyperparams.txt")
                with open(hyperparams_file, 'w') as f:
                    for key, value in hyperparams.items():
                        f.write(f"{key}: {value}\n")
                print(f"Hyperparameters saved at {hyperparams_file}\n")

        # Evaluate model on test dataset
        print("Test set")
        test_preds, test_labels = evaluate(model, test_dataloader, device)
        test_metrics = calculate_metrics(test_preds, test_labels)
        print(test_metrics)
        print("TEST METRICS:")
        print(f"Accuracy: {test_metrics[0]:.4f}")
        print(f"Precision: {test_metrics[1]:.4f}")
        print(f"Recall: {test_metrics[2]:.4f}")
        print(f"F1 Macro Score: {test_metrics[3]:.4f}")
        print(f"F1 Micro Score: {test_metrics[4]:.4f}")

        # Save test results
        test_results_file = os.path.join(model_checkpoint_dir, "test_results.txt")
        with open(test_results_file, 'w') as f:
            f.write("TEST METRICS:\n")
            f.write(f"Accuracy: {test_metrics[0]:.4f}\n")
            f.write(f"Precision: {test_metrics[1]:.4f}\n")
            f.write(f"Recall: {test_metrics[2]:.4f}\n")
            f.write(f"F1 Macro Score: {test_metrics[3]:.4f}\n")
            f.write(f"F1 Micro: {test_metrics[4]:.4f}\n")
        print(f"Test results saved at {test_results_file}\n")
benchmarks/Supervised/Localization/process_cell_local_data.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+
3
# Trim both DeepLoc 2.0 localization splits to sequences shorter than 1024
# residues and write the filtered copies alongside the raw files.
path = "/home/sg666/MDpLM/benchmarks/Supervised/Localization"

# Raw splits
train_val = pd.read_csv(path + "/deeploc2.0_train_val.csv")
test = pd.read_csv(path + "/deeploc2.0_test.csv")

# Boolean masks: True where the sequence has fewer than 1024 characters
train_val_is_short = train_val['Sequence'].apply(len) < 1024
test_is_short = test['Sequence'].apply(len) < 1024

# Apply the masks and renumber rows from zero
train_val = train_val[train_val_is_short].reset_index(drop=True)
test = test[test_is_short].reset_index(drop=True)

# Filtered outputs (no index column)
train_val.to_csv(path + "/true_deeploc2.0_cell-local_train-val.csv", index=False)
test.to_csv(path + "/true_deeploc2.0_cell-local_test.csv", index=False)
benchmarks/Supervised/Localization/true_deeploc2.0_cell-local_test.csv ADDED
The diff for this file is too large to render. See raw diff
 
memdlm_schematic.png → benchmarks/Supervised/Localization/true_deeploc2.0_cell-local_train-val.csv RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e27d6e53b463f6265e4ea6cc6c156d2d6bb11b69284a5988f29648ece581cb19
3
- size 228019
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ca38d78cc8fbc8777a23f456477901f5af4bbfda7a0908081effd09adbe7e94
3
+ size 12568908
benchmarks/Supervised/Membrane Type/membrane_type_predictor.py ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.optim as optim
4
+ from torch.utils.data import DataLoader, Dataset
5
+ from transformers import AutoModel, AutoTokenizer
6
+ from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
7
+
8
+ from tqdm import tqdm
9
+ from datetime import datetime
10
+ import pandas as pd
11
+ import numpy as np
12
+ import pickle
13
+ import os
14
+
15
# Hyperparameters dictionary
# Root of the project checkout; every local path below is built from it.
path = "/workspace/sg666/MDpLM"

hyperparams = {
    # Optimisation settings for the classifier head
    "batch_size": 1,
    "learning_rate": 5e-4,
    "num_epochs": 5,
    # Identifier passed to AutoTokenizer / AutoModel for the ESM baseline
    "esm_model_path": "facebook/esm2_t33_650M_UR50D",
    # Local checkpoints for the MLM and MDLM embedding models
    'mlm_model_path': path + "/benchmarks/MLM/model_ckpts/best_model_epoch",
    "mdlm_model_path": path + "/checkpoints/membrane_automodel/epochs30_lr3e-4_bsz16_gradclip1_beta-one0.9_beta-two0.999_bf16_all-params",
    # Train / test CSVs for the membrane-type task
    "train_data": path + "/benchmarks/Supervised/Membrane Type/membrane_type_train.csv",
    "test_data" : path + "/benchmarks/Supervised/Membrane Type/membrane_type_test.csv",
}
28
+
29
+ # Helper functions to obtain all embeddings for a sequence
30
def load_models(esm_model_path, mlm_model_path, mdlm_model_path, device=None):
    """Load the ESM tokenizer and the three embedding models.

    Args:
        esm_model_path: Name or path passed to ``AutoTokenizer`` and
            ``AutoModel`` for the ESM baseline.
        mlm_model_path: Path passed to ``AutoModel`` for the MLM model.
        mdlm_model_path: Path passed to ``AutoModel`` for the MDLM model.
        device: Device the models are moved to. When ``None``, CUDA is used
            if available, otherwise CPU.

    Returns:
        Tuple ``(esm_tokenizer, esm_model, mlm_model, mdlm_model)``.
    """
    if device is None:
        # Resolve the device here instead of depending on a module-level
        # ``device`` that only exists when the file is run as a script.
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    esm_tokenizer = AutoTokenizer.from_pretrained(esm_model_path)
    esm_model = AutoModel.from_pretrained(esm_model_path).to(device)
    mlm_model = AutoModel.from_pretrained(mlm_model_path).to(device)
    mdlm_model = AutoModel.from_pretrained(mdlm_model_path).to(device)
    return esm_tokenizer, esm_model, mlm_model, mdlm_model
36
+
37
def get_latents(embedding_type, tokenizer, esm_model, mlm_model, mdlm_model, sequence, device):
    """Embed one sequence with the model selected by ``embedding_type``.

    Args:
        embedding_type: One of ``"esm"``, ``"mlm"`` or ``"mdlm"``.
        tokenizer: Callable tokenizer; invoked as
            ``tokenizer(sequence, return_tensors="pt")``.
        esm_model: Model used when ``embedding_type == "esm"``.
        mlm_model: Model used when ``embedding_type == "mlm"``.
        mdlm_model: Model used when ``embedding_type == "mdlm"``.
        sequence: Sequence string to embed.
        device: Device the tokenized inputs are moved to.

    Returns:
        ``last_hidden_state`` of the selected model with the leading
        dimension squeezed away.

    Raises:
        ValueError: If ``embedding_type`` is not a supported name.
    """
    models = {"esm": esm_model, "mlm": mlm_model, "mdlm": mdlm_model}
    if embedding_type not in models:
        # Fail with a clear message instead of an unbound-variable error.
        raise ValueError(
            f"Unknown embedding_type {embedding_type!r}; expected one of {sorted(models)}"
        )

    inputs = tokenizer(sequence, return_tensors="pt").to(device)
    with torch.no_grad():  # inference only, no autograd graph needed
        outputs = models[embedding_type](**inputs)
    return outputs.last_hidden_state.squeeze(0)
55
+
56
+
57
# Dataset that embeds each sequence on the fly and pairs it with its label vector
class MembraneDataset(Dataset):
    """Membrane-type dataset read from a CSV with a ``Sequence`` column.

    Rows whose sequence has 1024 or more characters are dropped. The label
    for each row is the list of values in columns 3..6 (positional) of the
    CSV. Embeddings are computed lazily in ``__getitem__``.
    """

    def __init__(self, embedding_type, csv_file, esm_model_path, mlm_model_path, mdlm_model_path, device):
        """Read the CSV, filter long sequences and load the embedding models.

        Args:
            embedding_type: Which model ``get_latents`` should use.
            csv_file: Path to the CSV file.
            esm_model_path: Forwarded to ``load_models``.
            mlm_model_path: Forwarded to ``load_models``.
            mdlm_model_path: Forwarded to ``load_models``.
            device: Device used when embedding sequences.
        """
        self.data = pd.read_csv(csv_file)
        self.data = self.data[self.data['Sequence'].apply(len) < 1024].reset_index(drop=True)

        self.embedding_type = embedding_type
        self.device = device

        self.tokenizer, self.esm_model, self.mlm_model, self.mdlm_model = load_models(esm_model_path, mlm_model_path, mdlm_model_path)

        # Collapse the four label columns (positions 3..6) into one list per row
        self.data['label'] = self.data.iloc[:, 3:7].values.tolist()

    def __len__(self):
        """Return the number of rows kept after filtering."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(embeddings, labels)`` for row ``idx``.

        ``labels`` is a float32 tensor built from the row's label list.
        """
        sequence = self.data.iloc[idx]['Sequence']
        embeddings = get_latents(self.embedding_type, self.tokenizer, self.esm_model, self.mlm_model, self.mdlm_model,
                                 sequence, self.device)
        labels = torch.tensor(self.data.iloc[idx]['label'], dtype=torch.float32)

        return embeddings, labels
82
+
83
+
84
# Predict membrane type with an MLP head whose per-position outputs are mean-pooled
class MembranePredictor(nn.Module):
    """Two-layer MLP classifier that returns per-class probabilities."""

    def __init__(self, input_dim, num_classes):
        """Build the classifier.

        Args:
            input_dim: Size of the last dimension of the input embeddings.
            num_classes: Number of output classes.
        """
        super().__init__()
        self.classifier = nn.Sequential(
            nn.Linear(input_dim, 640),
            nn.ReLU(),
            nn.Linear(640, num_classes)
        )

    def forward(self, embeddings):
        """Return sigmoid probabilities with dimension 1 averaged out.

        The linear layers act on the last dimension; the resulting logits are
        averaged over ``dim=1`` and passed through a sigmoid, so the output
        holds probabilities in (0, 1), not raw logits.
        """
        logits = self.classifier(embeddings)
        logits = torch.mean(logits, dim=1)
        probs = torch.sigmoid(logits)
        return probs
99
+
100
+ # Training function
101
def train(model, dataloader, optimizer, criterion, device):
    """Run one pass over ``dataloader`` and return the mean per-batch loss.

    Args:
        model: Module to optimise; switched to training mode.
        dataloader: Iterable yielding ``(embeddings, labels)`` batches.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        device: Device both tensors are moved to.

    Returns:
        Sum of the batch losses divided by ``len(dataloader)``.
    """
    model.train()
    running = 0
    for batch_embeddings, batch_labels in tqdm(dataloader):
        batch_embeddings = batch_embeddings.to(device)
        batch_labels = batch_labels.to(device)

        optimizer.zero_grad()
        batch_loss = criterion(model(batch_embeddings), batch_labels)
        batch_loss.backward()
        optimizer.step()

        running += batch_loss.item()
    return running / len(dataloader)
113
+
114
+ # Evaluation function
115
def evaluate(model, dataloader, device):
    """Run the model over ``dataloader`` without gradients.

    Args:
        model: Trained module; switched to eval mode.
        dataloader: Iterable yielding ``(embeddings, labels)`` batches.
        device: Device both tensors are moved to before the forward pass.

    Returns:
        ``(preds, true_labels)``: two lists with one NumPy array per batch.
    """
    model.eval()
    preds = []
    true_labels = []
    with torch.no_grad():
        for batch_embeddings, batch_labels in tqdm(dataloader):
            batch_embeddings = batch_embeddings.to(device)
            batch_labels = batch_labels.to(device)
            batch_out = model(batch_embeddings)
            preds.append(batch_out.cpu().numpy())
            true_labels.append(batch_labels.cpu().numpy())
    return preds, true_labels
125
+
126
+ # Metrics calculation
127
def calculate_metrics(preds, labels, threshold=0.5):
    """Compute classification metrics over the whole evaluation set.

    All batches are stacked first and every score is computed once on the
    stacked arrays. Scoring each batch separately and averaging is not
    equivalent for macro/micro precision, recall and F1: with single-sample
    batches most classes have no positives, which deflates the averages.

    Args:
        preds: List of per-batch probability arrays. Assumes each array is
            ``(batch, num_classes)`` as produced by ``evaluate`` -- TODO confirm.
        labels: List of per-batch label arrays aligned with ``preds``.
        threshold: A probability strictly above this value counts as positive.

    Returns:
        NumPy array ``[accuracy, precision_macro, recall_macro, f1_macro,
        f1_micro]``. All entries are NaN when ``preds`` is empty.
    """
    if len(preds) == 0:
        # Nothing to score; keep the five-element shape callers index into.
        return np.full(5, np.nan)

    y_pred = (np.concatenate(preds, axis=0) > threshold).astype(int)
    y_true = np.concatenate(labels, axis=0).astype(int)

    accuracy = accuracy_score(y_true, y_pred)
    # zero_division=0 scores classes without positives as 0 and silences the warning.
    precision = precision_score(y_true, y_pred, average='macro', zero_division=0)
    recall = recall_score(y_true, y_pred, average='macro', zero_division=0)
    f1_macro = f1_score(y_true, y_pred, average='macro', zero_division=0)
    f1_micro = f1_score(y_true, y_pred, average='micro', zero_division=0)

    return np.array([accuracy, precision, recall, f1_macro, f1_micro])
142
+
143
+
144
if __name__ == "__main__":
    # Train and evaluate one classifier head per embedding source.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    for embedding_type in ['mdlm', 'mlm', 'esm']:
        # Initialize datasets
        train_dataset = MembraneDataset(embedding_type,
                                        hyperparams['train_data'],
                                        hyperparams['esm_model_path'],
                                        hyperparams['mlm_model_path'],
                                        hyperparams['mdlm_model_path'],
                                        device)
        test_dataset = MembraneDataset(embedding_type,
                                       hyperparams['test_data'],
                                       hyperparams['esm_model_path'],
                                       hyperparams['mlm_model_path'],
                                       hyperparams['mdlm_model_path'],
                                       device)

        # Prepare dataloaders
        train_dataloader = DataLoader(train_dataset, batch_size=hyperparams["batch_size"], shuffle=True)
        test_dataloader = DataLoader(test_dataset, batch_size=hyperparams["batch_size"], shuffle=False)

        # Initialize model, optimizer, and loss function
        input_dim = 640 if embedding_type == "mdlm" else 1280
        model = MembranePredictor(input_dim=input_dim, num_classes=4).to(device)
        optimizer = optim.Adam(model.parameters(), lr=hyperparams["learning_rate"])
        # The predictor already applies a sigmoid, so its outputs are
        # probabilities. CrossEntropyLoss expects unnormalised logits and would
        # apply a softmax on top; BCELoss is the loss that matches sigmoid
        # outputs. Assumes the four label columns are 0/1 indicators -- TODO confirm.
        criterion = nn.BCELoss()

        # Initialize main directory model checkpoints
        base_checkpoint_dir = f"{path}/benchmarks/Supervised/Membrane Type/model_checkpoints/{embedding_type}"
        # Initialize subdirectory and name it based on hyperparameters
        hyperparam_str = f"batch_{hyperparams['batch_size']}_lr_{hyperparams['learning_rate']}_epochs_{hyperparams['num_epochs']}"
        model_checkpoint_dir = os.path.join(base_checkpoint_dir, hyperparam_str)
        os.makedirs(model_checkpoint_dir, exist_ok=True)

        # Training loop
        for epoch in range(hyperparams["num_epochs"]):
            # Train the model
            train_loss = train(model, train_dataloader, optimizer, criterion, device)
            print(f"EPOCH {epoch+1}/{hyperparams['num_epochs']}")
            print(f"TRAIN LOSS: {train_loss:.4f}")
            print("\n")

            # Save the model checkpoint for the current epoch
            checkpoint_path = os.path.join(model_checkpoint_dir, f"epoch{epoch + 1}.pth")
            torch.save({
                'epoch': epoch + 1,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'loss': train_loss,
            }, checkpoint_path)
            print(f"Checkpoint saved at {checkpoint_path}\n")

            # Save hyperparameters only once
            if epoch == 0:  # Hyperparameters don't change midway through training
                hyperparams_file = os.path.join(model_checkpoint_dir, "hyperparams.txt")
                with open(hyperparams_file, 'w') as f:
                    for key, value in hyperparams.items():
                        f.write(f"{key}: {value}\n")
                print(f"Hyperparameters saved at {hyperparams_file}\n")

        # Evaluate model on test dataset
        # NOTE(review): the flattened source does not show whether evaluation
        # ran every epoch or once after training. It is placed after training
        # here; test_results.txt is opened with 'w', so its final content is
        # the same either way.
        print("Test set")
        test_preds, test_labels = evaluate(model, test_dataloader, device)
        test_metrics = calculate_metrics(test_preds, test_labels)
        print("TEST METRICS:")
        print(f"Accuracy: {test_metrics[0]:.4f}")
        print(f"Precision: {test_metrics[1]:.4f}")
        print(f"Recall: {test_metrics[2]:.4f}")
        print(f"F1 Macro Score: {test_metrics[3]:.4f}")
        print(f"F1 Micro Score: {test_metrics[4]:.4f}")

        # Save test results
        test_results_file = os.path.join(model_checkpoint_dir, "test_results.txt")
        with open(test_results_file, 'w') as f:
            f.write("TEST METRICS:\n")
            f.write(f"Accuracy: {test_metrics[0]:.4f}\n")
            f.write(f"Precision: {test_metrics[1]:.4f}\n")
            f.write(f"Recall: {test_metrics[2]:.4f}\n")
            f.write(f"F1 Macro Score: {test_metrics[3]:.4f}\n")
            f.write(f"F1 Micro: {test_metrics[4]:.4f}\n")
        print(f"Test results saved at {test_results_file}\n")
benchmarks/Supervised/Membrane Type/membrane_type_test.csv ADDED
The diff for this file is too large to render. See raw diff
 
benchmarks/Supervised/Membrane Type/membrane_type_train.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16b8eec677afa2de578d04ee1a0fc9582b2f8cfc47622cbd6374309cd6ab96f3
3
+ size 12335695
benchmarks/Supervised/Membrane Type/split_membrane_type_data.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Splits the DeepLoc 2.1 membrane type data into train/val and testing splits
2
+ # Partition value of "4" indicates testing data
3
+
4
+ import pandas as pd
5
+
6
# Directory holding the unsplit CSV; the two output CSVs are written here too.
path = "/workspace/a03-sgoel/MDpLM/benchmarks/DeepLoc/Membrane Type"

# Load the full table and discard the saved index column.
df = pd.read_csv(path + "/unsplit_membrane_type_all.csv").drop(columns=['Unnamed: 0'])

# Partition 4 is the held-out test fold; every other row is training data.
in_test_fold = df['Partition'] == 4
train = df[~in_test_fold]
test = df[in_test_fold]

train.to_csv(path + "/membrane_type_train.csv", index=False)
test.to_csv(path + "/membrane_type_test.csv", index=False)
benchmarks/Supervised/Membrane Type/unsplit_membrane_type_all.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d878da32a06092f880262048e3c1eb692721c274b0a458fcc712a0dcbd80c71
3
+ size 15683507
benchmarks/Supervised/Solubility/solubility_transformer.py ADDED
@@ -0,0 +1,353 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.optim as optim
4
+ from torch.utils.data import DataLoader, Dataset
5
+ from transformers import AutoModel, AutoTokenizer
6
+ from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
7
+ from sklearn.model_selection import ParameterGrid
8
+ from tqdm import tqdm
9
+ import pandas as pd
10
+ import numpy as np
11
+ import sys
12
+ import os
13
+ from datetime import datetime
14
+ import logging
15
+
16
+ logging.getLogger("transformers").setLevel(logging.ERROR)
17
+
18
# Hyperparameters dictionary
# Root of the project checkout; every local path below is built from it.
path = "/workspace/sg666/MDpLM"
hyperparams = {
    # Train / validation / test CSVs
    "train_data": path + "/data/membrane/train.csv",
    "val_data": path + "/data/membrane/val.csv",
    "test_data": path + "/data/membrane/test.csv",
    # Identifier passed to AutoTokenizer / AutoModel for the ESM baseline
    'esm_model_path': "facebook/esm2_t33_650M_UR50D",
    # Local checkpoints for the MLM and MDLM embedding models
    'mlm_model_path': path + "/benchmarks/MLM/model_ckpts/best_model_epoch",
    "mdlm_model_path": path + "/checkpoints/membrane_automodel/epochs30_lr3e-4_bsz16_gradclip1_beta-one0.9_beta-two0.999_bf16_all-params",
    # Defaults; the grid search in the __main__ block overwrites matching keys
    "batch_size": 1,
    "learning_rate": 5e-5,
    "num_epochs": 2,
    "num_layers": 4,
    "num_heads": 16,
    "dropout": 0.5
}
34
+
35
+
36
+ # Helper functions to obtain all embeddings for a sequence
37
def load_models(esm_model_path, mlm_model_path, mdlm_model_path, device=None):
    """Load the ESM tokenizer and the three embedding models.

    Args:
        esm_model_path: Name or path passed to ``AutoTokenizer`` and
            ``AutoModel`` for the ESM baseline.
        mlm_model_path: Path passed to ``AutoModel`` for the MLM model.
        mdlm_model_path: Path passed to ``AutoModel`` for the MDLM model.
        device: Device the models are moved to. When ``None``, CUDA is used
            if available, otherwise CPU.

    Returns:
        Tuple ``(esm_tokenizer, esm_model, mlm_model, mdlm_model)``.
    """
    if device is None:
        # Resolve the device here instead of depending on a module-level
        # ``device`` that only exists when the file is run as a script.
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    esm_tokenizer = AutoTokenizer.from_pretrained(esm_model_path)
    esm_model = AutoModel.from_pretrained(esm_model_path).to(device)
    mlm_model = AutoModel.from_pretrained(mlm_model_path).to(device)
    mdlm_model = AutoModel.from_pretrained(mdlm_model_path).to(device)
    return esm_tokenizer, esm_model, mlm_model, mdlm_model
43
+
44
+
45
# Results of load_models() keyed by the three model paths, so the models are
# loaded once per process instead of once per embedded sequence.
_MODEL_CACHE = {}


def get_latents(embedding_type, esm_model_path, mlm_model_path, mdlm_model_path, sequence, device):
    """Embed one sequence with the model selected by ``embedding_type``.

    The sequence is upper-cased before tokenization, and the first and last
    positions of the hidden states are dropped from the result.

    Args:
        embedding_type: One of ``"esm"``, ``"mlm"`` or ``"mdlm"``.
        esm_model_path: Forwarded to ``load_models``.
        mlm_model_path: Forwarded to ``load_models``.
        mdlm_model_path: Forwarded to ``load_models``.
        sequence: Sequence string to embed.
        device: Device the tokenized inputs are moved to.

    Returns:
        ``last_hidden_state`` of the selected model with the leading
        dimension squeezed away and the first and last positions removed.

    Raises:
        ValueError: If ``embedding_type`` is not a supported name.
    """
    if embedding_type not in ("esm", "mlm", "mdlm"):
        # Validate before the expensive model load, and fail with a clear
        # message instead of an unbound-variable error.
        raise ValueError(
            f"Unknown embedding_type {embedding_type!r}; expected 'esm', 'mlm' or 'mdlm'"
        )

    cache_key = (esm_model_path, mlm_model_path, mdlm_model_path)
    if cache_key not in _MODEL_CACHE:
        _MODEL_CACHE[cache_key] = load_models(esm_model_path, mlm_model_path, mdlm_model_path)
    tokenizer, esm_model, mlm_model, mdlm_model = _MODEL_CACHE[cache_key]

    model = {"esm": esm_model, "mlm": mlm_model, "mdlm": mdlm_model}[embedding_type]

    inputs = tokenizer(sequence.upper(), return_tensors="pt").to(device)['input_ids']
    with torch.no_grad():  # inference only, no autograd graph needed
        embeddings = model(inputs).last_hidden_state.squeeze(0)[1:-1]

    return embeddings
60
+
61
+
62
# Dataset class that loads embeddings and labels
class SolubilityDataset(Dataset):
    """Per-residue solubility dataset read from a CSV with a ``Sequence`` column.

    Labels come from letter case: a lowercase residue is labelled 0 and any
    other character 1. Embeddings are computed lazily in ``__getitem__``.
    """

    def __init__(self, embedding_type, csv_file, esm_model_path, mlm_model_path, mdlm_model_path, device, max_rows=None):
        """Read the CSV and remember how to embed its sequences.

        Args:
            embedding_type: Which model ``get_latents`` should use.
            csv_file: Path (or file-like object) accepted by ``pd.read_csv``.
            esm_model_path: Forwarded to ``get_latents``.
            mlm_model_path: Forwarded to ``get_latents``.
            mdlm_model_path: Forwarded to ``get_latents``.
            device: Device used when embedding sequences.
            max_rows: Optional cap on the number of rows kept, for quick
                smoke tests. ``None`` (default) keeps every row; previously
                every split was silently cut to its first 5 rows.
        """
        self.data = pd.read_csv(csv_file)
        if max_rows is not None:
            self.data = self.data.head(max_rows)
        # NOTE: no sequence-length filter is applied here.
        self.embedding_type = embedding_type
        self.esm_model_path = esm_model_path
        self.mlm_model_path = mlm_model_path
        self.mdlm_model_path = mdlm_model_path
        self.device = device

    def __len__(self):
        """Return the number of rows in the dataset."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(embeddings, labels, seq_len)`` for row ``idx``."""
        sequence = self.data.iloc[idx]['Sequence']
        seq_len = len(sequence)
        embeddings = get_latents(self.embedding_type, self.esm_model_path, self.mlm_model_path, self.mdlm_model_path,
                                 sequence, self.device)
        # Lowercase residues = soluble, uppercase = insoluble
        label = [0 if residue.islower() else 1 for residue in sequence]
        labels = torch.tensor(label, dtype=torch.float32)

        return embeddings, labels, seq_len
86
+
87
# Per-residue MLP classifier
class SolubilityPredictor(nn.Module):
    """Two-layer MLP that outputs one probability per input position."""

    def __init__(self, input_dim, hidden_dim, num_heads, num_layers, dropout):
        """Build the classifier.

        Args:
            input_dim: Size of the last dimension of the input embeddings.
            hidden_dim: Accepted for call-site compatibility; unused here.
            num_heads: Accepted for call-site compatibility; unused here.
            num_layers: Accepted for call-site compatibility; unused here.
            dropout: Accepted for call-site compatibility; unused here.
        """
        super().__init__()
        self.classifier = nn.Sequential(
            nn.Linear(input_dim, 320),
            nn.ReLU(),
            nn.Linear(320, 1)
        )
        self.sigmoid = nn.Sigmoid()

    def forward(self, embeddings):
        """Return sigmoid probabilities with the trailing size-1 dimension removed."""
        logits = self.classifier(embeddings)
        probs = self.sigmoid(logits.squeeze(-1))

        return probs
116
+
117
+
118
+ # Training function
119
def train(model, train_loader, val_loader, optimizer, criterion, device):
    """
    Trains the model for a single epoch, then measures validation loss.
    Args:
        model (nn.Module): model that will be trained
        train_loader (DataLoader): yields (embeddings, labels, seq_len) training batches
        val_loader (DataLoader): yields (embeddings, labels, seq_len) validation batches
        optimizer (torch.optim): optimizer
        criterion (nn.Module): loss function
        device (torch.device): device (GPU or CPU) to train the model on
    Returns:
        (train_loss, val_loss): mean per-batch training and validation loss
    """
    # Training loop
    model.train()
    train_loss = 0.0

    prog_bar = tqdm(total=len(train_loader), leave=True, file=sys.stdout)
    for embeddings, labels, _seq_len in train_loader:
        # Batches are fed to the model unchanged, exactly as in the validation
        # loop below. An extra squeeze(1) here only had an effect for
        # length-1 sequences, where it made outputs and labels differ in shape.
        embeddings, labels = embeddings.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(embeddings)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        prog_bar.update()
        sys.stdout.flush()
    prog_bar.close()

    # Validation loop
    model.eval()
    val_loss = 0.0

    prog_bar = tqdm(total=len(val_loader), leave=True, file=sys.stdout)
    with torch.no_grad():  # no gradients needed for validation
        for embeddings, labels, _seq_len in val_loader:
            embeddings, labels = embeddings.to(device), labels.to(device)
            outputs = model(embeddings)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            prog_bar.update()
            sys.stdout.flush()
    prog_bar.close()

    return train_loss/len(train_loader), val_loss/len(val_loader)
167
+
168
+
169
+
170
+ # Evaluation function
171
def evaluate(model, dataloader, device):
    """
    Runs inference with a trained model
    Args:
        model (nn.Module): the trained model
        dataloader (DataLoader): yields (embeddings, labels, seq_len) batches
        device (torch.device): device (GPU or CPU) to be used for inference
    Returns:
        preds (list): one NumPy array of model outputs per batch
        true_labels (list): one NumPy array of ground truth labels per batch
    """
    model.eval()
    preds = []
    true_labels = []
    with torch.no_grad():
        for batch in tqdm(dataloader):
            batch_embeddings, batch_labels, _ = batch
            batch_embeddings = batch_embeddings.to(device)
            batch_labels = batch_labels.to(device)
            batch_out = model(batch_embeddings)
            preds.append(batch_out.cpu().numpy())
            true_labels.append(batch_labels.cpu().numpy())
    return preds, true_labels
191
+
192
+ # Metrics calculation
193
def calculate_metrics(preds, labels, threshold=0.5):
    """
    Computes binary classification metrics over all flattened predictions
    Args:
        preds (list): per-batch arrays of predicted probabilities
        labels (list): per-batch arrays of ground truth labels
        threshold (float): a probability strictly above this counts as class 1
    Returns:
        accuracy (float): accuracy
        precision (float): precision
        recall (float): recall
        f1 (float): F1 score
        roc_auc (float): AUROC score
    """
    pairs = list(zip(preds, labels))

    # Flatten every batch into one long vector per quantity.
    flat_binary_preds = np.array(
        [v for pred, _ in pairs for v in (pred > threshold).astype(int).flatten()]
    )
    flat_prob_preds = np.array([v for pred, _ in pairs for v in pred.flatten()])
    flat_labels = np.array([v for _, label in pairs for v in label.flatten()])

    # Thresholded predictions feed the hard metrics; raw probabilities feed AUROC.
    return (
        accuracy_score(flat_labels, flat_binary_preds),
        precision_score(flat_labels, flat_binary_preds),
        recall_score(flat_labels, flat_binary_preds),
        f1_score(flat_labels, flat_binary_preds),
        roc_auc_score(flat_labels, flat_prob_preds),
    )
225
+
226
+
227
if __name__ == "__main__":
    # Train, grid-search and evaluate one classifier per embedding source.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(device)

    for embedding_type in ['mlm', 'esm', 'mdlm']:
        best_val_loss = float('inf')
        best_model = None
        best_hyperparams = None

        # Load train and test dataset
        train_dataset = SolubilityDataset(embedding_type,
                                          hyperparams['train_data'],
                                          hyperparams['esm_model_path'],
                                          hyperparams['mlm_model_path'],
                                          hyperparams['mdlm_model_path'],
                                          device)
        test_dataset = SolubilityDataset(embedding_type,
                                         hyperparams['test_data'],
                                         hyperparams['esm_model_path'],
                                         hyperparams['mlm_model_path'],
                                         hyperparams['mdlm_model_path'],
                                         device)
        val_dataset = SolubilityDataset(embedding_type,
                                        hyperparams['val_data'],
                                        hyperparams['esm_model_path'],
                                        hyperparams['mlm_model_path'],
                                        hyperparams['mdlm_model_path'],
                                        device)

        # Load PyTorch datasets into DataLoaders
        train_dataloader = DataLoader(train_dataset, batch_size=hyperparams["batch_size"], shuffle=True)
        val_dataloader = DataLoader(val_dataset, batch_size=hyperparams["batch_size"], shuffle=False)
        test_dataloader = DataLoader(test_dataset, batch_size=hyperparams["batch_size"], shuffle=False)

        ### Grid search to explore hyperparameter space
        # Define hyperparameters
        param_grid = {
            'learning_rate': [5e-4],
            'batch_size': [1],
            'num_heads': [4],
            'num_layers': [2],
            'dropout': [0.5],
            'num_epochs': [5]
        }

        # Loop over the parameter grid
        grid = ParameterGrid(param_grid)
        for params in grid:
            # Update hyperparameters
            hyperparams.update(params)

            # Update model with the new set of hyperparams
            input_dim = 640 if embedding_type == "mdlm" else 1280
            hidden_dim = input_dim
            model = SolubilityPredictor(
                input_dim=input_dim,
                hidden_dim=hidden_dim,
                num_layers=hyperparams["num_layers"],
                num_heads=hyperparams["num_heads"],
                dropout=hyperparams['dropout']
            )
            model = model.to(device)  # Push model to GPU

            # Update optimizer
            optimizer = optim.Adam(model.parameters(), lr=hyperparams["learning_rate"])
            criterion = nn.BCELoss()

            # Train
            for epoch in range(hyperparams["num_epochs"]):
                print(f"EPOCH {epoch+1}/{hyperparams['num_epochs']}")
                train_loss, val_loss = train(model, train_dataloader, val_dataloader, optimizer, criterion, device)
                print(f"TRAIN LOSS: {train_loss:.4f}")
                print(f"VALIDATION LOSS: {val_loss:.4f}\n")
                sys.stdout.flush()

                if val_loss < best_val_loss:
                    best_val_loss = val_loss
                    # state_dict() returns references to the live parameters,
                    # which later epochs keep updating; clone them so the
                    # snapshot really holds the best-epoch weights.
                    best_model = {name: tensor.detach().clone()
                                  for name, tensor in model.state_dict().items()}
                    # Remember the settings that produced this snapshot.
                    best_hyperparams = dict(hyperparams)

            # Evaluate model on test sequences
            # NOTE(review): the flattened source does not show whether this ran
            # every epoch or once per grid point. It is placed after the epoch
            # loop here; the saved files are overwritten, so their final
            # content is the same either way.
            print("TEST METRICS:")
            test_preds, test_labels = evaluate(model, test_dataloader, device)
            test_metrics = calculate_metrics(test_preds, test_labels)
            print(f"Accuracy: {test_metrics[0]:.4f}")
            print(f"Precision: {test_metrics[1]:.4f}")
            print(f"Recall: {test_metrics[2]:.4f}")
            print(f"F1 Score: {test_metrics[3]:.4f}")
            print(f"ROC AUC: {test_metrics[4]:.4f}")
            print(f"\n")
            sys.stdout.flush()

            ### Save model and metrics for this hyperparameter combination
            folder_name = f"{path}/benchmarks/Supervised/Solubility/transformer_models/{embedding_type}/lr{hyperparams['learning_rate']}_bs{hyperparams['batch_size']}_epochs{hyperparams['num_epochs']}_layers{hyperparams['num_layers']}_heads{hyperparams['num_heads']}_drpt{hyperparams['dropout']}"
            os.makedirs(folder_name, exist_ok=True)

            # Save current model for this hyperparameter combination
            model_file_path = os.path.join(folder_name, "model.pth")
            torch.save(model.state_dict(), model_file_path)

            # Save hyperparameters and test metrics to txt file
            output_file_path = os.path.join(folder_name, "hyperparams_and_test_results.txt")
            with open(output_file_path, 'w') as out_file:
                for key, value in hyperparams.items():
                    out_file.write(f"{key}: {value}\n")

                out_file.write("\nTEST METRICS:\n")
                out_file.write(f"Accuracy: {test_metrics[0]:.4f}\n")
                out_file.write(f"Precision: {test_metrics[1]:.4f}\n")
                out_file.write(f"Recall: {test_metrics[2]:.4f}\n")
                out_file.write(f"F1 Score: {test_metrics[3]:.4f}\n")
                out_file.write(f"ROC AUC: {test_metrics[4]:.4f}\n")

        # Save the best model and its hyperparameters
        if best_model is not None:
            best_model_dir = f"{path}/benchmarks/Supervised/Solubility/transformer_models/{embedding_type}"
            os.makedirs(best_model_dir, exist_ok=True)
            best_model_path = os.path.join(best_model_dir, "best_model.pth")
            torch.save(best_model, best_model_path)

            # Save the hyperparameters for the best model
            best_hyperparams_path = f"{path}/benchmarks/Supervised/Solubility/transformer_models/{embedding_type}/best_model_hyperparams.txt"
            with open(best_hyperparams_path, 'w') as out_file:
                out_file.write("Best Validation Loss: {:.4f}\n".format(best_val_loss))
                for key, value in best_hyperparams.items():
                    out_file.write(f"{key}: {value}\n")
checkpoints/.DS_Store ADDED
Binary file (8.2 kB). View file
 
config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "facebook/esm2_t30_150M_UR50D",
3
+ "architectures": [
4
+ "EsmModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.0,
7
+ "classifier_dropout": null,
8
+ "emb_layer_norm_before": false,
9
+ "esmfold_config": null,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.0,
12
+ "hidden_size": 640,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 2560,
15
+ "is_folding_model": false,
16
+ "layer_norm_eps": 1e-05,
17
+ "mask_token_id": 32,
18
+ "max_position_embeddings": 1026,
19
+ "model_type": "esm",
20
+ "num_attention_heads": 20,
21
+ "num_hidden_layers": 30,
22
+ "pad_token_id": 1,
23
+ "position_embedding_type": "rotary",
24
+ "token_dropout": true,
25
+ "torch_dtype": "float32",
26
+ "transformers_version": "4.44.2",
27
+ "use_cache": true,
28
+ "vocab_list": null,
29
+ "vocab_size": 33
30
+ }
config.yaml ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ defaults:
2
+ - _self_
3
+ - /callbacks: [checkpoint_every_n_steps, checkpoint_monitor, learning_rate_monitor]
4
+ - /model: small
5
+ - /strategy: ddp
6
+ - /noise: loglinear
7
+ - /lr_scheduler: constant_warmup
8
+
9
+ mode: sample_eval # train / ppl_eval / sample_eval
10
+ diffusion: absorbing_state
11
+ backbone: membrane_esm_finetune # dit / dimamba / ar / vanilla_esm_pretrain / membrane_esm_finetune
12
+ parameterization: subs # subs / d3pm / sedd
13
+ time_conditioning: False
14
+ T: 0 # 0 (continuous time) / 1000
15
+ subs_masking: False
16
+
17
+ seed: 42
18
+
19
+ data:
20
+ train:
21
+ vanilla_esm_train_path: /workspace/sg666/MDpLM/data/uniref50/200k_seqs/train.csv
22
+ membrane_esm_train_path: /workspace/sg666/MDpLM/data/membrane/train.csv
23
+ wrap: null
24
+ test:
25
+ vanilla_esm_test_path: /workspace/sg666/MDpLM/data/uniref50/200k_seqs/test.csv
26
+ membrane_esm_test_path: /workspace/sg666/MDpLM/data/membrane/test.csv
27
+ wrap: null
28
+ valid:
29
+ vanilla_esm_valid_path: /workspace/sg666/MDpLM/data/uniref50/200k_seqs/val.csv
30
+ membrane_esm_valid_path: /workspace/sg666/MDpLM/data/membrane/val.csv
31
+ wrap: null
32
+ wrapping: True
33
+
34
+ loader:
35
+ global_batch_size: 8
36
+ eval_global_batch_size: ${.global_batch_size}
37
+ # Note: batch_size and eval_batch_size are **per machine**
38
+ batch_size: ${div_up:${.global_batch_size}, ${eval:${trainer.devices} * ${trainer.num_nodes}}}
39
+ eval_batch_size: ${div_up:${.eval_global_batch_size}, ${eval:${trainer.devices} * ${trainer.num_nodes}}}
40
+ num_workers: ${eval:"len(__import__('os').sched_getaffinity(0))"}
41
+ pin_memory: True
42
+
43
+ sampling:
44
+ predictor: ddpm_cache # analytic, ddpm, ddpm_cache
45
+ steps: 128
46
+ noise_removal: True
47
+ # TODO(yair): @subham, why aren't these params under `eval`?
48
+ num_sample_batches: 2 # Total samples: `num_gpus` * `loader.eval_batch_size` * num_sample_batches
49
+ num_sample_log: 2
50
+ semi_ar: False
51
+ stride_length: 1
52
+ num_strides: 1
53
+
54
+ training:
55
+ ema: 0.9999
56
+ antithetic_sampling: True
57
+ importance_sampling: False
58
+ sampling_eps: 1e-3
59
+ change_of_variables: False
60
+ mlm_model_path: /workspace/sg666/MDpLM/benchmarks/MLM/model_ckpts_650M/best_model_epoch
61
+ esm_model_path: facebook/esm2_t30_150M_UR50D
62
+ focus_mask: False
63
+
64
+ eval:
65
+ checkpoint_path: /workspace/sg666/MDpLM/checkpoints/membrane_mdlm/eos-wrapping_epochs60_lr3e-4_200k-seqs_bsz16_all-params_no-compile_gradclip1_beta-one0.9_beta-two0.999_bf16/checkpoints/best.ckpt # Used to evaluate a checkpoint after training.
66
+ disable_ema: False
67
+ compute_generative_perplexity: False
68
+ perplexity_batch_size: 8
69
+ compute_perplexity_on_sanity: False
70
+ gen_ppl_eval_model_name_or_path: gpt2-large # gpt2-large, meta-llama/Llama-2-7b-hf
71
+ generate_samples: True
72
+ generation_model: /workspace/sg666/MDpLM/checkpoints/membrane_automodel/epochs60_lr3e-4_200k-seqs_bsz16_all-params_no-compile_gradclip1_beta-one0.9_beta-two0.999_bf16/
73
+
74
+ optim:
75
+ weight_decay: 0.075
76
+ lr: 3e-4
77
+ beta1: 0.9
78
+ beta2: 0.999
79
+ eps: 1e-8
80
+
81
+ Model:
82
+ hidden_size: 1280
83
+ cond_dim: 256
84
+ n_heads: 20
85
+ n_blocks: 4
86
+ dropout: 0.5
87
+ length: null #512
88
+ scale_by_sigma: True
89
+
90
+ trainer:
91
+ _target_: lightning.Trainer
92
+ accelerator: cuda
93
+ num_nodes: 1
94
+ devices: ${device_count:}
95
+ accumulate_grad_batches: ${div_up:${loader.global_batch_size}, ${eval:${trainer.devices} * ${loader.batch_size} * ${trainer.num_nodes}}}
96
+ gradient_clip_val: 1.0
97
+ precision: bf16
98
+ num_sanity_val_steps: 2
99
+ max_epochs: 60
100
+ max_steps: 1_000_000
101
+ log_every_n_steps: 10
102
+ limit_train_batches: 1.0 # train on full dataset, can be used to toggle quick run
103
+ limit_val_batches: 1.0 # validate on full dataset, can be used to toggle quick run
104
+ val_check_interval: 955
105
+
106
+ wandb:
107
+ project: MDpLM_finetune_membrane_200k-seqs
108
+ notes: null
109
+ group: programmablebio
110
+ job_type: null
111
+ name: dit_test #dit_wrapping_epochs60_lr3e-4_200k-seqs_bsz16_all-params_no-compile_gradclip1_beta-one0.9_beta-two0.999_bf16
112
+ id: ${.name}_${seed}
113
+
114
+ hydra:
115
+ run:
116
+ dir: /workspace/sg666/MDpLM/outputs/${data.train}/${now:%Y.%m.%d}/${now:%H%M%S}
117
+ job:
118
+ chdir: true
119
+
120
+ checkpointing:
121
+ # Use custom `save_dir` if, e.g., saving to S3 bucket, otherwise leave this parameter as is
122
+ save_dir: /workspace/sg666/MDpLM/checkpoints/membrane_mdlm/
123
+ # Note: `checkpoints` path should correspond to `checkpoint_every_n_steps.dirpath`
124
+ resume_from_ckpt: false
125
+ resume_ckpt_path: ${.save_dir}/epochs30_lr3e-4_bsz8_gradclip1_beta-one0.9_beta-two0.999_bf16_all-params_no-compile/checkpoints/last.ckpt #/checkpoints/last.ckpt
126
+ pretrained_esm_mdlm_automodel_path: /workspace/sg666/MDpLM/checkpoints/vanilla_esm_pretrained_automodel/epochs10_lr3e-4_200k-seqs_bsz16_all-params_no-compile_gradclip1_beta-one0.9_beta-two0.999_bf16/
127
+ finetuned_esm_mdlm_automodel_path: /workspace/sg666/MDpLM/checkpoints/membrane_mdlm/
data/.DS_Store ADDED
Binary file (6.15 kB). View file
 
data/membrane/test.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/membrane/train.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/membrane/val.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/uniref/100k_seqs/check_data.ipynb ADDED
@@ -0,0 +1,168 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 21,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import pandas as pd\n",
10
+ "import seaborn as sns\n",
11
+ "import matplotlib.pyplot as plt\n",
12
+ "import numpy as np"
13
+ ]
14
+ },
15
+ {
16
+ "cell_type": "code",
17
+ "execution_count": 4,
18
+ "metadata": {},
19
+ "outputs": [],
20
+ "source": [
21
+ "path = \"/home/sg666/MDpLM/data/uniref50/100k_seqs\""
22
+ ]
23
+ },
24
+ {
25
+ "cell_type": "code",
26
+ "execution_count": 5,
27
+ "metadata": {},
28
+ "outputs": [],
29
+ "source": [
30
+ "train = pd.read_csv(path + \"/train.csv\")\n",
31
+ "test = pd.read_csv(path + \"/test.csv\")\n",
32
+ "val = pd.read_csv(path + '/val.csv')"
33
+ ]
34
+ },
35
+ {
36
+ "cell_type": "code",
37
+ "execution_count": 23,
38
+ "metadata": {},
39
+ "outputs": [],
40
+ "source": [
41
+ "valid_residues = ['A','R','N','D','C','E','Q','G','H','I','L','K','M','F','P','S','T','W','Y','V']\n",
42
+ "\n",
43
+ "for df in [train, test, val]:\n",
44
+ " df['Length'] = df['Sequence'].str.len()\n",
45
+ "\n",
46
+ " for residue in valid_residues:\n",
47
+ " df[residue] = 0\n",
48
+ "\n",
49
+ " for idx, row in df.iterrows():\n",
50
+ " sequence = row['Sequence']\n",
51
+ "\n",
52
+ " for residue in valid_residues:\n",
53
+ " df.at[idx, residue] = sequence.count(residue)"
54
+ ]
55
+ },
56
+ {
57
+ "cell_type": "code",
58
+ "execution_count": 28,
59
+ "metadata": {},
60
+ "outputs": [
61
+ {
62
+ "data": {
63
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAA/IAAAIjCAYAAACgdyAGAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABst0lEQVR4nO3dd3yNd//H8fdJIhOxEzsxagZFqRlqJDVqr1IrVW3NWsVttVqzlNaqu0Zp7VWtonYVpTatomoTMWOVkFy/P/xybkdOhkhycvF6Ph7nwfle33Ndn+vKlZPzPt9rWAzDMAQAAAAAAEzBydEFAAAAAACAhCPIAwAAAABgIgR5AAAAAABMhCAPAAAAAICJEOQBAAAAADARgjwAAAAAACZCkAcAAAAAwEQI8gAAAAAAmAhBHgAAAAAAEyHIA3gq7du3l5+fX7Iv5+HDh+rXr59y584tJycnNWzYMNmX+aSoqCgVL15cn376aYovOzZ+fn6qV6+eo8tAHP7880+5uLjo8OHDji7lmVksFg0bNszRZbyQNm/eLIvFos2bN6fock+dOiWLxaLZs2en6HITavbs2bJYLDp16lS8ff38/NS+fftkrwnP5ml+pgD+hyAPPIeGDRsmi8WiK1eu2J1evHhxVatWLcmWZ7FYbB7p06dXYGCgVq1aleh5zpw5U2PHjlXTpk31zTff6IMPPpD06IPZk8uzWCx69913Y8zjxo0beuedd5Q1a1Z5eXmpevXq2rt3b4JrmD9/vs6ePauuXbta227fvq2hQ4cqODhYmTJlivcD75EjRxQcHKy0adMqU6ZMeuutt3T58uUY/aKiojRmzBj5+/vL3d1dJUqU0Pz58xNca0JERUVpzpw5Kl++vDJlyqR06dLppZdeUtu2bfXbb78l6bJeZEWLFlXdunU1ZMiQZJl/9Ife+B4p8YXbs4rt9/nJR1KFyhEjRmjFihVJMq/U4I033pCnp6du3boVa5/WrVvL1dVVV69eTdJlR3/REP1wdnZWtmzZ1LRpUx05ciRJl2U2T26bxx8tW7Z0dHkAnhMuji4AgLn897//VVRUVIz2WrVqqW3btjIMQ6dPn9bUqVNVv359rV69WkFBQU+9nI0bNypnzpz6/PPPY0wrVaqUevfubdP20ksv2TyPiopS3bp1deDAAfXt21dZsmTRlClTVK1aNe3Zs0cFCxaMt4axY8eqZcuW8vb2trZduXJFH3/8sfLkyaOSJUvGOVp27tw5Va1aVd7e3hoxYoRu376tzz77TIcOHdKuXbvk6upq7fuf//xHo0aNUqdOnfTKK6/o+++/15tvvpmkH/y6d++uyZMnq0GDBmrdurVcXFx09OhRrV69Wvny5dOrr76aJMuB9O6776pOnTo6ceKE8ufPn6Tzrlq1qubOnWvT9vbbb6tcuXJ65513rG1p06Z95mX9+++/cnFJvo8KEyZM0O3bt63Pf/rpJ82fP1+ff/65smTJYm2vWLFikixvxIgRatq0qUOO8EkOrVu31g8//KDly5erbdu2MabfvXtX33//vYKDg5U5c+ZkqaF79+565ZVX9ODBAx08eFDTpk3T5s2bdfjwYfn6+ib58t566y21bNlSbm5uST7vpBa9bR5nhi/YUpqZfqZAqmIAeO4MHTrUkGRcvnzZ7vRixYoZgYGBSbY8SUaXLl1s2v78809DkvH6668nap7Vq1c3ihUrFqM9b968Rt26deN9/cKFCw1JxuLFi61tYWFhRoYMGYxWrVrF+/q9e/cakoz169fbtN+7d8+4ePGiYRiG8fvvvxuSjFmzZtmdx3vvvWd4eHgYp0+ftratW7fOkGR89dVX1rZz584ZadKksdmGUVFRRpUqVYxcuXIZDx8+tLYndP2fFBoaalgsFqNTp04xpkVFRRmXLl166nkidhEREUbGjBmNwYMHp8jyvLy8jHbt2sXZ58GDB8b9+/dTpJ7EGjt2rCHJOHnyZLLMPyHb
KbXYtGmTIcnYtGlTrH3u3r1rpEuXzggKCrI7fd68eYYkY8GCBQle7smTJ+N8X3uyvsffYw3DMKZOnWpIMkaPHp3gZSaXvHnzOuTnHdu2iYsZfj8BpC4cWg/AehjgokWL9OmnnypXrlxyd3dXjRo19Pfff9v0Teg58kWKFFGWLFl04sQJm/b79+9r6NChKlCggNzc3JQ7d27169dP9+/fl/S/8zM3bdqkP/74w3o44pMj3xEREbpz506sy1+yZIl8fHzUuHFja1vWrFnVvHlzff/999blxWbFihVydXVV1apVbdrd3NwSPMq0dOlS1atXT3ny5LG21axZUy+99JIWLVpkbfv+++/14MEDvf/++9Y2i8Wi9957T+fOndOOHTviXM4333wjFxcX9e3bN9Y+J0+elGEYqlSpUoxpFotF2bJls2m7ceOGevbsqdy5c8vNzU0FChTQ6NGjYxyNcePGDbVv317e3t7KkCGD2rVrp/3798c4HLpatWp2T+ewtz9FRUVpwoQJKlasmNzd3eXj46POnTvr+vXrNv2irxfw66+/qly5cnJ3d1e+fPk0Z86cGMu5ceOGPvjgA/n5+cnNzU25cuVS27ZtbU4/iW/fjLZu3TpVrlxZGTJkUNq0aVWoUCENHDjQpk+aNGlUrVo1ff/99zFqSQnRv0efffaZJkyYoPz588vNzU1//vmnIiIiNGTIEJUpU0be3t7y8vJSlSpVtGnTphjzefIc+ejTdv7++2+1b99eGTJkkLe3tzp06KC7d+8m2/p8++23KlOmjDw8PJQpUya1bNlSZ8+etelz/PhxNWnSRL6+vnJ3d1euXLnUsmVLhYeHW9flzp07+uabb6zvK4k5f/r7779X3bp1lSNHDrm5uSl//vwaPny4IiMjbfpVq1ZNxYsX159//qnq1avL09NTOXPm1JgxY2LM89y5c2rYsKG8vLyULVs2ffDBB/G+R0mSh4eHGjdurA0bNigsLCzG9Hnz5ildunR64403dO3aNfXp00cBAQFKmzat0qdPr9dff10HDhx46m0QlypVqkhSjPf+8+fPq2PHjvLx8ZGbm5uKFSummTNnxnj9l19+qWLFisnT01MZM2ZU2bJlNW/ePOt0e+dTG4ahTz75RLly5ZKnp6eqV6+uP/74I8a8o/ffJ8V2jvbq1atVpUoVeXl5KV26dKpbt67d+T6tuH4/Jemvv/5S06ZNlSlTJrm7u6ts2bJauXJljPn88ccfeu211+Th4aFcuXLpk08+0cyZM2OsS2zXurB3DYGEvPc/Xv/06dOt9b/yyiv6/fffYyznr7/+UvPmzZU1a1Z5eHioUKFC+s9//mOd/izbPzQ0VB06dFCuXLnk5uam7Nmzq0GDBpxvjxcCh9YDsBo1apScnJzUp08fhYeHa8yYMWrdurV27tz51PMKDw/X9evXbQ4rjoqK0htvvKFff/1V77zzjooUKaJDhw7p888/17Fjx7RixQplzZpVc+fO1aeffqrbt29r5MiRkh59MRBt48aN8vT0VGRkpPLmzasPPvhAPXr0sFn+vn37VLp0aTk52X5fWa5cOU2fPl3Hjh1TQEBArPVv375dxYsXV5o0aZ563aVHH1rDwsJUtmzZGNPKlSunn376yaZWLy8vm3WM7hc9vXLlynaXM336dL377rsaOHCgPvnkk1jryZs3ryRp8eLFatasmTw9PWPte/fuXQUGBur8+fPq3Lmz8uTJo+3bt2vAgAG6ePGiJkyYIOnRh+cGDRro119/1bvvvqsiRYpo+fLlateuXazzTojOnTtr9uzZ6tChg7p3766TJ09q0qRJ2rdvn7Zt22bzM/n777/VtGlThYSEqF27dpo5c6bat2+vMmXKqFixYpIeXdegSpUqOnLkiDp27KjSpUvrypUrWrlypc6dO6csWbIkaN+UHn1wrlevnkqUKKGPP/5Ybm5u+vvvv7Vt27YY61GmTBl9//33unnzptKnT/9M2ySxZs2apXv3
7umdd96Rm5ubMmXKpJs3b+rrr79Wq1at1KlTJ926dUszZsxQUFCQdu3apVKlSsU73+bNm8vf318jR47U3r179fXXXytbtmwaPXp0kq/Dp59+qsGDB6t58+Z6++23dfnyZX355ZeqWrWq9u3bpwwZMigiIkJBQUG6f/++unXrJl9fX50/f14//vijbty4IW9vb82dOzfGKQiJOe1h9uzZSps2rXr16qW0adNq48aNGjJkiG7evKmxY8fa9L1+/bqCg4PVuHFjNW/eXEuWLNGHH36ogIAAvf7665Ienb5Qo0YNnTlzRt27d1eOHDk0d+5cbdy4MUH1tG7dWt98840WLVpkcz2Pa9euae3atWrVqpU8PDz0xx9/aMWKFWrWrJn8/f116dIlffXVVwoMDNSff/6pHDlyPPW2sCc6QGXMmNHadunSJb366quyWCzq2rWrsmbNqtWrVyskJEQ3b95Uz549JT06dat79+5q2rSpevTooXv37ungwYPauXOn3nzzzViXOWTIEH3yySeqU6eO6tSpo71796p27dqKiIhI9HrMnTtX7dq1U1BQkEaPHq27d+9q6tSpqly5svbt25egL7Rv3boV41o1mTJlsv7f3u/nH3/8oUqVKilnzpzq37+/vLy8tGjRIjVs2FBLly5Vo0aNJD0KsNWrV9fDhw+t/aZPny4PD49Er3NC3/ujzZs3T7du3VLnzp1lsVg0ZswYNW7cWP/884/1ffrgwYOqUqWK0qRJo3feeUd+fn46ceKEfvjhhzgvJJvQ7d+kSRP98ccf6tatm/z8/BQWFqZ169bpzJkznMaA55+DjwgAkAye9tD66MMAixQpYnNo38SJEw1JxqFDh6xt7dq1M/LmzWszP0lGSEiIcfnyZSMsLMzYvXu3ERwcbEgyxo4da+03d+5cw8nJydi6davN66dNm2ZIMrZt22ZtCwwMtHtoff369Y3Ro0cbK1asMGbMmGFUqVLFkGT069fPpp+Xl5fRsWPHGK9ftWqVIclYs2aN3W0TLVeuXEaTJk3i7BPXofXR0+bMmRNjWt++fQ1Jxr179wzDMIy6desa+fLli9Hvzp07hiSjf//+1rbHD62fOHGiYbFYjOHDh8dZZ7S2bdsakoyMGTMajRo1Mj777DPjyJEjMfoNHz7c8PLyMo4dO2bT3r9/f8PZ2dk4c+aMYRiGsWLFCkOSMWbMGGufhw8fWn8mj2+XwMBAu6dzPLk/bd261ZBkfPfddzb91qxZE6M9b968hiTjl19+sbaFhYUZbm5uRu/eva1tQ4YMMSQZy5Yti7H8qKgowzASvm9+/vnncf5uPS76sOadO3fG2/dZPXnIePTh0enTpzfCwsJs+j58+DDGIbzXr183fHx8YvzOSDKGDh1qfR793vJkv0aNGhmZM2d+5vV48tD6U6dOGc7Ozsann35q0+/QoUOGi4uLtX3fvn0JOpQ5KQ6tv3v3boy2zp07G56entbfacN4tM8/+R5w//59w9fX1+a9ZcKECYYkY9GiRda2O3fuGAUKFIj30HrDePTzzJ49u1GhQgWb9uh9d+3atYZhPDotKDIy0qbPyZMnDTc3N+Pjjz+2aYvtfe1x0X83Zs6caVy+fNm4cOGCsWbNGqNAgQKGxWIxdu3aZe0bEhJiZM+e3bhy5YrNPFq2bGl4e3tbt2mDBg3svu8/btasWTb7SFhYmOHq6mrUrVvX+vtsGIYxcOBAQ5LNzzt6/41vnrdu3TIyZMgQ41Sk0NBQw9vb2+4pSva2jb3HyZMn4/z9rFGjhhEQEGCzL0VFRRkVK1Y0ChYsaG3r2bNnjPeXsLAww9vbO8bpKU/+Hkd78tSDhL73R9efOXNm49q1a9Z+33//vSHJ+OGHH6xtVatWNdKlS2dziln0OkVL7Pa/fv16jM8ZwIuEQ+sBWHXo0MHmAmzRh0j+888/8b52xowZypo1q7Jly6ayZctqw4YN6tevn3r16mXts3jxYhUpUkSFCxfWlStX
rI/XXntNkuwe2vuklStXql+/fmrQoIE6duyoLVu2KCgoSOPHj9e5c+es/f7991+7F85xd3e3To/L1atXbUaUnlb0/BNSQ2JqHTNmjHr06KHRo0dr0KBBCapp1qxZmjRpkvz9/bV8+XL16dNHRYoUUY0aNXT+/Hlrv8WLF6tKlSrKmDGjzc+pZs2aioyM1C+//CLp0YXJXFxc9N5771lf6+zsrG7duiWoHnsWL14sb29v1apVy2bZZcqUUdq0aWPsI0WLFrXup9Kj0ycKFSpks88uXbpUJUuWtI5kPS76MNuE7psZMmSQ9OjwansXfXxc9P4T290jUkKTJk2UNWtWmzZnZ2fr73lUVJSuXbumhw8fqmzZsgm+q8OTd4moUqWKrl69qps3byZN4f9v2bJlioqKUvPmzW1+Lr6+vipYsKD15xJ9Qcq1a9cm6yH+kmxGPKNHXKtUqaK7d+/qr7/+sumbNm1atWnTxvrc1dVV5cqVs9k/f/rpJ2XPnl1Nmza1tnl6etpcuDAuzs7OatmypXbs2GFzOPG8efPk4+OjGjVqSHr0XhR9hFJkZKSuXr1qPTXkae7m8aSOHTsqa9asypEjh4KDgxUeHq65c+daL/JmGIaWLl2q+vXryzAMm59jUFCQwsPDrcvPkCGDzp07Z/fw7NisX79eERER6tatm81h89Gj/Imxbt063bhxQ61atbKp19nZWeXLl0/Q3yrp0ZEC69ats3k8fmrWk7+f165d08aNG9W8eXPrvnXlyhVdvXpVQUFBOn78uPW9+qefftKrr75qPXJLevT+17p160Svd0Lf+6O1aNHC5u/kk58ZLl++rF9++UUdO3a0OcVMkt1THKIldPt7eHjI1dVVmzdvjnHqFfAi4NB64AVl74/ok39oo/9AJ+QPZIMGDdS1a1dFRETo999/14gRI3T37l2bQ9uPHz+uI0eOxAgW0eyd4xkfi8WiDz74QGvXrtXmzZutH5o9PDzsnmN679496/T4GIbx1PVEi55/Qmp42lq3bNmiVatW6cMPP4zzvPgnOTk5qUuXLurSpYuuXr2qbdu2adq0aVq9erVatmyprVu3Snr0czp48GC8P6fTp08re/bsMa6OXqhQoQTX9KTjx48rPDw8xjn7Ty472pP7rPRov318nz1x4oSaNGkS73ITsm+2aNFCX3/9td5++231799fNWrUUOPGjdW0adMYp3FE7z9xfWCNiIjQtWvX7E5LmzbtM1953t/f3277N998o3Hjxumvv/7SgwcP4u3/pLjeK5LyNILjx4/LMIxY7zIRffiuv7+/evXqpfHjx+u7775TlSpV9MYbb6hNmzY2d51ICn/88YcGDRqkjRs3xvjiIvp8/Gi5cuWK8fPPmDGjDh48aH1++vRpFShQIEa/p/k9at26tT7//HPNmzdPAwcO1Llz57R161Z1795dzs7Okh59aTNx4kRNmTJFJ0+etDmn/1muaD9kyBBVqVJFt2/f1vLly7VgwQKb34XLly/rxo0bmj59uqZPn253HtG/Xx9++KHWr1+vcuXKqUCBAqpdu7befPNNu9f2iHb69GlJirGPZM2aNdFfxh4/flySrF/kPSmh+3hAQIBq1qwZ6/Qnf9/+/vtvGYahwYMHa/DgwXZfExYWppw5c+r06dMqX758jOnP+v6bkPf+aPF9ZogO9MWLF3/qOqT4t7+bm5tGjx6t3r17y8fHR6+++qrq1auntm3bJssdE4DUhiAPPIfiG3W+e/eutc/joj/wPSkhgTZXrlzWDyx16tRRlixZ1LVrV1WvXt16wbmoqCgFBARo/PjxdueRO3fueJcT1+seD0TZs2fXxYsXY/SNbovvfNDMmTM/0zf82bNnt1nekzVkypTJOgqfPXt2bdq0SYZh2HyYj63WYsWK6caNG5o7d646d+6c4PD1uMyZM+uNN97QG2+8oWrVqmnLli06ffq08ubNq6ioKNWqVUv9+vWz+9onb/WXEBaLxe5+9OQF
wqKiopQtWzZ99913dudjb3TZnqf9Eiah+6aHh4d++eUXbdq0SatWrdKaNWu0cOFCvfbaa/r5559t6onefx6/jdqTtm/frurVq9udNnToULsXqHoa9r6w+vbbb9W+fXs1bNhQffv2VbZs2eTs7KyRI0fGuEBZbJJqu8cnKipKFotFq1evtrvMx7/oGDdunNq3b6/vv/9eP//8s7p3766RI0fqt99+U65cuZKknhs3bigwMFDp06fXxx9/rPz588vd3V179+7Vhx9+GOMojZTaTmXKlFHhwoU1f/58DRw4UPPnz5dhGDajsyNGjNDgwYPVsWNHDR8+XJkyZZKTk5N69uwZ79ElcXk8rDZs2FB3795Vp06dVLlyZeXOnds67zZt2sR6/YwSJUpIenQtlKNHj+rHH3/UmjVrtHTpUk2ZMkVDhgzRRx99lOgao8X2pZq99yHp0Xna9gJhUt2S8cnfz+jl9unTJ9bbthYoUCBJli3ZX++nee9Prv37abZ/z549Vb9+fa1YsUJr167V4MGDNXLkSG3cuFEvv/zyM9UBpHYEeeA5FH1hs6NHj8YIx3fv3tXZs2dVu3btZK2hc+fO+vzzzzVo0CA1atRIFotF+fPn14EDB1SjRo04RymfVvS3/o+HvFKlSmnr1q2KioqyGR3auXOnPD094w2jhQsX1smTJxNdU86cOZU1a1bt3r07xrQnLyhWqlQpff311zpy5IiKFi1qU2v09MdlyZJFS5YsUeXKlVWjRg39+uuvz3ShqrJly2rLli26ePGi8ubNq/z58+v27dtxjiRJj/azDRs26Pbt2zaB6ujRozH6ZsyY0e4pGtGjadHy58+v9evXq1KlSs900aYn53n48OF4+yR033RyclKNGjVUo0YNjR8/XiNGjNB//vMfbdq0yWabnTx5Uk5OTnHuayVLltS6devsTsuXL1+cdSTWkiVLlC9fPi1btsxmXYcOHZosy3sW+fPnl2EY8vf3T9AXSAEBAQoICNCgQYO0fft2VapUSdOmTbNeCPJZ33c2b96sq1evatmyZTZ3tHiW94q8efPq8OHDMb7Is/d7FJfWrVtr8ODBOnjwoObNm6eCBQva3MN8yZIlql69umbMmGHzuhs3bsT5ZdPTGjVqlJYvX65PP/1U06ZNU9asWZUuXTpFRkbG+54iSV5eXmrRooVatGihiIgINW7cWJ9++qkGDBhg9wvo6L93x48ft/mduXz5cowvY6NHjG/cuGE9TUay/z4kSdmyZUtQzUkluv40adIk6P03euT6cbG9/964ccOmLSIiIsYXzQl970+o6PWJ7/33SU+7/fPnz6/evXurd+/eOn78uEqVKqVx48bp22+/ffqiARPhHHngOVSjRg25urpq6tSpMUZapk+frocPH1qvmJxcXFxc1Lt3bx05csR6C67mzZvr/Pnz+u9//xuj/7///hvn7eSkRyPuT44gPHjwQKNGjZKrq6vNyGbTpk116dIlLVu2zNp25coVLV68WPXr17d7TvrjKlSooMOHDyfoFlCxadKkiX788Ueb22Rt2LBBx44dU7NmzaxtDRo0UJo0aTRlyhRrm2EYmjZtmnLmzKmKFSvGmHeuXLm0fv16/fvvv6pVq5auXr0aZy2hoaHWWxs9LiIiQhs2bJCTk5N1pKd58+basWOH1q5dG6P/jRs39PDhQ0mPjrx4+PChpk6dap0eGRmpL7/8Msbr8ufPr7/++kuXL1+2th04cCDG1d6bN2+uyMhIDR8+PMY8Hj58GOPDaEI0adJEBw4c0PLly2NMix45Sui+ae8w+OgvWp7cV/bs2aNixYrFeWh3xowZVbNmTbuP5Ary0aNoj4+a7dy5M97bHDpC48aN5ezsrI8++ijGKJ9hGNb9/ubNm9b9MlpAQICcnJxsfi5eXl6J2oei2dt2ERERNr+7T6tOnTq6cOGClixZYm27e/durIehxyZ69H3IkCHav39/jHOlnZ2dY2zDxYsX21wfIynk
z59fTZo00ezZsxUaGipnZ2c1adJES5cutRvoHn9PePJ9zNXVVUWLFpVhGDangDyuZs2aSpMmjb788kub9XvyCuvRtUmyOdc7+paEjwsKClL69Ok1YsQIu8t9vOaklC1bNlWrVk1fffWV3aO5Hl9unTp19Ntvv2nXrl020+0dzZQ/f/4Y57dPnz49xt/ThL73J1TWrFlVtWpVzZw5U2fOnLGZFteofUK3/927d62noEXLnz+/0qVL90x/uwGzYEQeeA5ly5ZNQ4YM0aBBg1S1alW98cYb8vT01Pbt2zV//nzVrl1b9evXT/Y62rdvryFDhmj06NFq2LCh3nrrLS1atEjvvvuuNm3apEqVKikyMlJ//fWXFi1apLVr19q9XVu0lStX6pNPPlHTpk3l7++va9euad68eTp8+LBGjBhhcwhe06ZN9eqrr6pDhw76888/lSVLFk2ZMkWRkZEJOkSzQYMGGj58uLZs2RLj6IVJkybpxo0bunDhgiTphx9+sF5or1u3btbgNnDgQC1evFjVq1dXjx49dPv2bY0dO1YBAQHq0KGDdX65cuVSz549NXbsWD148ECvvPKKVqxYoa1bt+q7776L9fDFAgUK6Oeff1a1atUUFBSkjRs3xnru5rlz51SuXDm99tprqlGjhnx9fRUWFqb58+frwIED6tmzp3VUrm/fvlq5cqXq1atnvZXbnTt3dOjQIS1ZskSnTp1SlixZVL9+fVWqVEn9+/fXqVOnVLRoUS1btizGecLSowtijR8/XkFBQQoJCVFYWJimTZumYsWK2ZxnHBgYqM6dO2vkyJHav3+/ateurTRp0uj48eNavHixJk6caHNRsITo27evlixZombNmqljx44qU6aMrl27ppUrV2ratGkqWbJkgvfNjz/+WL/88ovq1q2rvHnzKiwsTFOmTFGuXLlsbhH44MEDbdmyRe+///5T1ZoS6tWrp2XLlqlRo0aqW7euTp48qWnTpqlo0aK6fft2ki1n2LBh+uijj7Rp0yZVq1YtUfPInz+/PvnkEw0YMECnTp1Sw4YNlS5dOp08eVLLly/XO++8oz59+mjjxo3q2rWrmjVrppdeekkPHz7U3LlzrSEyWpkyZbR+/XqNHz9eOXLkkL+/v/U8Y4vFosDAQG3evDnWeipWrKiMGTOqXbt26t69uywWi+bOnftMhxJ36tRJkyZNUtu2bbVnzx5lz55dc+fOjfMWkfb4+/urYsWK1i9Onwzy9erV08cff6wOHTqoYsWKOnTokL777rtk+cKob9++WrRokSZMmKBRo0Zp1KhR2rRpk8qXL69OnTqpaNGiunbtmvbu3av169dbvyCrXbu2fH19ValSJfn4+OjIkSOaNGmS6tatq3Tp0tldVtasWdWnTx+NHDlS9erVU506dbRv3z6tXr06xpEGtWvXVp48eRQSEqK+ffvK2dlZM2fOVNasWW2CZvr06TV16lS99dZbKl26tFq2bGnts2rVKlWqVEmTJk1K8u0mSZMnT1blypUVEBCgTp06KV++fLp06ZJ27Nihc+fO6cCBA5Kkfv36ae7cuQoODlaPHj2st5/LmzevzTUYJOntt9/Wu+++qyZNmqhWrVo6cOCA1q5dG2P7JPS9/2l88cUXqly5skqXLq133nlH/v7+OnXqlFatWqX9+/fbfU1Ct/+xY8dUo0YNNW/eXEWLFpWLi4uWL1+uS5cuqWXLlk9VJ2BKKXR1fAAO8O233xqvvvqq4eXlZbi5uRmFCxc2PvroI5vb2hjG/26V8+Stm+zdhii228916dLFbg3Dhg2zuYVSRESEMXr0aKNYsWKGm5ubkTFjRqNMmTLGRx99ZISHh1tfZ+/2c7t37zbq169v5MyZ03B1dTXSpk1rVK5c2ea2TY+7du2aERISYmTOnNnw9PQ0AgMDjd9//z2uTWajRIkSRkhISIz26Nue2Xs8fssfwzCMw4cPG7Vr1zY8PT2NDBkyGK1btzZCQ0NjzDMyMtIY
MWKEkTdvXsPV1dUoVqyY8e2339pddvTt56Lt3LnTSJcunVG1alW7t8YyDMO4efOmMXHiRCMoKMjIlSuXkSZNGiNdunRGhQoVjP/+9782twIyjEe3/xkwYIBRoEABw9XV1ciSJYtRsWJF47PPPjMiIiKs/a5evWq89dZbRvr06Q1vb2/jrbfest4K7MnbV3377bdGvnz5DFdXV6NUqVLG2rVr7e5PhmEY06dPN8qUKWN4eHgY6dKlMwICAox+/foZFy5ciHNbGIb9W91dvXrV6Nq1q3XfyZUrl9GuXTub22ElZN/csGGD0aBBAyNHjhyGq6urkSNHDqNVq1Yxbte0evVqQ5Jx/Phxuz+PpBbb7efs3ZYpKirKuq+5ubkZL7/8svHjjz/G+rtt7/ZzT95+78nbRxmGYfTu3duwWCx2b3EYmydvPxdt6dKlRuXKlQ0vLy/Dy8vLKFy4sNGlSxfj6NGjhmEYxj///GN07NjRyJ8/v+Hu7m5kypTJqF69urF+/Xqb+fz1119G1apVDQ8PD5tbk926dcuQZLRs2TLeGrdt22a8+uqrhoeHh5EjRw6jX79+xtq1a2PcKi62W2ja286nT5823njjDcPT09PIkiWL0aNHD+stF+O7/dzjJk+ebEgyypUrF2PavXv3jN69exvZs2c3PDw8jEqVKhk7duyI8fvytLefi+2Wf9WqVTPSp09v3LhxwzAMw7h06ZLRpUsXI3fu3EaaNGkMX19fo0aNGsb06dOtr/nqq6+MqlWrGpkzZzbc3NyM/PnzG3379rX522BvX4uMjDQ++ugj67pVq1bNOHz4cIzbqxmGYezZs8coX7684erqauTJk8cYP3683XlGr2NQUJDh7e1tuLu7G/nz5zfat29v7N69+5m2TVy/n4ZhGCdOnDDatm1r+Pr6GmnSpDFy5sxp1KtXz1iyZIlNv4MHDxqBgYGGu7u7kTNnTmP48OHGjBkz7G6fDz/80MiSJYvh6elpBAUFGX///bfd7ZOQ9/646n/yPcMwHv0dbNSokZEhQwbD3d3dKFSokDF48GDr9MRu/ytXrhhdunQxChcubHh5eRne3t5G+fLlY/1MADxvLIaRxFdcAYDnxNy5c9WlSxedOXPG5nxKxO3UqVPy9/fXrFmz1L59e0eX4xANGzaUxWKxezj/i6JcuXLKmzevFi9e7OhS4vXTTz+pXr16OnDggAICAhxdDpBos2fPVocOHXTy5En5+fk5uhwAyYhz5AEgFq1bt1aePHk0efJkR5cCEzly5Ih+/PFHu+f5vyhu3rypAwcO6OOPP3Z0KQmyadMmtWzZkhAPADANzpEHgFg4OTk99dV2gSJFijz1RaGeN+nTpzfVxabGjh3r6BIAAHgqjMgDAAAAAGAinCMPAAAAAICJMCIPAAAAAICJEOQBAAAAADARLnZnR1RUlC5cuKB06dLJYrE4uhwAAAAAwHPOMAzdunVLOXLkkJNT3GPuBHk7Lly4oNy5czu6DAAAAADAC+bs2bPKlStXnH0I8nakS5dO0qMNmD59egdXAwAAAAB43t28eVO5c+e25tG4EOTtiD6cPn369AR5AAAAAECKScjp3VzsDgAAAAAAEyHIAwAAAABgIgR5AAAAAABMxOHnyE+ePFljx45VaGioSpYsqS+//FLlypWz2/ePP/7QkCFDtGfPHp0+fVqff/65evbs+UzzBAAAAADELzIyUg8ePHB0Gabl7OwsFxeXJLnFuUOD/MKFC9WrVy9NmzZN5cuX14QJExQUFKSjR48qW7ZsMfrfvXtX+fLlU7NmzfTBBx8kyTwBAAAAAHG7ffu2zp07J8MwHF2KqXl6eip79uxydXV9pvlYDAf+JMqXL69XXnlFkyZNkiRFRUUpd+7c6tatm/r37x/na/38/NSzZ88YI/KJmef9+/d1//596/Poy/6Hh4dz1XoAAAAAL7TIyEgdP35cnp6eypo1a5KMKL9oDMNQRESELl++rMjISBUsWFBO
TrZnut+8eVPe3t4JyqEOG5GPiIjQnj17NGDAAGubk5OTatasqR07dqToPEeOHKmPPvooUcsEAAAAgOfZgwcPZBiGsmbNKg8PD0eXY1oeHh5KkyaNTp8+rYiICLm7uyd6Xg672N2VK1cUGRkpHx8fm3YfHx+Fhoam6DwHDBig8PBw6+Ps2bOJWj4AAAAAPK8YiX92T47CJ5bDL3aXGri5ucnNzc3RZQAAAAAAEC+HjchnyZJFzs7OunTpkk37pUuX5Ovrm2rmCQAAAABAauKwEXlXV1eVKVNGGzZsUMOGDSU9ujDdhg0b1LVr11QzTwAAAABATH79V6Xo8k6Nqpuiy7MntouupzSHHlrfq1cvtWvXTmXLllW5cuU0YcIE3blzRx06dJAktW3bVjlz5tTIkSMlPbqY3Z9//mn9//nz57V//36lTZtWBQoUSNA8AQAAAADPt/jO5x86dKiGDRv21PP9/fff5eXllciqko5Dg3yLFi10+fJlDRkyRKGhoSpVqpTWrFljvVjdmTNnbC4GcOHCBb388svW55999pk+++wzBQYGavPmzQmaJwAAAADg+Xbx4kXr/xcuXKghQ4bo6NGj1ra0adNa/28YhiIjI+XiEn88zpo1a9IWmkgOO0c+WteuXXX69Gndv39fO3fuVPny5a3TNm/erNmzZ1uf+/n5yTCMGI/oEJ+QeQIAAAAAnm++vr7Wh7e3tywWi/X5X3/9pXTp0mn16tUqU6aM3Nzc9Ouvv+rEiRNq0KCBfHx8lDZtWr3yyitav369zXz9/Pw0YcIE63OLxaKvv/5ajRo1kqenpwoWLKiVK1cm+/o5PMgDAAAAAJDS+vfvr1GjRunIkSMqUaKEbt++rTp16mjDhg3at2+fgoODVb9+fZ05cybO+Xz00Udq3ry5Dh48qDp16qh169a6du1astZOkAcAAAAAvHA+/vhj1apVS/nz51emTJlUsmRJde7cWcWLF1fBggU1fPhw5c+fP94R9vbt26tVq1YqUKCARowYodu3b2vXrl3JWjtBHgAAAADwwilbtqzN89u3b6tPnz4qUqSIMmTIoLRp0+rIkSPxjsiXKFHC+n8vLy+lT59eYWFhyVJzNIde7A4AAAAAAEd48urzffr00bp16/TZZ5+pQIEC8vDwUNOmTRURERHnfNKkSWPz3GKxKCoqKsnrfRxBHgAAAADwwtu2bZvat2+vRo0aSXo0Qn/q1CnHFhULgjwAAEhxfv1XxTn91Ki6KVQJAACPFCxYUMuWLVP9+vVlsVg0ePDgZB9ZTyyCPAAASH2GeSegT3jy1wEAiNXz9qXr+PHj1bFjR1WsWFFZsmTRhx9+qJs3bzq6LLsshmEYji4itbl586a8vb0VHh6u9OnTO7ocAACeO/GOyLu/Ge88AvzzxNvnULtDCa4JAGDfvXv3dPLkSfn7+8vd3d3R5ZhaXNvyaXIoV60HAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBEXBxdAAAAAADAhIZ5p/DywlN2eakYI/IAAAAAgOeKxWKJ8zFs2LBnmveKFSuSrNbEYEQeAAAAAPBcuXjxovX/Cxcu1JAhQ3T06FFrW9q0aR1RVpJhRB4AAAAA8Fzx9fW1Pry9vWWxWGzaFixYoCJFisjd3V2FCxfWlClTrK+NiIhQ165dlT17drm7uytv3rwaOXKkJMnPz0+S1KhRI1ksFuvzlMaIPAAAAADghfHdd99pyJAhmjRpkl5++WXt27dPnTp1kpeXl9q1a6cvvvhCK1eu1KJFi5QnTx6dPXtWZ8+elST9/vvvypYtm2bNmqXg4GA5Ozs7ZB0I8gAAAACAF8bQoUM1btw4NW7cWJLk7++vP//8U1999ZXatWunM2fOqGDBgqpcubIsFovy5s1rfW3WrFklSRkyZJCvr69D6pcI8gAAAACAF8Sd
O3d04sQJhYSEqFOnTtb2hw8fytv70VX427dvr1q1aqlQoUIKDg5WvXr1VLt2bUeVbBdBHgAAAADwQrh9+7Yk6b///a/Kly9vMy36MPnSpUvr5MmTWr16tdavX6/mzZurZs2aWrJkSYrXGxuCPAAAAADgheDj46McOXLon3/+UevWrWPtlz59erVo0UItWrRQ06ZNFRwcrGvXrilTpkxKkyaNIiMjU7DqmAjyAAAAAIAXxkcffaTu3bvL29tbwcHBun//vnbv3q3r16+rV69eGj9+vLJnz66XX35ZTk5OWrx4sXx9fZUhQwZJj65cv2HDBlWqVElubm7KmDFjiq8DQR4AAAAA8PSGhTu6gkR5++235enpqbFjx6pv377y8vJSQECAevbsKUlKly6dxowZo+PHj8vZ2VmvvPKKfvrpJzk5Pbp7+7hx49SrVy/997//Vc6cOXXq1KkUXweLYRhGii81lbt586a8vb0VHh6u9OnTO7ocAACeO379V8U5/ZT7m/HOI8A/T7x9DrU7lOCaAAD23bt3TydPnpS/v7/c3d0dXY6pxbUtnyaHOiVnkQAAAAAAIGkR5AEAAAAAMBGCPAAAAAAAJkKQBwAAAADARAjyAAAAAACYCEEeAAAAAAATIcgDAAAAAGAiBHkAAAAAAEyEIA8AAAAAgIm4OLoAAAAAAID5BHwTkKLLO9TuUIour1q1aipVqpQmTJiQostNCEbkAQAAAADPlfr16ys4ONjutK1bt8pisejgwYMpXFXSIcgDAAAAAJ4rISEhWrdunc6dOxdj2qxZs1S2bFmVKFHCAZUlDYI8AAAAAOC5Uq9ePWXNmlWzZ8+2ab99+7YWL16shg0bqlWrVsqZM6c8PT0VEBCg+fPnO6bYRCDIAwAAAACeKy4uLmrbtq1mz54twzCs7YsXL1ZkZKTatGmjMmXKaNWqVTp8+LDeeecdvfXWW9q1a5cDq044gjwAAAAA4LnTsWNHnThxQlu2bLG2zZo1S02aNFHevHnVp08flSpVSvny5VO3bt0UHBysRYsWObDihCPIAwAAAACeO4ULF1bFihU1c+ZMSdLff/+trVu3KiQkRJGRkRo+fLgCAgKUKVMmpU2bVmvXrtWZM2ccXHXCEOQBAAAAAM+lkJAQLV26VLdu3dKsWbOUP39+BQYGauzYsZo4caI+/PBDbdq0Sfv371dQUJAiIiIcXXKCEOQBAAAAAM+l5s2by8nJSfPmzdOcOXPUsWNHWSwWbdu2TQ0aNFCbNm1UsmRJ5cuXT8eOHXN0uQlGkAcAAAAAPJfSpk2rFi1aaMCAAbp48aLat28vSSpYsKDWrVun7du368iRI+rcubMuXbrk2GKfgoujCwAAAAAAmM+hdoccXUKChISEaMaMGapTp45y5MghSRo0aJD++ecfBQUFydPTU++8844aNmyo8PBwB1ebMAR5AAAAAMBzq0KFCja3oJOkTJkyacWKFXG+bvPmzclX1DMiyJucX/9V8fY5NapuClQCAAAAAEgJBPkXwTDveLsE+OeJc7pZDpsBAAAAgOcdF7sDAAAAAMBECPIAAAAAAJgIQR4AAAAAEK8nLxiHp5dU25AgDwAAAACIlbOzsyQpIiLCwZWY3927dyVJadKkeab5cLE7AAAAAECsXFxc5OnpqcuXLytNmjRycmI8+GkZhqG7d+8qLCxMGTJksH45klgEeQAAAABArCwWi7Jnz66TJ0/q9OnTji7H1DJkyCBfX99nng9BHgAAAAAQJ1dXVxUsWJDD659BmjRpnnkkPhpBHgAAAAAQLycnJ7m7uzu6DIiL3QEAAAAAYCoEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMA
AAAAYCIEeQAAAAAATIQgDwAAAACAiTg8yE+ePFl+fn5yd3dX+fLltWvXrjj7L168WIULF5a7u7sCAgL0008/2Uy/ffu2unbtqly5csnDw0NFixbVtGnTknMVAAAAAABIMQ4N8gsXLlSvXr00dOhQ7d27VyVLllRQUJDCwsLs9t++fbtatWqlkJAQ7du3Tw0bNlTDhg11+PBha59evXppzZo1+vbbb3XkyBH17NlTXbt21cqVK1NqtQAAAAAASDYODfLjx49Xp06d1KFDB+vIuaenp2bOnGm3/8SJExUcHKy+ffuqSJEiGj58uEqXLq1JkyZZ+2zfvl3t2rVTtWrV5Ofnp3feeUclS5aMd6QfAAAAAAAzcFiQj4iI0J49e1SzZs3/FePkpJo1a2rHjh12X7Njxw6b/pIUFBRk079ixYpauXKlzp8/L8MwtGnTJh07dky1a9eOtZb79+/r5s2bNg8AAAAAAFIjhwX5K1euKDIyUj4+PjbtPj4+Cg0Ntfua0NDQePt/+eWXKlq0qHLlyiVXV1cFBwdr8uTJqlq1aqy1jBw5Ut7e3tZH7ty5n2HNAAAAAABIPg6/2F1S+/LLL/Xbb79p5cqV2rNnj8aNG6cuXbpo/fr1sb5mwIABCg8Ptz7Onj2bghUDAAAAAJBwLo5acJYsWeTs7KxLly7ZtF+6dEm+vr52X+Pr6xtn/3///VcDBw7U8uXLVbduXUlSiRIltH//fn322WcxDsuP5ubmJjc3t2ddJQAAAAAAkp3DRuRdXV1VpkwZbdiwwdoWFRWlDRs2qEKFCnZfU6FCBZv+krRu3Tpr/wcPHujBgwdycrJdLWdnZ0VFRSXxGgAAAAAAkPIcNiIvPbpVXLt27VS2bFmVK1dOEyZM0J07d9ShQwdJUtu2bZUzZ06NHDlSktSjRw8FBgZq3Lhxqlu3rhYsWKDdu3dr+vTpkqT06dMrMDBQffv2lYeHh/LmzastW7Zozpw5Gj9+vMPWEwAAAACApOLQIN+iRQtdvnxZQ4YMUWhoqEqVKqU1a9ZYL2h35swZm9H1ihUrat68eRo0aJAGDhyoggULasWKFSpevLi1z4IFCzRgwAC1bt1a165dU968efXpp5/q3XffTfH1AwAAAAAgqVkMwzAcXURqc/PmTXl7eys8PFzp06d3dDlx8uu/Kt4+p9zfjLdPgH+eOKcfancowTUBABCf+P5+JcXfLom/XwAA83iaHPrcXbUeAAAAAIDnGUEeAAAAAAATIcgDAAAAAGAiBHkAAAAAAEyEIA8AAAAAgIkQ5AEAAAAAMBGCPAAAAAAAJkKQBwAAAADARAjyAAAAAACYCEEeAAAAAAATIcgDAAAAAGAiBHkAAAAAAEyEIA8AAAAAgIkQ5AEAAAAAMBGCPAAAAAAAJkKQBwAAAADARAjyAAAAAACYCEEeAAAAAAATIcgDAAAAAGAiBHkAAAAAAEyEIA8AAAAAgIkQ5AEAAAAAMBEXRxcAAAAAIHUJ+CYgzumH2h1KoUoA2MOIPAAAAAAAJsKIPAAAAPCc8Ou/Kt4+p9zfjH9G/nmSoBoAyYUReQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATMTF0QUAAF4sfv1XxdvnlPub8c9oWHgSVAMAAGA+BHkAgCkFfBMQ5/RD7Q6lUCUAAAApi0PrAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABNxcXQBAAAAAAAkVsA3AfH2
OdTuUApUknIYkQcAAAAAwEQYkQcAAAAAOIRf/1Xx9jnl/mbcHfzzJFE15sGIPAAAAAAAJkKQBwAAAADARAjyAAAAAACYCEEeAAAAAAATIcgDAAAAAGAiBHkAAAAAAEyEIA8AAAAAgIkQ5AEAAAAAMBGCPAAAAAAAJkKQBwAAAADARAjyAAAAAACYCEEeAAAAAAATIcgDAAAAAGAiBHkAAAAAAEyEIA8AAAAAgIkQ5AEAAAAAMBGCPAAAAAAAJuLi6AIAwNSGecczPTxl6gAAAMALgxF5AAAAAABMxOFBfvLkyfLz85O7u7vKly+vXbt2xdl/8eLFKly4sNzd3RUQEKCffvopRp8jR47ojTfekLe3t7y8vPTKK6/ozJkzybUKAAAAAACkGIceWr9w4UL16tVL06ZNU/ny5TVhwgQFBQXp6NGjypYtW4z+27dvV6tWrTRy5EjVq1dP8+bNU8OGDbV3714VL15cknTixAlVrlxZISEh+uijj5Q+fXr98ccfcnd3T+nVQ2LEd5iyxKHKSDF+/VfF2+cUby0AAABIYQ4N8uPHj1enTp3UoUMHSdK0adO0atUqzZw5U/3794/Rf+LEiQoODlbfvn0lScOHD9e6des0adIkTZs2TZL0n//8R3Xq1NGYMWOsr8ufP38KrA3ik1ShKOCbgDinH2p3KKElAQAAAIDpOCzIR0REaM+ePRowYIC1zcnJSTVr1tSOHTvsvmbHjh3q1auXTVtQUJBWrFghSYqKitKqVavUr18/BQUFad++ffL399eAAQPUsGHDWGu5f/++7t+/b31+8+bNxK8YADwmvi+eJL58AgAAwNNx2DnyV65cUWRkpHx8fGzafXx8FBoaavc1oaGhcfYPCwvT7du3NWrUKAUHB+vnn39Wo0aN1LhxY23ZsiXWWkaOHClvb2/rI3fu3M+4dgAAAAAAJA+HX+wuKUVFRUmSGjRooA8++EClSpVS//79Va9ePeuh9/YMGDBA4eHh1sfZs2dTqmQAAAAAAJ6Kww6tz5Ili5ydnXXp0iWb9kuXLsnX19fua3x9fePsnyVLFrm4uKho0aI2fYoUKaJff/011lrc3Nzk5uaWmNUAAAAAACBFOWxE3tXVVWXKlNGGDRusbVFRUdqwYYMqVKhg9zUVKlSw6S9J69ats/Z3dXXVK6+8oqNHj9r0OXbsmPLmzZvEawAAAAAAQMpz6FXre/XqpXbt2qls2bIqV66cJkyYoDt37livYt+2bVvlzJlTI0eOlCT16NFDgYGBGjdunOrWrasFCxZo9+7dmj59unWeffv2VYsWLVS1alVVr15da9as0Q8//KDNmzc7YhUBAAAAAEhSDg3yLVq00OXLlzVkyBCFhoaqVKlSWrNmjfWCdmfOnJGT0/8OGqhYsaLmzZunQYMGaeDAgSpYsKBWrFhhvYe8JDVq1EjTpk3TyJEj1b17dxUqVEhLly5V5cqVU3z9AAAAAABIag4N8pLUtWtXde3a1e40e6PozZo1U7NmzeKcZ8eOHdWxY8ekKA8AAAAAgFTlubpqPQAAAAAAz7tEBfl//vknqesAAAAAAAAJkKggX6BAAVWvXl3ffvut7t27l9Q1AQAAAACAWCQqyO/du1clSpRQr1695Ovrq86dO2vXrl1JXRsAAAAAAHhCooJ8qVKlNHHiRF24cEEzZ87UxYsXVblyZRUvXlzjx4/X5cuXk7pOAAAAAACgZ7zYnYuLixo3bqzFixdr9OjR+vvvv9WnTx/lzp1bbdu21cWLF5OqTgAAAAAAoGcM8rt379b777+v7Nmza/z48erTp49OnDihdevW6cKFC2rQoEFS1QkAAAAAAJTI+8iPHz9es2bN0tGjR1WnTh3NmTNHderUkZPTo+8F/P39NXv2bPn5+SVlrQAAAAAAvPASFeSnTp2qjh07qn379sqePbvdPtmyZdOMGTOeqTgAAAAAiEvANwFxTj/U7lAKVQKknEQF+ePHj8fbx9XVVe3atUvM
7AEAAAA85/z6r4q3zyn3N+OfkX+eJKgGMJdEnSM/a9YsLV68OEb74sWL9c033zxzUQAAAAAAwL5EBfmRI0cqS5YsMdqzZcumESNGPHNRAAAAAADAvkQF+TNnzsjf3z9Ge968eXXmzJlnLgoAAAAAANiXqCCfLVs2HTx4MEb7gQMHlDlz5mcuCgAAAAAA2JeoIN+qVSt1795dmzZtUmRkpCIjI7Vx40b16NFDLVu2TOoaAQAAAADA/0vUVeuHDx+uU6dOqUaNGnJxeTSLqKgotW3blnPkAQBwkPiuAH1qVN0UqgQAACSnRAV5V1dXLVy4UMOHD9eBAwfk4eGhgIAA5c2bN6nrAwAAKYj7MQMAkPolKshHe+mll/TSSy8lVS0AACA5DfOOvw/3YwYAINVLVJCPjIzU7NmztWHDBoWFhSkqKspm+saNG5OkOABJJCEf3oeFJ38dAAAAAJ5ZooJ8jx49NHv2bNWtW1fFixeXxWJJ6roAAAAAAIAdiQryCxYs0KJFi1SnTp2krgdAIsR7gSv3FCoEAAAAQLJL9MXuChQokNS1AHCg+C5wJXGRKwAAACA1SNR95Hv37q2JEyfKMIykrgcAAAAAAMQhUSPyv/76qzZt2qTVq1erWLFiSpMmjc30ZcuWJUlxAAAAAADAVqKCfIYMGdSoUaOkrgUAAAAAAMQjUUF+1qxZSV0HAAAAAABIgESdIy9JDx8+1Pr16/XVV1/p1q1bkqQLFy7o9u3bSVYcAAAAAACwlagR+dOnTys4OFhnzpzR/fv3VatWLaVLl06jR4/W/fv3NW3atKSuEwAAAAAAKJEj8j169FDZsmV1/fp1eXh4WNsbNWqkDRs2JFlxAAAAAADAVqJG5Ldu3art27fL1dXVpt3Pz0/nz59PksIAAAAAAEBMiRqRj4qKUmRkZIz2c+fOKV26dM9cFAAAAAAAsC9RQb527dqaMGGC9bnFYtHt27c1dOhQ1alTJ6lqAwAAAAAAT0jUofXjxo1TUFCQihYtqnv37unNN9/U8ePHlSVLFs2fPz+pawQAAAAAAP8vUUE+V65cOnDggBYsWKCDBw/q9u3bCgkJUevWrW0ufgcAAABACvgmIN4+h9odSoFKADwPEhXkJcnFxUVt2rRJyloAAAAAAEA8EhXk58yZE+f0tm3bJqoYAAAAwJSGecc93T9PytQB4IWQqCDfo0cPm+cPHjzQ3bt35erqKk9PT4I8AAB4ccQX4CRpWHjy14Fk49d/Vbx9TrmnQCEA8P8SFeSvX78eo+348eN677331Ldv32cuCgAAILWIL8QlJMBxfjQAICkl6vZz9hQsWFCjRo2KMVoPAAAAAACSTpIFeenRBfAuXLiQlLMEAAAAAACPSdSh9StXrrR5bhiGLl68qEmTJqlSpUpJUhgAAAAAAIgpUUG+YcOGNs8tFouyZs2q1157TePGjUuKugAAAAAAgB2JCvJRUVFJXQcAAAAAAEiAJD1HHgAAAAAAJK9Ejcj36tUrwX3Hjx+fmEUAAAAAAAA7EhXk9+3bp3379unBgwcqVKiQJOnYsWNydnZW6dKlrf0sFkvSVAkAAAAAACQlMsjXr19f6dKl0zfffKOMGTNKkq5fv64OHTqoSpUq6t27d5IWCQAAAAAAHknUOfLjxo3TyJEjrSFekjJmzKhPPvmEq9YDAAAAAJCMEhXkb968qcuXL8dov3z5sm7duvXMRQEAAAAAAPsSFeQbNWqkDh06aNmyZTp37pzOnTunpUuXKiQkRI0bN07qGgEAAAAAwP9L1Dny06ZNU58+ffTmm2/qwYMHj2bk4qKQkBCNHTs2SQsEAAAAAAD/k6gg7+npqSlTpmjs2LE6ceKEJCl//vzy8vJK0uIAAAAAAICtRB1aH+3ixYu6ePGiChYsKC8vLxmGkVR1AQAAAAAAOxIV5K9evaoaNWropZdeUp06dXTx4kVJUkhICLeeAwAAAAAgGSUqyH/wwQdKkyaNzpw5I09PT2t7
ixYttGbNmiQrDgAAAAAA2ErUOfI///yz1q5dq1y5ctm0FyxYUKdPn06SwgAAAAAAQEyJGpG/c+eOzUh8tGvXrsnNze2ZiwIAAAAAAPYlKshXqVJFc+bMsT63WCyKiorSmDFjVL169SQrDgAAAAAA2ErUofVjxoxRjRo1tHv3bkVERKhfv376448/dO3aNW3bti2pawQAAAAAAP8vUSPyxYsX17Fjx1S5cmU1aNBAd+7cUePGjbVv3z7lz58/qWsEAAAAAAD/76lH5B88eKDg4GBNmzZN//nPf5KjJgAAAAAAEIunHpFPkyaNDh48mBy1AAAAAACAeCTq0Po2bdpoxowZSV0LAAAAAACIR6Iudvfw4UPNnDlT69evV5kyZeTl5WUzffz48UlSHAAAAAAAsPVUQf6ff/6Rn5+fDh8+rNKlS0uSjh07ZtPHYrEkXXUAAAAAAMDGUwX5ggUL6uLFi9q0aZMkqUWLFvriiy/k4+OTLMUBAAAAAABbT3WOvGEYNs9Xr16tO3fuJGlBAAAAAAAgdom62F20J4M9AAAAAABIXk8V5C0WS4xz4DknHgAAAACAlPNU58gbhqH27dvLzc1NknTv3j29++67Ma5av2zZsqSrEAAAAAAAWD1VkG/Xrp3N8zZt2iRpMQAAAAAAIG5PFeRnzZqVXHUAAAAAAIAEeKaL3QEAAAAAgJRFkAcAAAAAwEQI8gAAAAAAmAhBHgAAAAAAEyHIAwAAAABgIgR5AAAAAABMhCAPAAAAAICJEOQBAAAAADCRVBHkJ0+eLD8/P7m7u6t8+fLatWtXnP0XL16swoULy93dXQEBAfrpp59i7fvuu+/KYrFowoQJSVw1AAAAAAApz+FBfuHCherVq5eGDh2qvXv3qmTJkgoKClJYWJjd/tu3b1erVq0UEhKiffv2qWHDhmrYsKEOHz4co+/y5cv122+/KUeOHMm9GgAAAAAApAiHB/nx48erU6dO6tChg4oWLapp06bJ09NTM2fOtNt/4sSJCg4OVt++fVWkSBENHz5cpUuX1qRJk2z6nT9/Xt26ddN3332nNGnSpMSqAAAAAACQ7FwcufCIiAjt2bNHAwYMsLY5OTmpZs2a2rFjh93X7NixQ7169bJpCwoK0ooVK6zPo6Ki9NZbb6lv374qVqxYvHXcv39f9+/ftz6/efPmU64Jnkd+/VfFOf3UqLrxziPgm4A4px9qd+ipagIAAAAAhwb5K1euKDIyUj4+PjbtPj4++uuvv+y+JjQ01G7/0NBQ6/PRo0fLxcVF3bt3T1AdI0eO1EcfffSU1eOFN8w7/j7+eZK/DgAAAAAvFIcfWp/U9uzZo4kTJ2r27NmyWCwJes2AAQMUHh5ufZw9ezaZqwQAAAAAIHEcOiKfJUsWOTs769KlSzbtly5dkq+vr93X+Pr6xtl/69atCgsLU548/xsJjYyMVO/evTVhwgSdOnUqxjzd3Nzk5ub2jGsDAACAhIrvFDYp/tPY4juFTeI0NgDPJ4eOyLu6uqpMmTLasGGDtS0qKkobNmxQhQoV7L6mQoUKNv0lad26ddb+b731lg4ePKj9+/dbHzly5FDfvn21du3a5FsZAAAAAABSgENH5CWpV69eateuncqWLaty5cppwoQJunPnjjp06CBJatu2rXLmzKmRI0dKknr06KHAwECNGzdOdevW1YIFC7R7925Nnz5dkpQ5c2ZlzpzZZhlp0qSRr6+vChUqlLIrBwAAgMSL73o0XIsGwAvK4UG+RYsWunz5soYMGaLQ0FCVKlVKa9assV7Q7syZM3Jy+t+BAxUrVtS8efM0aNAgDRw4UAULFtSKFStUvHhxR60CAAAAAAApxuFBXpK6du2qrl272p22efPmGG3NmjVTs2bNEjx/e+fFAwAAAABgRs/dVesBAAAAAHiepYoReQBACojvXNNh4SlTBwAAAJ4JI/IAAAAAAJgII/IA8BxI0P2Y3eOezv2YAQAAzIEReQAAAAAATIQg
DwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwERdHFwAAAAAASF0CvgmIt8+hdodSoBLYQ5AHAAAAgOeIX/9VcU4/5f5m/DPxz5NE1SA5cGg9AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAibg4ugAAAACkHgHfBMTb51C7QylQCQAgNgR5AABi4dd/Vbx9Trm/GW+fAP88cU4nFCGpJGifHVU3BSoBACQngjwAAMCLZJh33NPj+eIJAOB4nCMPAAAAAICJEOQBAAAAADARgjwAAAAAACZCkAcAAAAAwERSRZCfPHmy/Pz85O7urvLly2vXrl1x9l+8eLEKFy4sd3d3BQQE6KeffrJOe/DggT788EMFBATIy8tLOXLkUNu2bXXhwoXkXg0AAAAAAJKdw4P8woUL1atXLw0dOlR79+5VyZIlFRQUpLCwMLv9t2/frlatWikkJET79u1Tw4YN1bBhQx0+fFiSdPfuXe3du1eDBw/W3r17tWzZMh09elRvvPFGSq4WAAAAAADJwuFBfvz48erUqZM6dOigokWLatq0afL09NTMmTPt9p84caKCg4PVt29fFSlSRMOHD1fp0qU1adIkSZK3t7fWrVun5s2bq1ChQnr11Vc1adIk7dmzR2fOnEnJVQMAAAAAIMk5NMhHRERoz549qlmzprXNyclJNWvW1I4dO+y+ZseOHTb9JSkoKCjW/pIUHh4ui8WiDBky2J1+//593bx50+YBAAAAAEBq5NAgf+XKFUVGRsrHx8em3cfHR6GhoXZfExoa+lT97927pw8//FCtWrVS+vTp7fYZOXKkvL29rY/cuXMnYm0AAAAAAEh+Dj+0Pjk9ePBAzZs3l2EYmjp1aqz9BgwYoPDwcOvj7NmzKVglAAAAAAAJ5+LIhWfJkkXOzs66dOmSTfulS5fk6+tr9zW+vr4J6h8d4k+fPq2NGzfGOhovSW5ubnJzc0vkWgAAAAAAkHIcOiLv6uqqMmXKaMOGDda2qKgobdiwQRUqVLD7mgoVKtj0l6R169bZ9I8O8cePH9f69euVOXPm5FkBAAAAAABSmENH5CWpV69eateuncqWLaty5cppwoQJunPnjjp06CBJatu2rXLmzKmRI0dKknr06KHAwECNGzdOdevW1YIFC7R7925Nnz5d0qMQ37RpU+3du1c//vijIiMjrefPZ8qUSa6uro5ZUQAAAAAAkoDDg3yLFi10+fJlDRkyRKGhoSpVqpTWrFljvaDdmTNn5OT0vwMHKlasqHnz5mnQoEEaOHCgChYsqBUrVqh48eKSpPPnz2vlypWSpFKlStksa9OmTapWrVqK
rBcAAAAAAMnB4UFekrp27aquXbvanbZ58+YYbc2aNVOzZs3s9vfz85NhGElZHgAAAAAAqcZzfdV6AAAAAACeNwR5AAAAAABMhCAPAAAAAICJEOQBAAAAADARgjwAAAAAACZCkAcAAAAAwEQI8gAAAAAAmAhBHgAAAAAAEyHIAwAAAABgIgR5AAAAAABMhCAPAAAAAICJEOQBAAAAADARgjwAAAAAACZCkAcAAAAAwEQI8gAAAAAAmAhBHgAAAAAAEyHIAwAAAABgIgR5AAAAAABMxMXRBQB4sfj1XxXn9FOj6qZQJQAAAIA5EeQBmE7ANwHx9jnU7lAKVAIAAACkPII8gNRlmHf8ffzzJH8dAAAAQCrFOfIAAAAAAJgII/IAAAAAEI94r/Pj/ma88wiI56hCTg1EQjEiDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiaSKID958mT5+fnJ3d1d5cuX165du+Lsv3jxYhUuXFju7u4KCAjQTz/9ZDPdMAwNGTJE2bNnl4eHh2rWrKnjx48n5yoAAAAAAJAiHB7kFy5cqF69emno0KHau3evSpYsqaCgIIWFhdntv337drVq1UohISHat2+fGjZsqIYNG+rw4cPWPmPGjNEXX3yhadOmaefOnfLy8lJQUJDu3buXUqsFAAAAAECycHiQHz9+vDp16qQOHTqoaNGimjZtmjw9PTVz5ky7/SdOnKjg4GD17dtXRYoU0fDhw1W6dGlNmjRJ0qPR+AkTJmjQoEFq0KCBSpQooTlz5ujChQtasWJFCq4ZAAAAAABJz8WRC4+IiNCePXs0YMAAa5uTk5Nq1qypHTt22H3Njh071KtXL5u2oKAga0g/efKkQkNDVbNmTet0b29vlS9fXjt27FDLli1jzPP+/fu6f/++9Xl4eLgk6ebNm4let5QSdf9uvH1uWox4+0T+Gxn3PJJgW5ipVin+el+0WqWU2Q/MVKsUf73UGpOZ3gvMVKvE+5bd+fC+FXM5z9F7AbXGZKb3LTPVKvEea3c+qWQ/SKlak1t0jYYR/89HhgOdP3/ekGRs377dpr1v375GuXLl7L4mTZo0xrx582zaJk+ebGTLls0wDMPYtm2bIcm4cOGCTZ9mzZoZzZs3tzvPoUOHGpJ48ODBgwcPHjx48ODBgwcPhz7Onj0bb5Z26Ih8ajFgwACbUf6oqChdu3ZNmTNnlsVicWBlSe/mzZvKnTu3zp49q/Tp0zu6nDiZqVbJXPVSa/Kg1uRBrcnDTLVK5qqXWpMHtSYPak0+ZqqXWlMHwzB069Yt5ciRI96+Dg3yWbJkkbOzsy5dumTTfunSJfn6+tp9ja+vb5z9o/+9dOmSsmfPbtOnVKlSdufp5uYmNzc3m7YM
GTI8zaqYTvr06U2z45upVslc9VJr8qDW5EGtycNMtUrmqpdakwe1Jg9qTT5mqpdaHc/b2ztB/Rx6sTtXV1eVKVNGGzZssLZFRUVpw4YNqlChgt3XVKhQwaa/JK1bt87a39/fX76+vjZ9bt68qZ07d8Y6TwAAAAAAzMLhh9b36tVL7dq1U9myZVWuXDlNmDBBd+7cUYcOHSRJbdu2Vc6cOTVy5EhJUo8ePRQYGKhx48apbt26WrBggXbv3q3p06dLkiwWi3r27KlPPvlEBQsWlL+/vwYPHqwcOXKoYcOGjlpNAAAAAACShMODfIsWLXT58mUNGTJEoaGhKlWqlNasWSMfHx9J0pkzZ+Tk9L8DBypWrKh58+Zp0KBBGjhwoAoWLKgVK1aoePHi1j79+vXTnTt39M477+jGjRuqXLmy1qxZI3d39xRfv9TGzc1NQ4cOjXEqQWpkplolc9VLrcmDWpMHtSYPM9Uqmateak0e1Jo8qDX5mKleajUfi2Ek5Nr2AAAAAAAgNXDoOfIAAAAAAODpEOQBAAAAADARgjwAAAAAACZCkAcAAAAAwEQI8i+YHTt2yNnZWXXr1nV0KbFq3769LBaLLBaL0qRJI39/f/Xr10/37t1zdGkxRNc6atQom/YVK1bIYrE4qCr7ntyuPj4+qlWrlmbOnKmoqChHl2dXaGiounXrpnz58snNzU25c+dW/fr1tWHDBkeXZuPxbfv4Izg42NGlmV5oaKh69OihAgUKyN3dXT4+PqpUqZKmTp2qu3fvOro8q/bt29u9xenmzZtlsVh048aNFK8pIWKrOzUxQ43R7NW6ZMkSubu7a9y4cY4p6gnR71fvvvtujGldunSRxWJR+/btU76wOMT2Hvv33387urQYHq/V1dVVBQoU0Mcff6yHDx86ujS7Ll++rPfee0958uSRm5ubfH19FRQUpG3btjm6NCt7P/vHH8OGDXN0iZo2bZrSpUtn83O+ffu20qRJo2rVqtn0jf67cOLEiRSu0pZhGKpZs6aCgoJiTJsyZYoyZMigc+fOOaCymOrXrx/rZ6qtW7fKYrHo4MGDKVyV4xHkXzAzZsxQt27d9Msvv+jChQuOLidWwcHBunjxov755x99/vnn+uqrrzR06FBHl2WXu7u7Ro8erevXrzu6lHhFb9dTp05p9erVql69unr06KF69eqlug8Zp06dUpkyZbRx40aNHTtWhw4d0po1a1S9enV16dLF0eXFEL1tH3/Mnz/f0WXFcPbsWXXs2FE5cuSQq6ur8ubNqx49eujq1auOLi2Gf/75Ry+//LJ+/vlnjRgxQvv27dOOHTvUr18//fjjj1q/fr2jSwTi9PXXX6t169aaOnWqevfu7ehyrHLnzq0FCxbo33//tbbdu3dP8+bNU548eRxYWezsvcf6+/s7uiy7oms9fvy4evfurWHDhmns2LGOLsuuJk2aaN++ffrmm2907NgxrVy5UtWqVUtVfxMe/5lPmDBB6dOnt2nr06ePo0tU9erVdfv2be3evdvatnXrVvn6+mrnzp02g1GbNm1Snjx5lD9/fkeUamWxWDRr1izt3LlTX331lbX95MmT6tevn7788kvlypXLgRX+T0hIiNatW2f3i4VZs2apbNmyKlGihAMqcyyH30ceKef27dtauHChdu/erdDQUM2ePVsDBw50dFl2RX8rLD36wFGzZk2tW7dOo0ePdnBlMdWsWVN///23Ro4cqTFjxji6nDg9vl1z5syp0qVL69VXX1WNGjU0e/Zsvf322w6u8H/ef/99WSwW7dq1S15eXtb2YsWKqWPHjg6szL7Ht21q9c8//6hChQp66aWXNH/+fPn7++uPP/5Q3759tXr1av3222/KlCmTo8u0ev/99+Xi4qLdu3fb7AP58uVTgwYNxN1TkZqNGTNGQ4cO1YIFC9SoUSNHl2OjdOnSOnHihJYtW6bWrVtLkpYtW6Y8efKk2nBshvfYaI/X+t5772n58uVauXKl
BgwY4ODKbN24cUNbt27V5s2bFRgYKEnKmzevypUr5+DKbD3+c/f29pbFYkl1+0KhQoWUPXt2bd68Wa+++qqkRyPvDRo00MaNG/Xbb79ZR+Y3b96s6tWrO7Da/8mdO7cmTpyorl27qnbt2vLz81NISIhq166tt956y9HlWdWrV09Zs2bV7NmzNWjQIGv77du3tXjx4lT7RVlyY0T+BbJo0SIVLlxYhQoVUps2bTRz5kxTfBA+fPiwtm/fLldXV0eXYpezs7NGjBihL7/8MtUcgvQ0XnvtNZUsWVLLli1zdClW165d05o1a9SlSxebABctQ4YMKV/Uc6BLly5ydXXVzz//rMDAQOXJk0evv/661q9fr/Pnz+s///mPo0u0unr1qn7++edY9wFJqe70FSDahx9+qOHDh+vHH39MdSE+WseOHTVr1izr85kzZ6pDhw4OrOj55eHhoYiICEeXEUPatGmVNm1arVixQvfv33d0OaZXvXp1bdq0yfp806ZNqlatmgIDA63t//77r3bu3JlqgrwktWvXTjVq1FDHjh01adIkHT582GaEPjVwcXFR27ZtNXv2bJvssnjxYkVGRqpVq1YOrM5xCPIvkBkzZqhNmzaSHh32FR4eri1btji4Kvt+/PFHpU2bVu7u7goICFBYWJj69u3r6LJi1ahRI5UqVSrVHv4fn8KFC+vUqVOOLsPq77//lmEYKly4sKNLSbDoffbxx4gRIxxdltW1a9e0du1avf/++/Lw8LCZ5uvrq9atW2vhwoWp5su96H2gUKFCNu1ZsmSxbt8PP/zQQdXZZ28feP311x1dFlLY6tWrNWbMGH3//feqUaOGo8uJVZs2bfTrr7/q9OnTOn36tLZt22b9jJAaPfn71axZM0eXFC/DMLR+/XqtXbtWr732mqPLicHFxUWzZ8/WN998owwZMqhSpUoaOHDgC3mucVKoXr26tm3bpocPH+rWrVvat2+fAgMDVbVqVW3evFnSo2tV3b9/P1UFeUmaPn26Dh8+rJ49e2r69OnKmjWro0uKoWPHjjpx4oRNdpk1a5aaNGkib29vB1bmOBxa/4I4evSodu3apeXLl0t69ObdokULzZgxI8ZFOFKD6tWra+rUqbpz544+//xzubi4qEmTJo4uK06jR4/Wa6+9lirO1XpahmGkqtHN1BImn0b0Pvu41HSY+vHjx2UYhooUKWJ3epEiRXT9+nVdvnxZ2bJlS+HqEm7Xrl2KiopS69atU90Ikr19YOfOnak6HCHplShRQleuXNHQoUNVrlw5pU2b1tEl2ZU1a1bVrVvXOsJVt25dZcmSxdFlxerJ36/YjtRJDaK/dHjw4IGioqL05ptvpooLstnTpEkT1a1bV1u3btVvv/1m/SLq66+/TnUXPUztqlWrpjt37uj333/X9evX9dJLLylr1qwKDAxUhw4ddO/ePW3evFn58uVLddeiyJYtmzp37qwVK1ak2ouLFi5cWBUrVtTMmTNVrVo1/f3339q6das+/vhjR5fmMAT5F8SMGTP08OFD5ciRw9pmGIbc3Nw0adKkVPdNlpeXlwoUKCDp0eF+JUuW1IwZMxQSEuLgymJXtWpVBQUFacCAAab743fkyJFUdV5kwYIFZbFY9Ndffzm6lAR7fJ9NzeL7kiS1nMJSoEABWSwWHT161KY9X758khTjqILUwN4+YMbTbfBscubMqSVLlqh69eoKDg7W6tWrlS5dOkeXZVfHjh3VtWtXSdLkyZMdXE3czPIeK/3vSwdXV1flyJFDLi6p++O2u7u7atWqpVq1amnw4MF6++23NXToUNN9lnG0AgUKKFeuXNq0aZOuX79uve5Ajhw5lDt3bm3fvl2bNm1KlUdnSI8G+VL7vhoSEqJu3bpp8uTJmjVrlvLnz2/dzi8iDq1/ATx8+FBz5szRuHHjtH//fuvjwIEDypEjR6q8svbjnJycNHDgQA0aNMjmCrup0ahRo/TDDz9ox44dji4lwTZu3KhD
hw6lqiMeMmXKpKCgIE2ePFl37tyJMT213sorNYsOxkeOHLE7/ciRI8qaNWuquf5A5syZVatWLU2aNMnuPgCkZnnz5tWWLVsUGhqq4OBg3bp1y9El2RUcHKyIiAg9ePDA7i2okDjRXzrkyZMn1Qcje4oWLcr7biJVr15dmzdv1ubNm22OeK1atapWr16tXbt2pbrD6s2kefPmcnJy0rx58zRnzhx17NgxVR1RmtII8i+AH3/8UdevX1dISIiKFy9u82jSpIlmzJjh6BLj1axZMzk7O6f6EYOAgAC1bt1aX3zxhaNLsev+/fsKDQ3V+fPntXfvXo0YMUINGjRQvXr11LZtW0eXZ2Py5MmKjIxUuXLltHTpUh0/flxHjhzRF198oQoVKji6vBiit+3jjytXrji6LKvoYDxlypQYX4iFhobqu+++S3WjL1OmTNHDhw9VtmxZLVy4UEeOHNHRo0f17bff6q+//pKzs7OjS0QKCQ8Pt/kiev/+/Tp79qyjy4pT7ty5tXnzZoWFhSkoKEg3b950dEkxODs768iRI/rzzz/5fXoBXb16Va+99pq+/fZbHTx4UCdPntTixYs1ZswYNWjQwNHlmVL16tX166+/av/+/TYjxYGBgfrqq68UERFBkH8GadOmVYsWLTRgwABdvHgx1X1uSWkE+RfAjBkzVLNmTbuHzzdp0kS7d+9O9Rc2cXFxUdeuXTVmzJhU/y3xxx9/rKioKEeXYdeaNWuUPXt2+fn5KTg4WJs2bdIXX3yh77//PtV9iMuXL5/27t2r6tWrq3fv3ipevLhq1aqlDRs2xDgPOTWI3raPPypXruzosmxMmjRJ9+/fV1BQkH755RedPXtWa9asUa1atfTSSy9pyJAhji7RRv78+bVv3z7VrFlTAwYMUMmSJVW2bFl9+eWX6tOnj4YPH+7oEpFCNm/erJdfftnm8dFHHzm6rHjlypVLmzdv1pUrV1JtmE+fPr3Sp0/v6DLgAGnTplX58uX1+eefq2rVqipevLgGDx6sTp06adKkSY4uz5SqV6+uf//9VwUKFJCPj4+1PTAwULdu3bLepg6JFxISouvXrysoKMjmlOEXkcUw41WlAACJcurUKQ0bNkxr1qxRWFiYDMNQ48aNNXfuXHl6ejq6PAAAACQAQR4AXmBDhw7V+PHjtW7dOr366quOLgcAAAAJQJAHgBfcrFmzFB4eru7du8vJiTOuAAAAUjuCPAAAAAAAJsLQCwAAAAAAJkKQBwAAAADARAjyAAAAAACYCEEeAAAAAAATIcgDAAAAAGAiBHkAABBDtWrV1LNnT0eXEa/Zs2crQ4YMcfYZNmyYSpUqlSL1AACQEgjyAACY3I4dO+Ts7Ky6desm2TyXLVum4cOHJ9n84hMUFCRnZ2f9/vvvT/W6Fi1a6NixY8lUFQAAqRNBHgAAk5sxY4a6deumX375RRcuXEiSeWbKlEnp0qVLknnF58yZM9q+fbu6du2qmTNnPtVrPTw8lC1btmSqDACA1IkgDwCAid2+fVsLFy7Ue++9p7p162r27Nk20zdv3iyLxaK1a9fq5ZdfloeHh1577TWFhYVp9erVKlKkiNKnT68333xTd+/etb7uyUPr/fz8NGLECHXs2FHp0qVTnjx5NH36dJtlHTp0SK+99po8PDyUOXNmvfPOO7p9+3a86zBr1izVq1dP7733nubPn69///3XZvqNGzfUuXNn+fj4yN3dXcWLF9ePP/4oyf6h9aNGjZKPj4/SpUunkJAQ3bt3LwFbEgAA8yDIAwBgYosWLVLhwoVVqFAhtWnTRjNnzpRhGDH6DRs2TJMmTdL27dt19uxZNW/eXBMmTNC8efO0atUq/fzzz/ryyy/jXNa4ceNUtmxZ7du3T++//77ee+89HT16VJJ0584dBQUFKWPGjPr999+1ePFirV+/Xl27do1znoZhaNasWWrTpo0KFy6sAgUKaMmSJdbpUVFRev3117Vt2zZ9++23+vPPPzVq
1Cg5OzvHuj2GDRumESNGaPfu3cqePbumTJkS32YEAMBUCPIAAJjYjBkz1KZNG0lScHCwwsPDtWXLlhj9PvnkE1WqVEkvv/yyQkJCtGXLFk2dOlUvv/yyqlSpoqZNm2rTpk1xLqtOnTp6//33VaBAAX344YfKkiWL9TXz5s3TvXv3NGfOHBUvXlyvvfaaJk2apLlz5+rSpUuxznP9+vW6e/eugoKCJElt2rTRjBkzbKbv2rVLy5YtU61atZQvXz7Vq1dPr7/+ut35TZgwQSEhIQoJCVGhQoX0ySefqGjRonFvRAAATIYgDwCASR09elS7du1Sq1atJEkuLi5q0aKFTRCOVqJECev/fXx85OnpqXz58tm0hYWFxbm8x+dhsVjk6+trfc2RI0dUsmRJeXl5WftUqlRJUVFR1lF7e2bOnKkWLVrIxcVFktSqVStt27ZNJ06ckCTt379fuXLl0ksvvRRnbdGOHDmi8uXL27RVqFAhQa8FAMAsCPIAAJjUjBkz9PDhQ+XIkUMuLi5ycXHR1KlTtXTpUoWHh9v0TZMmjfX/FovF5nl0W1RUVJzLS8xr4nLt2jUtX75cU6ZMsdafM2dOPXz40HrROw8Pj0TPHwCA5xVBHgAAE3r48KHmzJmjcePGaf/+/dbHgQMHlCNHDs2fPz9F6ylSpIgOHDigO3fuWNu2bdsmJycnFSpUyO5rvvvuO+XKlUsHDhywWYdx48Zp9uzZioyMVIkSJXTu3LkE32KuSJEi2rlzp03bb7/9lvgVAwAgFSLIAwBgQj/++KOuX7+ukJAQFS9e3ObRpEkTu4fXJ6fWrVvL3d1d7dq10+HDh7Vp0yZ169ZNb731lnx8fOy+ZsaMGWratGmM+kNCQnTlyhWtWbNGgYGBqlq1qpo0aaJ169bp5MmTWr16tdasWWN3nj169NDMmTM1a9YsHTt2TEOHDtUff/yRnKsOAECKI8gDAGBCM2bMUM2aNeXt7R1jWpMmTbR7924dPHgwxerx9PTU2rVrde3aNb3yyitq2rSpatSooUmTJtntv2fPHh04cEBNmjSJMc3b21s1atSwfhmxdOlSvfLKK2rVqpWKFi2qfv36KTIy0u58W7RoocGDB6tfv34qU6aMTp8+rffeey/pVhQAgFTAYti7Rw0AAAAAAEiVGJEHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABP5P0lnIpmHJQCXAAAAAElFTkSuQmCC",
64
+ "text/plain": [
65
+ "<Figure size 1200x600 with 1 Axes>"
66
+ ]
67
+ },
68
+ "metadata": {},
69
+ "output_type": "display_data"
70
+ }
71
+ ],
72
+ "source": [
73
+ "amino_acid_frequencies = {}\n",
74
+ "\n",
75
+ "datasets = {'Train': train, 'Test': test, 'Val': val}\n",
76
+ "\n",
77
+ "\n",
78
+ "for name, df in datasets.items():\n",
79
+ " # Count total occurrences of each amino acid in each dataset\n",
80
+ " amino_acid_frequencies[name] = df[valid_residues].sum() / df['Length'].sum()\n",
81
+ "\n",
82
+ "# Convert frequencies to a dataframe for easier manipulation\n",
83
+ "freq_df = pd.DataFrame(amino_acid_frequencies)\n",
84
+ "\n",
85
+ "# Plot the histogram comparing amino acid frequencies\n",
86
+ "plt.figure(figsize=(12, 6))\n",
87
+ "\n",
88
+ "# Set bar width\n",
89
+ "bar_width = 0.2\n",
90
+ "\n",
91
+ "# Generate positions for the bars\n",
92
+ "amino_acids = list(valid_residues)\n",
93
+ "x = np.arange(len(amino_acids)) # positions for the amino acids\n",
94
+ "\n",
95
+ "# Plot the bars for each dataset with an offset\n",
96
+ "plt.bar(x - bar_width, freq_df['Train'], width=bar_width, label='Train', align='center')\n",
97
+ "plt.bar(x, freq_df['Test'], width=bar_width, label='Test', align='center')\n",
98
+ "plt.bar(x + bar_width, freq_df['Val'], width=bar_width, label='Val', align='center')\n",
99
+ "\n",
100
+ "plt.xticks(x, amino_acids)\n",
101
+ "\n",
102
+ "plt.title('UniRef50 (100k Sequences) - Train, Test, and Val Residue Frequencies')\n",
103
+ "plt.xlabel('Amino Acid')\n",
104
+ "plt.ylabel('Frequency')\n",
105
+ "\n",
106
+ "plt.legend()\n",
107
+ "plt.show()"
108
+ ]
109
+ },
110
+ {
111
+ "cell_type": "code",
112
+ "execution_count": 30,
113
+ "metadata": {},
114
+ "outputs": [
115
+ {
116
+ "data": {
117
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAA/8AAAIjCAYAAABViau2AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABn/0lEQVR4nO3deZyN9f//8ecZs+/WWawTyk7WxposI5K1qCkmUyjKEqIkUoQSJaRECxUl+RASsmfLTqgmsowRZsYQw8z790ffOT/HDGbGmDMuj/vtdm4357re53q/rjPXHPM87/d1XTZjjBEAAAAAALAsF2cXAAAAAAAAbi3CPwAAAAAAFkf4BwAAAADA4gj/AAAAAABYHOEfAAAAAACLI/wDAAAAAGBxhH8AAAAAACyO8A8AAAAAgMUR/gEAAAAAsDjCP4A8IyoqSqVKlbrl/Vy+fFmDBg1S8eLF5eLiorZt297yPq+WmpqqSpUq6c0338z1vq+lVKlSeuihh5xdBq5j7969cnV11e7du51dSrYMHz5cNpvN2WXcse6//37df//9ud5vbn22w5psNpt69+7t7DIASyD8A8iStD/e//nnnwzXV6pUKUf/uLTZbA4Pf39/NWrUSIsWLcr2Nj/55BONGzdOHTt21Keffqp+/fpJ+i/8Xt2fzWZTz549020jPj5e3bt3V+HCheXj46PGjRvr119/zXQNX375pf7++2+HP2iSkpL02muvqUWLFipQoIBsNptmzpx5zW3s27dPLVq0kK+vrwoUKKAnn3xSJ0+eTNcuNTVVY8eOVVhYmDw9PVWlShV9+eWXma41M1JTU/XZZ5+pTp06KlCggPz8/HT33XerS5cu+uWXX3K0rztZhQoV1KpVKw0bNixHt3utY//qx/WOx7xg5syZmdqPnAqi69ev1/DhwxUfH58j23O2X3/9VTabTUOHDr1mm4MHD8pms6l///453j+fI7dOVFSUfH19nV3GNVntdwnIq1ydXQAApPnoo4+UmpqabnmzZs3UpUsXGWN06NAhTZkyRa1bt9bixYsVERGR5X5WrFihokWL6t133023rlq1anrxxRcdlt19990Oz1NTU9WqVSvt2LFDAwcOVKFChTR58mTdf//92rp1q8qWLXvDGsaNG6fOnTsrICDAvuyff/7R66+/rhIlSqhq1ar6+eefr/n6I0eOqGHDhgoICNCoUaOUlJSkt99+W7t27dKmTZvk7u5ub/vKK6/orbfe0jPPPKNatWrp+++/1+OPPy6bzabOnTvfsNbMeOGFF/TBBx+oTZs2ioyMlKurq/bv36/Fixfrrrvu0n333Zcj/UDq2bOnWrZsqT/++EOlS5fOkW1OmDBBSUlJ9uc//PCDvvzyS7377rsqVKiQfXndunVvqp+hQ4dq8ODBN7WN62nYsKE+//xzh2VPP/20ateure7du9uX5VQIWr9+vUaMGKGoqCgFBgbmyDadqXr16ipXrpy+/PJLvfHGGxm2mT17tiTpiSeeyPH++Ry5c1ntdwnIswwAZMFrr71mJJmTJ09muL5ixYqmUaNGOdafJNOrVy+HZXv37jWSzIMPPpitbTZu3NhUrFgx3fKSJUuaVq1a3fD1X3/9tZFk5s6da18WFxdnAgMDzWOPPXbD1//6669Gkvnpp58cll+4cMEcP37cGGPM5s2bjSQzY8aMDLfx7LPPGi8vL3Po0CH7smXLlhlJ5sMPP7QvO3LkiHFzc3N4D1NTU02DBg1MsWLFzOXLl+3LM7v/V4uNjTU2m80888wz6dalpqaaEydOZHmbuLbk5GSTP39+8+qrr96yPsaNG2ckmZiYmOu2S0pKumU15BQfHx/TtWvXW7LtzL5PeUWjRo1u+Pk8cuRII8ls2LAhw/X33HOPKVeuXJb67dq1qylZsuR12/A5cmt17drV+Pj4OLuMa7re71JGfwcAyB6m/QO4pX7++WfZbDbNmTNHb775pooVKyZP
T081adJEv//+u0PbzJ4XWr58eRUqVEh//PGHw/KLFy/qtddeU5kyZeTh4aHixYtr0KBBunjxoiTpr7/+ks1m08qVK7Vnzx77FOCrR9iTk5N17ty5a/b/zTffKCgoSO3bt7cvK1y4sB599FF9//339v6uZf78+XJ3d1fDhg0dlnt4eCg4OPiG+y9J3377rR566CGVKFHCvqxp06a6++67NWfOHPuy77//XpcuXdJzzz1nX2az2fTss8/qyJEj2rBhw3X7+fTTT+Xq6qqBAwdes01MTIyMMapXr166dTabTUWKFHFYFh8fr759+6p48eLy8PBQmTJlNGbMmHSzPuLj4xUVFaWAgAAFBgaqa9eu2r59e7rp59c6jzmj4yk1NVUTJkxQxYoV5enpqaCgIPXo0UNnzpxxaJd2/YO1a9eqdu3a8vT01F133aXPPvssXT/x8fHq16+fSpUqJQ8PDxUrVkxdunRxODXmRsdmmmXLlql+/foKDAyUr6+v7rnnHr388ssObdzc3HT//ffr+++/T1fLrZQ2bfiPP/5Qy5Yt5efnp8jISEnSmjVr9Mgjj6hEiRL2/evXr5/+/fdfh21kdM5/2vm88+fPV6VKleTh4aGKFStqyZIlt2xfjh49qm7duikoKMje3yeffJKu3fvvv6+KFSvK29tb+fPnV82aNe0j38OHD7f/XoSFhdk/T/76668s1XL69GkNGDBAlStXlq+vr/z9/fXggw9qx44dDu2y8lkqSdOmTVPp0qXl5eWl2rVra82aNZmqJ+1nmrafV9q6dav2799vb/P999+rVatWCg0NlYeHh0qXLq2RI0cqJSUlS++BxOdITn6O3IyNGzeqRYsWCggIkLe3txo1aqR169Y5tEn7Pf7999/tI/UBAQF66qmndP78eYe2//77r1544QUVKlRIfn5+evjhh3X06FHZbDYNHz7cvr3M/C7d6DPi7Nmz6tu3r/09LFKkiJo1a5alU/IAq2PaP4Bc8dZbb8nFxUUDBgxQQkKCxo4dq8jISG3cuDHL20pISNCZM2ccpjynpqbq4Ycf1tq1a9W9e3eVL19eu3bt0rvvvqsDBw5o/vz5Kly4sD7//HO9+eabSkpK0ujRoyX992VCmhUrVsjb21spKSkqWbKk+vXrpz59+jj0v23bNlWvXl0uLo7fn9auXVvTpk3TgQMHVLly5WvWv379elWqVElubm5Z3nfpv+ASFxenmjVrpltXu3Zt/fDDDw61+vj4OOxjWru09fXr18+wn2nTpqlnz556+eWXrzkFWJJKliwpSZo7d64eeeQReXt7X7Pt+fPn1ahRIx09elQ9evRQiRIltH79eg0ZMkTHjx/XhAkTJEnGGLVp00Zr165Vz549Vb58eX333Xfq2rXrNbedGT169NDMmTP11FNP6YUXXlBMTIwmTZqkbdu2ad26dQ4/k99//10dO3ZUdHS0unbtqk8++URRUVGqUaOGKlasKOm/6zQ0aNBA+/btU7du3VS9enX9888/WrBggY4cOaJChQpl6tiUpD179uihhx5SlSpV9Prrr8vDw0O///57uj+8JalGjRr6/vvvlZiYKH9//5t6T7Li8uXLioiIUP369fX222/bf9Zz587V+fPn9eyzz6pgwYLatGmT3n//fR05ckRz58694XbXrl2refPm6bnnnpOfn5/ee+89dejQQYcPH1bBggVzdB9OnDih++67z/6lQ+HChbV48WJFR0crMTFRffv2lfTfaUgvvPCCOnbsqD59+ujChQvauXOnNm7cqMcff1zt27fXgQMH0p0eUbhw4SzV8+eff2r+/Pl65JFHFBYWphMnTujDDz9Uo0aNtHfvXoWGhjq0z8xn6fTp09WjRw/VrVtXffv21Z9//qmHH35YBQoUUPHixa9bT1hYmOrWras5c+bo3XffVb58+ezr0r4QePzxxyX9d40FX19f9e/fX76+vlqxYoWGDRumxMREjRs3LkvvA58jOfM5cjNW
rFihBx98UDVq1NBrr70mFxcXzZgxQw888IDWrFlj/38jzaOPPqqwsDCNHj1av/76qz7++GMVKVJEY8aMsbeJiorSnDlz9OSTT+q+++7TqlWr1KpVK4ftZOZ3KTOfET179tQ333yj3r17q0KFCjp16pTWrl2rffv2qXr16jf9/gCW4NyJBwBuN1md9r9y5UojyZQvX95cvHjRvnzixIlGktm1a5d9WUZTQyWZ6Ohoc/LkSRMXF2e2bNliWrRoYSSZcePG2dt9/vnnxsXFxaxZs8bh9VOnTjWSzLp16+zLGjVqlOG0/9atW5sxY8aY+fPnm+nTp5sGDRoYSWbQoEEO7Xx8fEy3bt3SvX7RokVGklmyZEmG702aYsWKmQ4dOly3zfWm/aet++yzz9KtGzhwoJFkLly4YIwxplWrVuauu+5K1+7cuXNGkhk8eLB92ZXT/idOnGhsNpsZOXLkdetM06VLFyPJ5M+f37Rr1868/fbbZt++fenajRw50vj4+JgDBw44LB88eLDJly+fOXz4sDHGmPnz5xtJZuzYsfY2ly9ftv9MrnxfrjWV+erjac2aNUaSmTVrlkO7JUuWpFtesmRJI8msXr3aviwuLs54eHiYF1980b5s2LBhRpKZN29euv5TU1ONMZk/Nt99993r/m5dafbs2UaS2bhx4w3bZkdGU3C7du2a7phJc/78+XTLRo8ebWw2m8OpKWmfH1eSZNzd3c3vv/9uX7Zjxw4jybz//vs3vS9XT/uPjo42ISEh5p9//nFo17lzZxMQEGDflzZt2mT4OXGlnJj2f+HCBZOSkuKwLCYmxnh4eJjXX3/dviyzn6XJycmmSJEiplq1ag7tpk2bZiRl6rSsDz74wEgyS5cutS9LSUkxRYsWNeHh4fZlGf3ce/ToYby9ve2fQcZkbtq/MXyO5MTnyLXcaNp/amqqKVu2rImIiLD3acx/P+OwsDDTrFkz+7K03+Or/x9s166dKViwoP351q1bjSTTt29fh3ZRUVFGknnttdfsy2407T8znxEBAQGcHgDcANP+AeSKp556yuEidA0aNJD036jXjUyfPl2FCxdWkSJFVLNmTS1fvlyDBg1yuNr03LlzVb58eZUrV07//POP/fHAAw9IklauXHnDfhYsWKBBgwapTZs26tatm1atWqWIiAiNHz9eR44csbf7999/5eHhke71np6e9vXXc+rUKeXPn/+G9VxL2vYzU0N2ah07dqz69OmjMWPGXPeq31eaMWOGJk2apLCwMH333XcaMGCAypcvryZNmujo0aP2dnPnzlWDBg2UP39+h59T06ZNlZKSotWrV0v674Jzrq6uevbZZ+2vzZcvn55//vlM1ZORuXPnKiAgQM2aNXPou0aNGvL19U13jFSoUMF+nEr/jULdc889Dsfst99+q6pVq6pdu3bp+kub3p7ZYzPtIlfff/99hhe+vFLa8XOtu27cSlf+TNJ4eXnZ/33u3Dn9888/qlu3rowx2rZt2w232bRpU4eZPFWqVJG/v3+mPh+ywhijb7/9Vq1bt5YxxuHnERERoYSEBPsU4cDAQB05ckSbN2/O0Rqu5uHhYZ9FlJKSolOnTtlP+chouvKNPku3bNmiuLg49ezZ06Fd2tT3zOjUqZPc3Nwcpv6vWrVKR48etU/5lxx/7mfPntU///yjBg0a6Pz58/rtt98y1deV+By5+c+R7Nq+fbsOHjyoxx9/XKdOnbJv/9y5c2rSpIlWr16d7nPp6jvhNGjQQKdOnVJiYqIk2aflX3namaRsvf+Z+YwIDAzUxo0bdezYsSxvH7hTMO0fQI7L6D7eV56bLv3/8HL1OZIZadOmjXr37q3k5GRt3rxZo0aN0vnz5x2m3R88eFD79u275pTbuLi4rOyCpP/2o1+/flq6dKl+/vln+9Wtvby8MjzH8sKFC/b1N2KMyXI9adK2n5kaslrrqlWrtGjRIr300kvXPc//ai4uLurVq5d69eqlU6dOad26
dZo6daoWL16szp072883PnjwoHbu3HnDn9OhQ4cUEhKS7qrs99xzT6ZrutrBgweVkJCQ7tzhq/tOc/UxK/133F55zP7xxx/q0KHDDfvNzLHZqVMnffzxx3r66ac1ePBgNWnSRO3bt1fHjh3TnWKSdvxk9LuWJjk5WadPn3ZYVrhwYYdp3Fnl6uqqYsWKpVt++PBhDRs2TAsWLEj3O52QkHDD7Wbmvc4JJ0+eVHx8vKZNm6Zp06Zl2Cbt5/HSSy/pp59+Uu3atVWmTBk1b95cjz/+eIbnpN+M1NRUTZw4UZMnT1ZMTIzD+fIZnfJwo8/SQ4cOSVK6u464ubnprrvuylRNBQsWVEREhL777jtNnTpVnp6emj17tlxdXfXoo4/a2+3Zs0dDhw7VihUr7IEvTWZ+7lfjc+T6/eb0/3FXb1/SdU+JSEhIcPji+nrHor+/vw4dOiQXFxeFhYU5tCtTpkyW68vM+zh27Fh17dpVxYsXV40aNdSyZUt16dIl08c9cCcg/APIkhuNbp8/f97e5krXChyZCcHFihVT06ZNJUktW7ZUoUKF1Lt3bzVu3Nh+0b3U1FRVrlxZ48ePz3AbNzrP9VrSXndliAoJCdHx48fTtU1bdvU5ulcrWLDgTYWakJAQh/6urqFAgQL20f6QkBCtXLlSxhiHoHitWitWrKj4+Hh9/vnn6tGjR7o/2jKjYMGCevjhh/Xwww/r/vvv16pVq3To0CGVLFlSqampatasmQYNGpTha6++rWJm2Gy2DI+jqy86lpqaqiJFimjWrFkZbufqP6pv5pi9ut/MHJteXl5avXq1Vq5cqUWLFmnJkiX6+uuv9cADD+jHH390qCft+LnyNnxXW79+vRo3buywLCYm5qbucX/lKHWalJQUNWvWTKdPn9ZLL72kcuXKycfHR0ePHlVUVNQNZzFIOfde30haLU888cQ1Q06VKlUk/XctkP3792vhwoVasmSJvv32W02ePFnDhg3TiBEjcqymUaNG6dVXX1W3bt00cuRIFShQQC4uLurbt2+G711uvVdPPPGEFi5cqIULF+rhhx/Wt99+q+bNm9t/T+Lj49WoUSP5+/vr9ddfV+nSpeXp6alff/1VL730UqZ+7tfD54ijW/V/3JXbl/67DW21atUybHP1lyi5dSxmtq9HH31UDRo00Hfffacff/xR48aN05gxYzRv3jw9+OCDOV4TcDsi/APIkrSLMu3fvz/dHxvnz5/X33//rebNm9/SGnr06KF3331XQ4cOVbt27WSz2VS6dGnt2LFDTZo0ue5oaFalTSm88g+6atWqac2aNUpNTXUIQhs3bpS3t/cN//AsV66cYmJisl1T0aJFVbhwYW3ZsiXduk2bNjn84VatWjV9/PHH2rdvnypUqOBQa9r6KxUqVEjffPON6tevryZNmmjt2rU3/DLjemrWrKlVq1bp+PHjKlmypEqXLq2kpCT7lznXUrJkSS1fvlxJSUkOf3Du378/Xdv8+fNnOD08bQQ0TenSpfXTTz+pXr16mZqdkRmlS5fW7t27b9gms8emi4uLmjRpoiZNmmj8+PEaNWqUXnnlFa1cudLhPYuJiZGLi8t1j7WqVatq2bJlDssyezeJrNi1a5cOHDigTz/9VF26dLEvv7rvvKBw4cLy8/NTSkrKDY9BSfLx8VGnTp3UqVMnJScnq3379nrzzTc1ZMgQeXp65shnzTfffKPGjRtr+vTpDsvj4+Ov++XOtaR9Rh88eNA+JVySLl26pJiYGFWtWjVT23n44Yfl5+en2bNny83NTWfOnHGY8v/zzz/r1KlTmjdvnsOdS27ms+1a+BzJ2udIdmuQJH9//0z9bmRG2hc1MTExDjNRMro7RU7tU0hIiJ577jk999xziouLU/Xq1fXmm28S/oH/wzn/ALKkSZMmcnd315QpU9KN7EybNk2XL1++5f/Jurq66sUXX9S+ffvstzt79NFHdfToUX300Ufp2v/777/XvXWf9N/I
/tUjPJcuXdJbb70ld3d3hxHUjh076sSJE5o3b5592T///KO5c+eqdevWGZ5jf6Xw8HDt3r37pm7P1KFDBy1cuFB///23fdny5ct14MABPfLII/Zlbdq0kZubmyZPnmxfZozR1KlTVbRoUdWtWzfdtosVK6affvpJ//77r5o1a6ZTp05dt5bY2Fjt3bs33fLk5GQtX75cLi4u9mmejz76qDZs2KClS5emax8fH6/Lly9L+m+Gx+XLlzVlyhT7+pSUFL3//vvpXle6dGn99ttvOnnypH3Zjh070l0l/9FHH1VKSopGjhyZbhuXL19WfHz8dfczIx06dNCOHTv03XffpVuXNiKV2WPz6in60v//cubqY2Xr1q2qWLHidc/hzp8/v5o2berwyGhWzs1KG5G7cgTOGKOJEyfmeF83K1++fOrQoYO+/fbbDMPWlcfQ1ce9u7u7KlSoIGOMLl26JOm/LwckZevYubKmq0dK586d63COe1bUrFlThQsX1tSpU5WcnGxfPnPmzCzV6eXlpXbt2umHH37QlClT5OPjozZt2jjULTn+3JOTkx0+a7KCz5Gc+RzJrho1aqh06dJ6++23lZSUlG79le9LZkVEREhSumMio/f/Zn+XUlJS0p1qUqRIEYWGhuborRCB2x0j/wCypEiRIho2bJiGDh2qhg0b6uGHH5a3t7fWr1+vL7/8Us2bN1fr1q1veR1RUVEaNmyYxowZo7Zt2+rJJ5/UnDlz1LNnT61cuVL16tVTSkqKfvvtN82ZM0dLly7N8NZ4aRYsWKA33nhDHTt2VFhYmE6fPq3Zs2dr9+7dGjVqlMOIaceOHXXffffpqaee0t69e1WoUCFNnjxZKSkpmZoO3KZNG40cOVKrVq1KN0ti0qRJio+Pt1+w6H//+5/9YoPPP/+8Pey9/PLLmjt3rho3bqw+ffooKSlJ48aNU+XKlfXUU0/Zt1esWDH17dtX48aN06VLl1SrVi3Nnz9fa9as0axZs645lbJMmTL68ccfdf/99ysiIkIrVqy45i3ljhw5otq1a+uBBx5QkyZNFBwcrLi4OH355ZfasWOH+vbtax/BHDhwoBYsWKCHHnrIfrurc+fOadeuXfrmm2/0119/qVChQmrdurXq1aunwYMH66+//lKFChU0b968DM8j7tatm8aPH6+IiAhFR0crLi5OU6dOVcWKFR3OQ27UqJF69Oih0aNHa/v27WrevLnc3Nx08OBBzZ07VxMnTlTHjh1v+PO70sCBA/XNN9/okUceUbdu3VSjRg2dPn1aCxYs0NSpU1W1atVMH5uvv/66Vq9erVatWqlkyZKKi4vT5MmTVaxYMYfbMV66dEmrVq1KdxEtZylXrpxKly6tAQMG6OjRo/L399e3336b4+frS/+NDjZq1Eg///xztrfx1ltvaeXKlapTp46eeeYZVahQQadPn9avv/6qn376yf4lTPPmzRUcHKx69eopKChI+/bt06RJk9SqVSv5+flJ+i8wSdIrr7yizp07y83NTa1bt5aPj4+GDx+uESNGaOXKlRnePz7NQw89pNdff11PPfWU6tatq127dmnWrFnZPk/Zzc1Nb7zxhnr06KEHHnhAnTp1UkxMjGbMmJHlbT7xxBP67LPPtHTpUkVGRtoDmiTVrVtX+fPnV9euXfXCCy/IZrPp888/z/aUbz5HcuZz5HouXbqU4W1bCxQooOeee04ff/yxHnzwQVWsWFFPPfWUihYtqqNHj2rlypXy9/fX//73vyztV40aNdShQwdNmDBBp06dst/q78CBA5IcR/uv97uUGWfPnlWxYsXUsWNHVa1aVb6+vvrpp5+0efNmvfPOO1mqG7C03LuxAAAr+eKLL8x9991nfHx8jIeHhylXrpwZMWKEw+2djPn/t6eaO3euw/KYmJh0t1q61q3+rnXrnuHDhxtJZuXKlcaY/25xNWbMGFOxYkXj4eFh8ufPb2rUqGFGjBhhEhIS7K/L6FZ/W7ZsMa1b
tzZFixY17u7uxtfX19SvX9/MmTMnw75Pnz5toqOjTcGCBY23t7dp1KiR2bx58/XeMgdVqlQx0dHR6Zan3Roqo8fVt0DavXu3ad68ufH29jaBgYEmMjLSxMbGpttmSkqKGTVqlClZsqRxd3c3FStWNF988UWGfafd6i/Nxo0bjZ+fn2nYsGGGt/UyxpjExEQzceJEExERYYoVK2bc3NyMn5+fCQ8PNx999JHDbaOMMebs2bNmyJAhpkyZMsbd3d0UKlTI1K1b17z99tsmOTnZ3u7UqVPmySefNP7+/iYgIMA8+eSTZtu2bRneAvGLL74wd911l3F3dzfVqlUzS5cuvebtxaZNm2Zq1KhhvLy8jJ+fn6lcubIZNGiQOXbs2HXfC2Myvh3YqVOnTO/eve3HTrFixUzXrl0dbiWXmWNz+fLlpk2bNiY0NNS4u7ub0NBQ89hjj6W7ndnixYuNJHPw4MEMfx454Vq3+rvWrcL27t1rmjZtanx9fU2hQoXMM888Y78V15U/q2vd6i+j3/GSJUs63KLv7NmzRpLp3Llzlvbl6lv9GWPMiRMnTK9evUzx4sWNm5ubCQ4ONk2aNDHTpk2zt/nwww9Nw4YNTcGCBY2Hh4cpXbq0GThwoMNniTH/3XauaNGixsXFxeE9e/HFF43NZsvwVnVXunDhgnnxxRdNSEiI8fLyMvXq1TMbNmxId6xl5bPUGGMmT55swsLCjIeHh6lZs6ZZvXr1NW9ndy2XL182ISEhRpL54Ycf0q1ft26due+++4yXl5cJDQ01gwYNMkuXLnX4XDYmc7f643MkZz5HriXtVp0ZPUqXLm1vt23bNtO+fXv7cV+yZEnz6KOPmuXLl9vbXOuWvzNmzEj3uXHu3DnTq1cvU6BAAePr62vatm1r9u/fbySZt956y+H11/pdysxnxMWLF83AgQNN1apVjZ+fn/Hx8TFVq1Y1kydPvu77AtxpbMbcgqtyAACu6/PPP1evXr10+PBh+y3ecGN//fWXwsLCNGPGDEVFRTm7HKdo27atbDZbhlOEreyHH37QQw89pB07dqhy5crOLueGateurZIlS2ru3LnOLgVX4XPEubZv3657771XX3zxhcN1JADcepzzDwBOEBkZqRIlSuiDDz5wdim4jezbt08LFy7M8Hxjq1u5cqU6d+58WwT/xMRE7dixQ6+//rqzSwGcKqM7A02YMEEuLi4OF4oEkDs45x8AnMDFxeWGV3cGrla+fHn7xczuNOPGjXN2CZnm7+/PRcYASWPHjtXWrVvVuHFjubq6avHixVq8eLG6d+9+07cnBJB1hH8AAAAAOa5u3bpatmyZRo4cqaSkJJUoUULDhw/XK6+84uzSgDsS5/wDAAAAAGBxnPMPAAAAAIDFEf4BAAAAALA4zvnPIampqTp27Jj8/Pxks9mcXQ4AAAAAwOKMMTp79qxCQ0Pl4nL9sX3Cfw45duwYVy0FAAAAAOS6v//+W8WKFbtuG8J/DvHz85P035vu7+/v5GoAAAAAAFaXmJio4sWL2/Po9RD+c0jaVH9/f3/CPwAAAAAg12Tm1HMu+AcAAAAAgMUR/gEAAAAAsDjCPwAAAAAAFsc5/wAAAACAWyIlJUWXLl1ydhm3rXz58snV1TVHbidP+AcAAAAA5LikpCQdOXJExhhnl3Jb8/b2VkhIiNzd3W9qO4R/AAAAAECOSklJ0ZEjR+Tt7a3ChQvnyMj1ncYYo+TkZJ08eVIxMTEqW7asXFyyf+Y+4R8AAAAAkKMuXbokY4wKFy4sLy8vZ5dz2/Ly8pKbm5sOHTqk5ORkeXp6ZntbXPAPAAAAAHBLMOJ/825mtN9hOzmyFQAAAAAAkGcR/gEAAAAAsDjO+QcAAAAA5IromZtztb/pUbVytb+MlCpVSn379lXfvn2dWgcj/wAAAACAO57NZrvuY/jw4dna7ubNm9W9e/ecLTYbGPkHAAAAANzxjh8/bv/3119/rWHDhmn//v32Zb6+vvZ/
G2OUkpIiV9cbR+rChQvnbKHZxMg/AAAAAOCOFxwcbH8EBATIZrPZn//222/y8/PT4sWLVaNGDXl4eGjt2rX6448/1KZNGwUFBcnX11e1atXSTz/95LDdUqVKacKECfbnNptNH3/8sdq1aydvb2+VLVtWCxYsuOX7R/gHAAAAACATBg8erLfeekv79u1TlSpVlJSUpJYtW2r58uXatm2bWrRoodatW+vw4cPX3c6IESP06KOPaufOnWrZsqUiIyN1+vTpW1o74R8AAAAAgEx4/fXX1axZM5UuXVoFChRQ1apV1aNHD1WqVElly5bVyJEjVbp06RuO5EdFRemxxx5TmTJlNGrUKCUlJWnTpk23tHbCPwAAAAAAmVCzZk2H50lJSRowYIDKly+vwMBA+fr6at++fTcc+a9SpYr93z4+PvL391dcXNwtqTkNF/wDAAAAACATfHx8HJ4PGDBAy5Yt09tvv60yZcrIy8tLHTt2VHJy8nW34+bm5vDcZrMpNTU1x+u9EuEfAAAAAIBsWLdunaKiotSuXTtJ/80E+Ouvv5xb1DUQ/mFJ0TM352p/06Nq5Wp/AAAAAJyvbNmymjdvnlq3bi2bzaZXX331lo/gZxfhHwAAAACQK6w2aDZ+/Hh169ZNdevWVaFChfTSSy8pMTHR2WVlyGaMMc4uwgoSExMVEBCghIQE+fv7O7ucOx4j/wAAAIDzXLhwQTExMQoLC5Onp6ezy7mtXe+9zEoO5Wr/AAAAAABYHOEfAAAAAACLI/wDAAAAAGBxhH8AAAAAACyO8A8AAAAAgMUR/gEAAAAAsDjCPwAAAAAAFkf4BwAAAADA4gj/AAAAAABYnKuzCwAAAAAA3CFmd8rd/h7/Onf7y8MY+QcAAAAA3PFsNtt1H8OHD7+pbc+fPz/Has0ORv4BAAAAAHe848eP2//99ddfa9iwYdq/f799ma+vrzPKyjGM/AMAAAAA7njBwcH2R0BAgGw2m8Oyr776SuXLl5enp6fKlSunyZMn21+bnJys3r17KyQkRJ6enipZsqRGjx4tSSpVqpQkqV27drLZbPbnuY2RfwAAAAAArmPWrFkaNmyYJk2apHvvvVfbtm3TM888Ix8fH3Xt2lXvvfeeFixYoDlz5qhEiRL6+++/9ffff0uSNm/erCJFimjGjBlq0aKF8uXL55R9IPwDAAAAAHAdr732mt555x21b99ekhQWFqa9e/fqww8/VNeuXXX48GGVLVtW9evXl81mU8mSJe2vLVy4sCQpMDBQwcHBTqlfIvwDAAAAAHBN586d0x9//KHo6Gg988wz9uWXL19WQECAJCkqKkrNmjXTPffcoxYtWuihhx5S8+bNnVVyhpx6zv/q1avVunVrhYaGZnj1Q2OMhg0bppCQEHl5ealp06Y6ePCgQ5vTp08rMjJS/v7+CgwMVHR0tJKSkhza7Ny5Uw0aNJCnp6eKFy+usWPHpqtl7ty5KleunDw9PVW5cmX98MMPOb6/AAAAAIDbS1q+/Oijj7R9+3b7Y/fu3frll18kSdWrV1dMTIxGjhypf//9V48++qg6duzozLLTcWr4P3funKpWraoPPvggw/Vjx47Ve++9p6lTp2rjxo3y8fFRRESELly4YG8TGRmpPXv2aNmyZVq4cKFWr16t7t2729cnJiaqefPmKlmypLZu3apx48Zp+PDhmjZtmr3N+vXr9dhjjyk6Olrbtm1T27Zt1bZtW+3evfvW7TwAAAAAIM8LCgpSaGio/vzzT5UpU8bhERYWZm/n7++vTp066aOPPtLXX3+tb7/9VqdPn5Ykubm5KSUlxVm7IMnJ0/4ffPBBPfjggxmuM8ZowoQJGjp0qNq0aSNJ+uyzzxQUFKT58+erc+fO2rdvn5YsWaLNmzerZs2akqT3339fLVu21Ntvv63Q0FDNmjVLycnJ+uSTT+Tu7q6KFStq+/btGj9+vP1LgokTJ6pFixYaOHCgJGnkyJFatmyZJk2apKlTp+bC
OwEAAAAAyKtGjBihF154QQEBAWrRooUuXryoLVu26MyZM+rfv7/Gjx+vkJAQ3XvvvXJxcdHcuXMVHByswMBASf9d8X/58uWqV6+ePDw8lD9//lzfhzx7zn9MTIxiY2PVtGlT+7KAgADVqVNHGzZsUOfOnbVhwwYFBgbag78kNW3aVC4uLtq4caPatWunDRs2qGHDhnJ3d7e3iYiI0JgxY3TmzBnlz59fGzZsUP/+/R36j4iISHcawpUuXryoixcv2p8nJibmwF4DAAAAgIU9/rWzK8iWp59+Wt7e3ho3bpwGDhwoHx8fVa5cWX379pUk+fn5aezYsTp48KDy5cunWrVq6YcffpCLy3+T7d955x31799fH330kYoWLaq//vor1/chz4b/2NhYSf9NsbhSUFCQfV1sbKyKFCnisN7V1VUFChRwaHPlVIwrtxkbG6v8+fMrNjb2uv1kZPTo0RoxYkQ29gwAAAAAkJdFRUUpKirKYdnjjz+uxx9/PMP2zzzzjMPFAK/WunVrtW7dOidLzDKnnvN/OxsyZIgSEhLsj7R7OAIAAAAAkNfk2fCfdv/DEydOOCw/ceKEfV1wcLDi4uIc1l++fFmnT592aJPRNq7s41ptrncPRg8PD/n7+zs8AAAAAADIi/Js+A8LC1NwcLCWL19uX5aYmKiNGzcqPDxckhQeHq74+Hht3brV3mbFihVKTU1VnTp17G1Wr16tS5cu2dssW7ZM99xzj/0iC+Hh4Q79pLVJ6wcAAAAAgNuZU8N/UlKS/R6J0n8X+du+fbsOHz4sm82mvn376o033tCCBQu0a9cudenSRaGhoWrbtq0kqXz58mrRooWeeeYZbdq0SevWrVPv3r3VuXNnhYaGSvrvvAx3d3dFR0drz549+vrrrzVx4kSHC/z16dNHS5Ys0TvvvKPffvtNw4cP15YtW9S7d+/cfksAAAAAAMhxTr3g35YtW9S4cWP787RA3rVrV82cOVODBg3SuXPn1L17d8XHx6t+/fpasmSJPD097a+ZNWuWevfurSZNmsjFxUUdOnTQe++9Z18fEBCgH3/8Ub169VKNGjVUqFAhDRs2zH6bP0mqW7euZs+eraFDh+rll19W2bJlNX/+fFWqVCkX3gUAAAAAAG4tmzHGOLsIK0hMTFRAQIASEhI4/z8PiJ65OVf7mx5VK1f7AwAAAPKyCxcuKCYmRmFhYQ6Dt8i6672XWcmhefacfwAAAAAAkDMI/wAAAAAAWBzhHwAAAAAAi3PqBf8AAAAAAHeO3stz945qk5pMytX+7r//flWrVk0TJkzI1X4zg5F/AAAAAMAdr3Xr1mrRokWG69asWSObzaadO3fmclU5h/APAAAAALjjRUdHa9myZTpy5Ei6dTNmzFDNmjVVpUoVJ1SWMwj/AAAAAIA73kMPPaTChQtr5syZDsuTkpI0d+5ctW3bVo899piKFi0qb29vVa5cWV9++aVzis0Gwj8AAAAA4I7n6uqqLl26aObMmTLG2JfPnTtXKSkpeuKJJ1SjRg0tWrRIu3fvVvfu3fXkk09q06ZNTqw68wj/AAAAAABI6tatm/744w+tWrXKvmzGjBnq0KGDSpYsqQEDBqhatWq666679Pzzz6tFixaaM2eOEyvOPMI/AAAAAACSypUrp7p16+qTTz6RJP3+++9as2aNoqOjlZKSopEjR6py5coqUKCAfH19tXTpUh0+fNjJVWcO4R8AAAAAgP8THR2tb7/9VmfPntWMGTNUunRpNWrUSOPGjdPEiRP10ksvaeXKldq+fbsiIiKUnJzs7JIzxdXZBeDOED1zs7NLAAAAAIAbevTRR9WnTx/Nnj1bn332mZ599lnZbDatW7dObdq00RNPPCFJSk1N1YEDB1ShQgUnV5w5jPwDAAAAAPB/fH191alTJw0ZMkTHjx9XVFSUJKls2bJatmyZ1q9fr3379qlHjx46ceKEc4vNAkb+AQAAAAC5YlKTSc4uIVOio6M1ffp0
tWzZUqGhoZKkoUOH6s8//1RERIS8vb3VvXt3tW3bVgkJCU6uNnMI/wAAAAAAXCE8PNzhdn+SVKBAAc2fP/+6r/v5559vXVE3iWn/AAAAAABYHOEfAAAAAACLI/wDAAAAAGBxhH8AAAAAACyO8A8AAAAAuCWuvmgesi6n3kPCPwAAAAAgR+XLl0+SlJyc7ORKbn/nz5+XJLm5ud3UdrjVHwAAAAAgR7m6usrb21snT56Um5ubXFwYd84qY4zOnz+vuLg4BQYG2r9QyS7CPwAAAAAgR9lsNoWEhCgmJkaHDh1ydjm3tcDAQAUHB9/0dgj/AAAAAIAc5+7urrJlyzL1/ya4ubnd9Ih/GsI/AAAAAOCWcHFxkaenp7PLgLjgHwAAAAAAlkf4BwAAAADA4gj/AAAAAABYHOEfAAAAAACLI/wDAAAAAGBxhH8AAAAAACyO8A8AAAAAgMUR/gEAAAAAsDjCPwAAAAAAFkf4BwAAAADA4gj/AAAAAABYHOEfAAAAAACLI/wDAAAAAGBxhH8AAAAAACyO8A8AAAAAgMUR/gEAAAAAsDhXZxcAWEH0zM252t/0qFq52h8AAACA2xsj/wAAAAAAWBzhHwAAAAAAiyP8AwAAAABgcYR/AAAAAAAsjvAPAAAAAIDFEf4BAAAAALA4wj8AAAAAABZH+AcAAAAAwOII/wAAAAAAWBzhHwAAAAAAiyP8AwAAAABgcYR/AAAAAAAsjvAPAAAAAIDFEf4BAAAAALA4wj8AAAAAABZH+AcAAAAAwOII/wAAAAAAWBzhHwAAAAAAiyP8AwAAAABgcYR/AAAAAAAsjvAPAAAAAIDFEf4BAAAAALA4wj8AAAAAABZH+AcAAAAAwOII/wAAAAAAWBzhHwAAAAAAiyP8AwAAAABgcYR/AAAAAAAsjvAPAAAAAIDFEf4BAAAAALA4wj8AAAAAABaXp8N/SkqKXn31VYWFhcnLy0ulS5fWyJEjZYyxtzHGaNiwYQoJCZGXl5eaNm2qgwcPOmzn9OnTioyMlL+/vwIDAxUdHa2kpCSHNjt37lSDBg3k6emp4sWLa+zYsbmyjwAAAAAA3Gp5OvyPGTNGU6ZM0aRJk7Rv3z6NGTNGY8eO1fvvv29vM3bsWL333nuaOnWqNm7cKB8fH0VEROjChQv2NpGRkdqzZ4+WLVumhQsXavXq1erevbt9fWJiopo3b66SJUtq69atGjdunIYPH65p06bl6v4CAAAAAHAr2MyVw+h5zEMPPaSgoCBNnz7dvqxDhw7y8vLSF198IWOMQkND9eKLL2rAgAGSpISEBAUFBWnmzJnq3Lmz9u3bpwoVKmjz5s2qWbOmJGnJkiVq2bKljhw5otDQUE2ZMkWvvPKKYmNj5e7uLkkaPHiw5s+fr99++y1TtSYmJiogIEAJCQny9/fP4Xfi9hc9c7OzS7CU6VG1nF0CAAAAACfLSg7N0yP/devW1fLly3XgwAFJ0o4dO7R27Vo9+OCDkqSYmBjFxsaqadOm9tcEBASoTp062rBhgyRpw4YNCgwMtAd/SWratKlcXFy0ceNGe5uGDRvag78kRUREaP/+/Tpz5kyGtV28eFGJiYkODwAAAAAA8iJXZxdwPYMHD1ZiYqLKlSunfPnyKSUlRW+++aYiIyMlSbGxsZKkoKAgh9cFBQXZ18XGxqpIkSIO611dXVWgQAGHNmFhYem2kbYuf/786WobPXq0RowYkQN7CQAAAADArZWnR/7nzJmjWbNmafbs2fr111/16aef6u2339ann37q7NI0ZMgQJSQk2B9///23s0sCAAAAACBDeXrkf+DAgRo8eLA6d+4sSapcubIOHTqk0aNHq2vXrgoODpYknThxQiEhIfbXnThxQtWqVZMkBQcHKy4uzmG7ly9f1unTp+2vDw4O1okTJxzapD1Pa3M1Dw8PeXh43PxOAgAAAABwi+Xpkf/z58/LxcWxxHz58ik1NVWSFBYWpuDgYC1fvty+PjExURs3
blR4eLgkKTw8XPHx8dq6dau9zYoVK5Samqo6derY26xevVqXLl2yt1m2bJnuueeeDKf8AwAAAABwO8nT4b9169Z68803tWjRIv3111/67rvvNH78eLVr106SZLPZ1LdvX73xxhtasGCBdu3apS5duig0NFRt27aVJJUvX14tWrTQM888o02bNmndunXq3bu3OnfurNDQUEnS448/Lnd3d0VHR2vPnj36+uuvNXHiRPXv399Zuw4AAAAAQI7J09P+33//fb366qt67rnnFBcXp9DQUPXo0UPDhg2ztxk0aJDOnTun7t27Kz4+XvXr19eSJUvk6elpbzNr1iz17t1bTZo0kYuLizp06KD33nvPvj4gIEA//vijevXqpRo1aqhQoUIaNmyYunfvnqv7CwAAAADArWAzxhhnF2EFWbm/4p0oeuZmZ5dgKdOjajm7BAAAAABOlpUcmqen/QMAAAAAgJtH+AcAAAAAwOII/wAAAAAAWBzhHwAAAAAAi8vTV/sHkLHcvoAiFxgEAAAAbm+M/AMAAAAAYHGEfwAAAAAALI7wDwAAAACAxRH+AQAAAACwOMI/AAAAAAAWR/gHAAAAAMDiCP8AAAAAAFgc4R8AAAAAAIsj/AMAAAAAYHGEfwAAAAAALI7wDwAAAACAxRH+AQAAAACwOMI/AAAAAAAWR/gHAAAAAMDiCP8AAAAAAFgc4R8AAAAAAIsj/AMAAAAAYHGEfwAAAAAALI7wDwAAAACAxRH+AQAAAACwOMI/AAAAAAAWR/gHAAAAAMDiCP8AAAAAAFgc4R8AAAAAAIsj/AMAAAAAYHGEfwAAAAAALI7wDwAAAACAxRH+AQAAAACwOMI/AAAAAAAWR/gHAAAAAMDiCP8AAAAAAFgc4R8AAAAAAIsj/AMAAAAAYHGEfwAAAAAALI7wDwAAAACAxRH+AQAAAACwOMI/AAAAAAAWR/gHAAAAAMDiCP8AAAAAAFgc4R8AAAAAAIsj/AMAAAAAYHGEfwAAAAAALI7wDwAAAACAxRH+AQAAAACwOMI/AAAAAAAWR/gHAAAAAMDiCP8AAAAAAFgc4R8AAAAAAIsj/AMAAAAAYHGEfwAAAAAALI7wDwAAAACAxRH+AQAAAACwOMI/AAAAAAAWR/gHAAAAAMDiCP8AAAAAAFgc4R8AAAAAAIsj/AMAAAAAYHHZCv9//vlnTtcBAAAAAABukWyF/zJlyqhx48b64osvdOHChZyuCQAAAAAA5KBshf9ff/1VVapUUf/+/RUcHKwePXpo06ZNOV0bAAAAAADIAdkK/9WqVdPEiRN17NgxffLJJzp+/Ljq16+vSpUqafz48Tp58mRO1wkAAAAAALLppi745+rqqvbt22vu3LkaM2aMfv/9dw0YMEDFixdXly5ddPz48ZyqEwAAAAAAZJPrzbx4y5Yt+uSTT/TVV1/Jx8dHAwYMUHR0tI4cOaIRI0aoTZs2nA4AWED0zM253uf0qFq53icAAABgVdkK/+PHj9eMGTO0f/9+tWzZUp999platmwpF5f/JhKEhYVp5syZKlWqVE7WCgAAAAAAsiFb4X/KlCnq1q2boqKiFBISkmGbIkWKaPr06TdVHAAAAAAAuHnZCv8HDx68YRt3d3d17do1O5sHAAAAAAA5KFsX/JsxY4bmzp2bbvncuXP16aef3nRRAAAAAAAg52Qr/I8ePVqFChVKt7xIkSIaNWrUTRd1paNHj+qJJ55QwYIF5eXlpcqVK2vLli329cYYDRs2TCEhIfLy8lLTpk3TzUw4ffq0IiMj5e/vr8DAQEVHRyspKcmhzc6dO9WgQQN5enqqePHiGjt2bI7uBwAAAAAAzpKt8H/48GGFhYWlW16yZEkdPnz4potKc+bMGdWrV09ubm5avHix9u7dq3feeUf58+e3txk7dqzee+89TZ06VRs3bpSPj48iIiJ04cIFe5vIyEjt2bNHy5Yt08KFC7V69Wp1797dvj4xMVHNmzdXyZIltXXr
Vo0bN07Dhw/XtGnTcmxfAAAAAABwlmyd81+kSBHt3Lkz3dX8d+zYoYIFC+ZEXZKkMWPGqHjx4poxY4Z92ZVfOhhjNGHCBA0dOlRt2rSRJH322WcKCgrS/Pnz1blzZ+3bt09LlizR5s2bVbNmTUnS+++/r5YtW+rtt99WaGioZs2apeTkZH3yySdyd3dXxYoVtX37do0fP97hSwIAAAAAAG5H2Rr5f+yxx/TCCy9o5cqVSklJUUpKilasWKE+ffqoc+fOOVbcggULVLNmTT3yyCMqUqSI7r33Xn300Uf29TExMYqNjVXTpk3tywICAlSnTh1t2LBBkrRhwwYFBgbag78kNW3aVC4uLtq4caO9TcOGDeXu7m5vExERof379+vMmTMZ1nbx4kUlJiY6PAAAAAAAyIuyFf5HjhypOnXqqEmTJvLy8pKXl5eaN2+uBx54IEfP+f/zzz81ZcoUlS1bVkuXLtWzzz6rF154wX5RwdjYWElSUFCQw+uCgoLs62JjY1WkSBGH9a6uripQoIBDm4y2cWUfVxs9erQCAgLsj+LFi9/k3gIAAAAAcGtka9q/u7u7vv76a40cOVI7duywX4ivZMmSOVpcamqqatasaf9C4d5779Xu3bs1depUp99GcMiQIerfv7/9eWJiIl8AAAAAAADypGyF/zR333237r777pyqJZ2QkBBVqFDBYVn58uX17bffSpKCg4MlSSdOnFBISIi9zYkTJ1StWjV7m7i4OIdtXL58WadPn7a/Pjg4WCdOnHBok/Y8rc3VPDw85OHhkc09AwAAAAAg92Qr/KekpGjmzJlavny54uLilJqa6rB+xYoVOVJcvXr1tH//fodlBw4csM8wCAsLU3BwsJYvX24P+4mJidq4caOeffZZSVJ4eLji4+O1detW1ahRw15famqq6tSpY2/zyiuv6NKlS3Jzc5MkLVu2TPfcc4/DnQUAAAAAALgdZeuc/z59+qhPnz5KSUlRpUqVVLVqVYdHTunXr59++eUXjRo1Sr///rtmz56tadOmqVevXpIkm82mvn376o033tCCBQu0a9cudenSRaGhoWrbtq2k/2YKtGjRQs8884w2bdqkdevWqXfv3urcubNCQ0MlSY8//rjc3d0VHR2tPXv26Ouvv9bEiRMdpvUDAAAAAHC7ytbI/1dffaU5c+aoZcuWOV2Pg1q1aum7777TkCFD9PrrryssLEwTJkxQZGSkvc2gQYN07tw5de/eXfHx8apfv76WLFkiT09Pe5tZs2apd+/eatKkiVxcXNShQwe999579vUBAQH68ccf1atXL9WoUUOFChXSsGHDuM0fAAAAAMASbMYYk9UXhYaG6ueff76l5/vfbhITExUQEKCEhAT5+/s7u5w8J3rmZmeXgNvM9Khazi4BAAAAyNOykkOzNe3/xRdf1MSJE5WN7w0AAAAAAEAuy9a0/7Vr12rlypVavHixKlasaL9IXpp58+blSHEAAAAAAODmZSv8BwYGql27djldCwAAAAAAuAWyFf5nzJiR03UAAAAAAIBbJFvn/EvS5cuX9dNPP+nDDz/U2bNnJUnHjh1TUlJSjhUHAAAAAABuXrZG/g8dOqQWLVro8OHDunjxopo1ayY/Pz+NGTNGFy9e1NSpU3O6TgAAAAAAkE3ZGvnv06ePatasqTNnzsjLy8u+vF27dlq+fHmOFQcAAAAAAG5etkb+16xZo/Xr18vd3d1healSpXT06NEcKQwAAAAAAOSMbI38p6amKiUlJd3yI0eOyM/P76aLAgAAAAAAOSdb4b958+aaMGGC/bnNZlNSUpJee+01tWzZMqdqAwAAAAAAOSBb0/7feecdRUREqEKFCrpw4YIef/xxHTx4UIUKFdKXX36Z0zUCAAAAAICbkK3wX6xYMe3YsUNfffWVdu7cqaSkJEVHRysyMtLhAoAAAAAAAMD5shX+JcnV1VVPPPFETtYCAAAAAABugWyF/88+++y667t06ZKtYgAA
AAAAQM7LVvjv06ePw/NLly7p/Pnzcnd3l7e3N+EfAAAAAIA8JFtX+z9z5ozDIykpSfv371f9+vW54B8AAAAAAHlMtsJ/RsqWLau33nor3awAAAAAAADgXDkW/qX/LgJ47NixnNwkAAAAAAC4Sdk653/BggUOz40xOn78uCZNmqR69erlSGEAAAAAACBnZCv8t23b1uG5zWZT4cKF9cADD+idd97JiboAAAAAAEAOyVb4T01Nzek6AAAAAADALZKj5/wDAAAAAIC8J1sj//3798902/Hjx2enCwAAAAAAkEOyFf63bdumbdu26dKlS7rnnnskSQcOHFC+fPlUvXp1ezubzZYzVQIAAAAAgGzLVvhv3bq1/Pz89Omnnyp//vySpDNnzuipp55SgwYN9OKLL+ZokQAAAAAAIPtsxhiT1RcVLVpUP/74oypWrOiwfPfu3WrevLmOHTuWYwXeLhITExUQEKCEhAT5+/s7u5w8J3rmZmeXAFzX9Khazi4BAAAAyJKs5NBsXfAvMTFRJ0+eTLf85MmTOnv2bHY2CQAAAAAAbpFshf927drpqaee0rx583TkyBEdOXJE3377raKjo9W+ffucrhEAAAAAANyEbJ3zP3XqVA0YMECPP/64Ll269N+GXF0VHR2tcePG5WiBAAAAAADg5mQr/Ht7e2vy5MkaN26c/vjjD0lS6dKl5ePjk6PFAQAAAACAm5etaf9pjh8/ruPHj6ts2bLy8fFRNq4dCAAAAAAAbrFshf9Tp06pSZMmuvvuu9WyZUsdP35ckhQdHc1t/gAAAAAAyGOyFf779esnNzc3HT58WN7e3vblnTp10pIlS3KsOAAAAAAAcPOydc7/jz/+qKVLl6pYsWIOy8uWLatDhw7lSGEAAAAAACBnZGvk/9y5cw4j/mlOnz4tDw+Pmy4KAAAAAADknGyF/wYNGuizzz6zP7fZbEpNTdXYsWPVuHHjHCsOAAAAAADcvGxN+x87dqyaNGmiLVu2KDk5WYMGDdKePXt0+vRprVu3LqdrBAAAAAAANyFbI/+VKlXSgQMHVL9+fbVp00bnzp1T+/bttW3bNpUuXTqnawQAAAAAADchyyP/ly5dUosWLTR16lS98sort6ImAAAAAACQg7I88u/m5qadO3feiloAAAAAAMAtkK1p/0888YSmT5+e07UAAAAAAIBbIFsX/Lt8+bI++eQT/fTTT6pRo4Z8fHwc1o8fPz5HigMAAAAAADcvS+H/zz//VKlSpbR7925Vr15dknTgwAGHNjabLeeqAwAAAAAANy1L4b9s2bI6fvy4Vq5cKUnq1KmT3nvvPQUFBd2S4gAAAAAAwM3L0jn/xhiH54sXL9a5c+dytCAAAAAAAJCzsnXBvzRXfxkAAAAAAADyniyFf5vNlu6cfs7xBwAAAAAgb8vSOf/GGEVFRcnDw0OSdOHCBfXs2TPd1f7nzZuXcxUCAAAAAICbkqXw37VrV4fnTzzxRI4WAwAAAAAAcl6Wwv+MGTNuVR0AAAAAAOAWuakL/gEAAAAAgLyP8A8AAAAAgMUR/gEAAAAAsLgsnfMPAFYVPXNzrvY3PapWrvYHAACAOxsj/wAAAAAAWBzhHwAAAAAAiyP8AwAAAABgcYR/AAAAAAAsjvAPAAAAAIDFEf4BAAAAALA4wj8AAAAAABZH+AcAAAAAwOII/wAAAAAAWBzhHwAAAAAAiyP8AwAAAABgcYR/AAAAAAAsjvAPAAAAAIDFEf4BAAAAALA4wj8AAAAAABZH+AcAAAAAwOII/wAAAAAAWBzhHwAAAAAAi7utwv9bb70lm82mvn372pdduHBBvXr1UsGCBeXr66sOHTroxIkTDq87fPiwWrVqJW9vbxUpUkQDBw7U5cuXHdr8/PPPql69ujw8PFSmTBnNnDkzF/YIAAAAAIBb77YJ/5s3b9aHH36oKlWqOCzv16+f/ve//2nu3LlatWqVjh07pvbt29vXp6SkqFWrVkpOTtb69ev1
6aefaubMmRo2bJi9TUxMjFq1aqXGjRtr+/bt6tu3r55++mktXbo01/YPAAAAAIBb5bYI/0lJSYqMjNRHH32k/Pnz25cnJCRo+vTpGj9+vB544AHVqFFDM2bM0Pr16/XLL79Ikn788Uft3btXX3zxhapVq6YHH3xQI0eO1AcffKDk5GRJ0tSpUxUWFqZ33nlH5cuXV+/evdWxY0e9++67TtlfAAAAAABy0m0R/nv16qVWrVqpadOmDsu3bt2qS5cuOSwvV66cSpQooQ0bNkiSNmzYoMqVKysoKMjeJiIiQomJidqzZ4+9zdXbjoiIsG8jIxcvXlRiYqLDAwAAAACAvMjV2QXcyFdffaVff/1VmzdvTrcuNjZW7u7uCgwMdFgeFBSk2NhYe5srg3/a+rR112uTmJiof//9V15eXun6Hj16tEaMGJHt/QIAAAAAILfk6ZH/v//+W3369NGsWbPk6enp7HIcDBkyRAkJCfbH33//7eySAAAAAADIUJ4e+d+6davi4uJUvXp1+7KUlBStXr1akyZN0tKlS5WcnKz4+HiH0f8TJ04oODhYkhQcHKxNmzY5bDftbgBXtrn6DgEnTpyQv79/hqP+kuTh4SEPD4+b3kcAd6bomelnM91K06Nq5Wp/AAAAyFvy9Mh/kyZNtGvXLm3fvt3+qFmzpiIjI+3/dnNz0/Lly+2v2b9/vw4fPqzw8HBJUnh4uHbt2qW4uDh7m2XLlsnf318VKlSwt7lyG2lt0rYBAAAAAMDtLE+P/Pv5+alSpUoOy3x8fFSwYEH78ujoaPXv318FChSQv7+/nn/+eYWHh+u+++6TJDVv3lwVKlTQk08+qbFjxyo2NlZDhw5Vr1697CP3PXv21KRJkzRo0CB169ZNK1as0Jw5c7Ro0aLc3WEAAAAAAG6BPB3+M+Pdd9+Vi4uLOnTooIsXLyoiIkKTJ0+2r8+XL58WLlyoZ599VuHh4fLx8VHXrl31+uuv29uEhYVp0aJF6tevnyZOnKhixYrp448/VkREhDN2CQAAAACAHGUzxhhnF2EFiYmJCggIUEJCgvz9/Z1dTp6T2+c3A3DEOf8AAADWk5UcmqfP+QcAAAAAADeP8A8AAAAAgMUR/gEAAAAAsDjCPwAAAAAAFkf4BwAAAADA4gj/AAAAAABYHOEfAAAAAACLI/wDAAAAAGBxhH8AAAAAACyO8A8AAAAAgMUR/gEAAAAAsDjCPwAAAAAAFkf4BwAAAADA4gj/AAAAAABYHOEfAAAAAACLI/wDAAAAAGBxhH8AAAAAACyO8A8AAAAAgMW5OrsAAMCtFz1zc672Nz2qVq72BwAAgOtj5B8AAAAAAIsj/AMAAAAAYHGEfwAAAAAALI7wDwAAAACAxRH+AQAAAACwOMI/AAAAAAAWR/gHAAAAAMDiCP8AAAAAAFgc4R8AAAAAAIsj/AMAAAAAYHGEfwAAAAAALI7wDwAAAACAxRH+AQAAAACwOMI/AAAAAAAWR/gHAAAAAMDiCP8AAAAAAFgc4R8AAAAAAIsj/AMAAAAAYHGEfwAAAAAALM7V2QUAAKwneubmXO1velStXO0PAADgdsPIPwAAAAAAFkf4BwAAAADA4gj/AAAAAABYHOEfAAAAAACLI/wDAAAAAGBxhH8AAAAAACyO8A8AAAAAgMUR/gEAAAAAsDjCPwAAAAAAFkf4BwAAAADA4gj/AAAAAABYHOEfAAAAAACLI/wDAAAAAGBxhH8AAAAAACyO8A8AAAAAgMUR/gEAAAAAsDjCPwAAAAAAFkf4BwAAAADA4gj/AAAAAABYHOEfAAAAAACLc3V2AQAA3KzomZtzvc/pUbVyvU8AAIDsYuQfAAAAAACLI/wDAAAAAGBxhH8AAAAAACyO8A8AAAAAgMUR/gEAAAAAsDjCPwAAAAAAFkf4BwAAAADA4gj/AAAAAABYHOEfAAAAAACLI/wDAAAAAGBxhH8AAAAAACyO8A8AAAAAgMUR/gEAAAAAsLg8
Hf5Hjx6tWrVqyc/PT0WKFFHbtm21f/9+hzYXLlxQr169VLBgQfn6+qpDhw46ceKEQ5vDhw+rVatW8vb2VpEiRTRw4EBdvnzZoc3PP/+s6tWry8PDQ2XKlNHMmTNv9e4BAAAAAJArXJ1dwPWsWrVKvXr1Uq1atXT58mW9/PLLat68ufbu3SsfHx9JUr9+/bRo0SLNnTtXAQEB6t27t9q3b69169ZJklJSUtSqVSsFBwdr/fr1On78uLp06SI3NzeNGjVKkhQTE6NWrVqpZ8+emjVrlpYvX66nn35aISEhioiIcNr+AwDyruiZm3O1v+lRtXK1PwAAYC02Y4xxdhGZdfLkSRUpUkSrVq1Sw4YNlZCQoMKFC2v27Nnq2LGjJOm3335T+fLltWHDBt13331avHixHnroIR07dkxBQUGSpKlTp+qll17SyZMn5e7urpdeekmLFi3S7t277X117txZ8fHxWrJkSYa1XLx4URcvXrQ/T0xMVPHixZWQkCB/f/9b+C7cnnL7j2QAsBrCPwAAuFpiYqICAgIylUPz9LT/qyUkJEiSChQoIEnaunWrLl26pKZNm9rblCtXTiVKlNCGDRskSRs2bFDlypXtwV+SIiIilJiYqD179tjbXLmNtDZp28jI6NGjFRAQYH8UL148Z3YSAAAAAIAcdtuE/9TUVPXt21f16tVTpUqVJEmxsbFyd3dXYGCgQ9ugoCDFxsba21wZ/NPWp627XpvExET9+++/GdYzZMgQJSQk2B9///33Te8jAAAAAAC3Qp4+5/9KvXr10u7du7V27VpnlyJJ8vDwkIeHh7PLAAAAAADghm6Lkf/evXtr4cKFWrlypYoVK2ZfHhwcrOTkZMXHxzu0P3HihIKDg+1trr76f9rzG7Xx9/eXl5dXTu8OAAAAAAC5Kk+Hf2OMevfure+++04rVqxQWFiYw/oaNWrIzc1Ny5cvty/bv3+/Dh8+rPDwcElSeHi4du3apbi4OHubZcuWyd/fXxUqVLC3uXIbaW3StgEAAAAAwO0sT0/779Wrl2bPnq3vv/9efn5+9nP0AwIC5OXlpYCAAEVHR6t///4qUKCA/P399fzzzys8PFz33XefJKl58+aqUKGCnnzySY0dO1axsbEaOnSoevXqZZ+237NnT02aNEmDBg1St27dtGLFCs2ZM0eLFi1y2r4DAAAAAJBT8vTI/5QpU5SQkKD7779fISEh9sfXX39tb/Puu+/qoYceUocOHdSwYUMFBwdr3rx59vX58uXTwoULlS9fPoWHh+uJJ55Qly5d9Prrr9vbhIWFadGiRVq2bJmqVq2qd955Rx9//LEiIiJydX8BAAAAALgVbMYY4+wirCAr91e8E0XP3OzsEgDgtjY9qpazSwAAAHlMVnJonh75BwAAAAAAN4/wDwAAAACAxRH+AQAAAACwOMI/AAAAAAAWR/gHAAAAAMDiCP8AAAAAAFgc4R8AAAAAAIsj/AMAAAAAYHGuzi4AuS965mZnlwAAyKLc/uyeHlUrV/sDAAC3FiP/AAAAAABYHOEfAAAAAACLI/wDAAAAAGBxhH8AAAAAACyO8A8AAAAAgMUR/gEAAAAAsDjCPwAAAAAAFkf4BwAAAADA4lydXQAAAMh7omduztX+pkfVytX+AAC40zDyDwAAAACAxRH+AQAAAACwOMI/AAAAAAAWxzn/sKTki8/man/uHlNytT8AAAAAyApG/gEAAAAAsDhG/gEAgNNxdwEAAG4tRv4BAAAAALA4Rv6RKw5oYq72VypXewMAAACAvI2RfwAAAAAALI7wDwAAAACAxTHtH7mi1MXfnF0CAAAAANyxGPkHAAAAAMDiCP8AAAAAAFgc0/6BHJB88dlc7c/dY0qu9gcAAADg9sbIPwAAAAAAFsfIPwAAuONEz9yc631Oj6qV630CAJCGkX8AAAAAACyO8A8AAAAAgMUx7R+4DT1/Ymiu9vd+0Bu52h8AAACAnEX4B25D7wSeytX+3HO1
NwAAAAA5jWn/AAAAAABYHOEfAAAAAACLY9o/AABALsjt2wtya0EAwJUY+QcAAAAAwOIY+QdwQ9xdAAAAALi9Ef4B3FBu313gL03U3eqTq30CAAAAVsa0fwAAAAAALI6RfwAAAAviAoMAgCsx8g8AAAAAgMUx8g8gTzqgibnaH9cYAAAAgJUR/gHkOaUu/pb7nXrkfpcAYCWcZgAAeRvT/gEAAAAAsDhG/gEAAHDbYaYBAGQNI/8AAAAAAFgcI/8AIC4wCAAAAGsj/AOAnHCRQS4wCAAAgFzEtH8AAAAAACyOkX8AAADgBrjAIIDbHeEfAJwg+eKzudqfu8eUXO0PAAAAeQvT/gEAAAAAsDhG/gHgDsDdDAAAN8KpDYC1Ef4B4A7A3QwA4PaS20EcgPUR/gEAAADkOmYaALmL8A8AyHHPnxiaq/29H/RGrvYHAABwuyH8AwBy3DuBp3K1vxf5sgEAAOC6CP8AAAAALI/TDHCnI/wDAG57uT3T4C9N5I4GAADgtkL4BwAgi0pd/E3Px+feqQacZgAAt5874Y4NzG64vRD+AQDIhtycbcBMAwBAXsSpFLcXwv8d6IAm5nqfpXK9RwCwltz+7ObLBgAArIXwfwcqdfE3Z5cAAMgCZ3xuH/DgywYAQN7CTIObQ/gHAADp5PYXDnzZAADArUX4BwAATpfrsxs8crc7AACcjfAPAADuOMkXn3V2Cbfci/EFc60v7kgBAHkf4f8qH3zwgcaNG6fY2FhVrVpV77//vmrXru3ssgAAALIkN+9IoYvP5uqXDRJfOABAVhH+r/D111+rf//+mjp1qurUqaMJEyYoIiJC+/fvV5EiRZxdHgAAQJ6Vq182SJLFZ2/85VEu1/vkWhiAtdmMMcbZReQVderUUa1atTRp0iRJUmpqqooXL67nn39egwcPvu5rExMTFRAQoISEBPn7++dGudn25Ic1nV0CAAAAkKty+1SY3L5F6/snTuZqf1Luf+mX2z/D2+Fq/1nJoYz8/5/k5GRt3bpVQ4YMsS9zcXFR06ZNtWHDhnTtL168qIsXL9qfJyQkSPrvzc/rkv9NcXYJAAAAQK4a7RGXe53Fd1do7vUmSRrtjAuZ/pu73eXmz/Dwv28rMfGjXOsvu9LyZ2bG9An//+eff/5RSkqKgoKCHJYHBQXpt9/SX4F49OjRGjFiRLrlxYsXv2U1AgAAAAByw3YFPDfH2UVk2tmzZxUQEHDdNoT/bBoyZIj69+9vf56amqrTp0+rYMGCstlsTqkpMTFRxYsX199//53nTz3AnYVjE3kVxybyKo5N5GUcn8ir7sRj0xijs2fPKjT0xnNNCP//p1ChQsqXL59OnDjhsPzEiRMKDg5O197Dw0MeHo5zawIDA29liZnm7+9/xxzsuL1wbCKv4thEXsWxibyM4xN51Z12bN5oxD+Nyy2u47bh7u6uGjVqaPny5fZlqampWr58ucLDw51YGQAAAAAAN4eR/yv0799fXbt2Vc2aNVW7dm1NmDBB586d01NPPeXs0gAAAAAAyDbC/xU6deqkkydPatiwYYqNjVW1atW0ZMmSdBcBzKs8PDz02muvpTsdAXA2jk3kVRybyKs4NpGXcXwir+LYvD6bycw9AQAAAAAAwG2Lc/4BAAAAALA4wj8AAAAAABZH+AcAAAAAwOII/wAAAAAAWBzh30I++OADlSpVSp6enqpTp442bdrk7JJgYaNHj1atWrXk5+enIkWKqG3bttq/f79DmwsXLqhXr14qWLCgfH191aFDB504ccKhzeHDh9WqVSt5e3urSJEiGjhwoC5fvpybuwKLe+utt2Sz2dS3b1/7Mo5NOMvRo0f1xBNPqGDBgvLy8lLlypW1ZcsW+3pjjIYNG6aQkBB5eXmpadOmOnjwoMM2Tp8+rcjISPn7+yswMFDR0dFKSkrK7V2BhaSkpOjVV19VWFiYvLy8VLp0aY0cOVJXXhecYxO5ZfXq1WrdurVCQ0Nls9k0f/58
h/U5dSzu3LlTDRo0kKenp4oXL66xY8fe6l1zOsK/RXz99dfq37+/XnvtNf3666+qWrWqIiIiFBcX5+zSYFGrVq1Sr1699Msvv2jZsmW6dOmSmjdvrnPnztnb9OvXT//73/80d+5crVq1SseOHVP79u3t61NSUtSqVSslJydr/fr1+vTTTzVz5kwNGzbMGbsEC9q8ebM+/PBDValSxWE5xyac4cyZM6pXr57c3Ny0ePFi7d27V++8847y589vbzN27Fi99957mjp1qjZu3CgfHx9FRETowoUL9jaRkZHas2ePli1bpoULF2r16tXq3r27M3YJFjFmzBhNmTJFkyZN0r59+zRmzBiNHTtW77//vr0NxyZyy7lz51S1alV98MEHGa7PiWMxMTFRzZs3V8mSJbV161aNGzdOw4cP17Rp0275/jmVgSXUrl3b9OrVy/48JSXFhIaGmtGjRzuxKtxJ4uLijCSzatUqY4wx8fHxxs3NzcydO9feZt++fUaS2bBhgzHGmB9++MG4uLiY2NhYe5spU6YYf39/c/HixdzdAVjO2bNnTdmyZc2yZctMo0aNTJ8+fYwxHJtwnpdeesnUr1//mutTU1NNcHCwGTdunH1ZfHy88fDwMF9++aUxxpi9e/caSWbz5s32NosXLzY2m80cPXr01hUPS2vVqpXp1q2bw7L27dubyMhIYwzHJpxHkvnuu+/sz3PqWJw8ebLJnz+/w//pL730krnnnntu8R45FyP/FpCcnKytW7eqadOm9mUuLi5q2rSpNmzY4MTKcCdJSEiQJBUoUECStHXrVl26dMnhuCxXrpxKlChhPy43bNigypUrKygoyN4mIiJCiYmJ2rNnTy5WDyvq1auXWrVq5XAMShybcJ4FCxaoZs2aeuSRR1SkSBHde++9+uijj+zrY2JiFBsb63BsBgQEqE6dOg7HZmBgoGrWrGlv07RpU7m4uGjjxo25tzOwlLp162r58uU6cOCAJGnHjh1au3atHnzwQUkcm8g7cupY3LBhgxo2bCh3d3d7m4iICO3fv19nzpzJpb3Jfa7OLgA3759//lFKSorDH6mSFBQUpN9++81JVeFOkpqaqr59+6pevXqqVKmSJCk2Nlbu7u4KDAx0aBsUFKTY2Fh7m4yO27R1QHZ99dVX+vXXX7V58+Z06zg24Sx//vmnpkyZov79++vll1/W5s2b9cILL8jd3V1du3a1H1sZHXtXHptFihRxWO/q6qoCBQpwbCLbBg8erMTERJUrV0758uVTSkqK3nzzTUVGRkoSxybyjJw6FmNjYxUWFpZuG2nrrjwdy0oI/wBuWq9evbR7926tXbvW2aUA+vvvv9WnTx8tW7ZMnp6ezi4HsEtNTVXNmjU1atQoSdK9996r3bt3a+rUqeratauTq8OdbM6cOZo1a5Zmz56tihUravv27erbt69CQ0M5NgELYdq/BRQqVEj58uVLd6XqEydOKDg42ElV4U7Ru3dvLVy4UCtXrlSxYsXsy4ODg5WcnKz4+HiH9lcel8HBwRket2nrgOzYunWr4uLiVL16dbm6usrV1VWrVq3Se++9J1dXVwUFBXFswilCQkJUoUIFh2Xly5fX4cOHJf3/Y+t6/58HBwenu5jv5cuXdfr0aY5NZNvAgQM1ePBgde7cWZUrV9aTTz6pfv36afTo0ZI4NpF35NSxeKf+P0/4twB3d3fVqFFDy5cvty9LTU3V8uXLFR4e7sTKYGXGGPXu3VvfffedVqxYkW7qVI0aNeTm5uZwXO7fv1+HDx+2H5fh4eHatWuXwwf0smXL5O/vn+4PZCCzmjRpol27dmn79u32R82aNRUZGWn/N8cmnKFevXrpbol64MABlSxZUpIUFham4OBgh2MzMTFRGzdudDg24+PjtXXrVnubFStWKDU1VXXq1MmFvYAVnT9/Xi4ujrEgX758Sk1NlcSxibwjp47F8PBwrV69WpcuXbK3WbZsme655x7LTvmXxNX+reKr
r74yHh4eZubMmWbv3r2me/fuJjAw0OFK1UBOevbZZ01AQID5+eefzfHjx+2P8+fP29v07NnTlChRwqxYscJs2bLFhIeHm/DwcPv6y5cvm0qVKpnmzZub7du3myVLlpjChQubIUOGOGOXYGFXXu3fGI5NOMemTZuMq6urefPNN83BgwfNrFmzjLe3t/niiy/sbd566y0TGBhovv/+e7Nz507Tpk0bExYWZv799197mxYtWph7773XbNy40axdu9aULVvWPPbYY87YJVhE165dTdGiRc3ChQtNTEyMmTdvnilUqJAZNGiQvQ3HJnLL2bNnzbZt28y2bduMJDN+/Hizbds2c+jQIWNMzhyL8fHxJigoyDz55JNm9+7d5quvvjLe3t7mww8/zPX9zU2Efwt5//33TYkSJYy7u7upXbu2+eWXX5xdEixMUoaPGTNm2Nv8+++/5rnnnjP58+c33t7epl27dub48eMO2/nrr7/Mgw8+aLy8vEyhQoXMiy++aC5dupTLewOruzr8c2zCWf73v/+ZSpUqGQ8PD1OuXDkzbdo0h/Wpqanm1VdfNUFBQcbDw8M0adLE7N+/36HNqVOnzGOPPWZ8fX2Nv7+/eeqpp8zZs2dzczdgMYmJiaZPnz6mRIkSxtPT09x1113mlVdecbgNGscmcsvKlSsz/Buza9euxpicOxZ37Nhh6tevbzw8PEzRokXNW2+9lVu76DQ2Y4xxzpwDAAAAAACQGzjnHwAAAAAAiyP8AwAAAABgcYR/AAAAAAAsjvAPAAAAAIDFEf4BAAAAALA4wj8AAAAAABZH+AcAAAAAwOII/wAAAAAAWBzhHwAA4Baw2WyaP3++s8sAAEAS4R8AgDzr5MmTevbZZ1WiRAl5eHgoODhYERERWrdunbNLyzPyQsAePny4qlWr5tQaAAC4EVdnFwAAADLWoUMHJScn69NPP9Vdd92lEydOaPny5Tp16pSzSwMAALcZRv4BAMiD4uPjtWbNGo0ZM0aNGzdWyZIlVbt2bQ0ZMkQPP/ywQ7unn35ahQsXlr+/vx544AHt2LHDYVtvvfWWgoKC5Ofnp+joaA0ePNhhpPr+++9X3759HV7Ttm1bRUVF2Z9fvHhRAwYMUNGiReXj46M6dero559/tq+fOXOmAgMDtXTpUpUvX16+vr5q0aKFjh8/7rDdTz75RBUrVpSHh4dCQkLUu3fvLO1LVn388ccqX768PD09Va5cOU2ePNm+7q+//pLNZtO8efPUuHFjeXt7q2rVqtqwYYPDNj766CMVL15c3t7eateuncaPH6/AwED7fo8YMUI7duyQzWaTzWbTzJkz7a/9559/1K5dO3l7e6ts2bJasGDBTe0PAADZRfgHACAP8vX1la+vr+bPn6+LFy9es90jjzyiuLg4LV68WFu3blX16tXVpEkTnT59WpI0Z84cDR8+XKNGjdKWLVsUEhLiEIAzq3fv3tqwYYO++uor7dy5U4888ohatGihgwcP2tucP39eb7/9tj7//HOtXr1ahw8f1oABA+zrp0yZol69eql79+7atWuXFixYoDJlymR6X7Jq1qxZGjZsmN58803t27dPo0aN0quvvqpPP/3Uod0rr7yiAQMGaPv27br77rv12GOP6fLly5KkdevWqWfPnurTp4+2b9+uZs2a6c0337S/tlOnTnrxxRdVsWJFHT9+XMePH1enTp3s60eMGKFHH31UO3fuVMuWLRUZGZnt/QEA4KYYAACQJ33zzTcmf/78xtPT09StW9cMGTLE7Nixw75+zZo1xt/f31y4cMHhdaVLlzYffvihMcaY8PBw89xzzzmsr1Onjqlatar9eaNGjUyfPn0c2rRp08Z07drVGGPMoUOHTL58+czRo0cd2jRp0sQMGTLEGGPMjBkzjCTz+++/29d/8MEHJigoyP48NDTUvPLKKxnua2b2JSOSzHfffZfhutKlS5vZs2c7LBs5cqQJDw83xhgTExNjJJmPP/7Yvn7P
nj1Gktm3b58xxphOnTqZVq1aOWwjMjLSBAQE2J+/9tprDu/nlbUNHTrU/jwpKclIMosXL77m/gAAcKsw8g8AQB7VoUMHHTt2TAsWLFCLFi30888/q3r16vZp5Tt27FBSUpIKFixonyng6+urmJgY/fHHH5Kkffv2qU6dOg7bDQ8Pz1Idu3btUkpKiu6++26HflatWmXvR5K8vb1VunRp+/OQkBDFxcVJkuLi4nTs2DE1adIkwz4ysy9Zce7cOf3xxx+Kjo522N4bb7yRbntVqlRxqDmtXknav3+/ateu7dD+6ufXc+W2fXx85O/vb982AAC5iQv+AQCQh3l6eqpZs2Zq1qyZXn31VT399NN67bXXFBUVpaSkJIWEhDice58m7Zz0zHBxcZExxmHZpUuX7P9OSkpSvnz5tHXrVuXLl8+hna+vr/3fbm5uDutsNpt9u15eXtetIaf25crtSf+dr3/1lx9X78OVddtsNklSampqlvvMSEbvSU5tGwCArCD8AwBwG6lQoYL91nbVq1dXbGysXF1dVapUqQzbly9fXhs3blSXLl3sy3755ReHNoULF3a4MF9KSop2796txo0bS5LuvfdepaSkKC4uTg0aNMhW3X5+fipVqpSWL19u3+6VMrMvWREUFKTQ0FD9+eefioyMzPZ27rnnHm3evNlh2dXP3d3dlZKSku0+AADIDYR/AADyoFOnTumRRx5Rt27dVKVKFfn5+WnLli0aO3as2rRpI0lq2rSpwsPD1bZtW40dO1Z33323jh07pkWLFqldu3aqWbOm+vTpo6ioKNWsWVP16tXTrFmztGfPHt111132vh544AH1799fixYtUunSpTV+/HjFx8fb1999992KjIxUly5d9M477+jee+/VyZMntXz5clWpUkWtWrXK1D4NHz5cPXv2VJEiRfTggw/q7NmzWrdunZ5//vlM7cu1xMTEaPv27Q7LypYtqxEjRuiFF15QQECAWrRooYsXL2rLli06c+aM+vfvn6man3/+eTVs2FDjx49X69attWLFCi1evNg+Q0CSSpUqZa+hWLFi8vPzk4eHR6a2DwBAbiH8AwCQB/n6+qpOnTp699139ccff+jSpUsqXry4nnnmGb388suS/ptC/sMPP+iVV17RU089pZMnTyo4OFgNGzZUUFCQpP+uRv/HH39o0KBBunDhgjp06KBnn31WS5cutffVrVs37dixQ126dJGrq6v69euXbnR+xowZeuONN/Tiiy/q6NGjKlSokO677z499NBDmd6nrl276sKFC3r33Xc1YMAAFSpUSB07dsz0vlxLRkF+zZo1evrpp+Xt7a1x48Zp4MCB8vHxUeXKldPd1vB66tWrp6lTp2rEiBEaOnSoIiIi1K9fP02aNMnepkOHDvbbBcbHx2vGjBkOt0kEACAvsJmrT/IDAACWNnz4cM2fPz/daDky55lnntFvv/2mNWvWOLsUAAAyjZF/AACA63j77bfVrFkz+fj4aPHixfr00081efJkZ5cFAECWEP4BAACuY9OmTRo7dqzOnj2ru+66S++9956efvppZ5cFAECWMO0fAAAAAACLc3F2AQAAAAAA4NYi/AMAAAAAYHGEfwAAAAAALI7wDwAAAACAxRH+AQAAAACwOMI/AAAAAAAWR/gHAAAAAMDiCP8AAAAAAFjc/wPbw9VywKTWYgAAAABJRU5ErkJggg==",
118
+ "text/plain": [
119
+ "<Figure size 1200x600 with 1 Axes>"
120
+ ]
121
+ },
122
+ "metadata": {},
123
+ "output_type": "display_data"
124
+ }
125
+ ],
126
+ "source": [
127
+ "# 2. Sequence lengths histogram\n",
128
+ "plt.figure(figsize=(12, 6))\n",
129
+ "for name, df in datasets.items():\n",
130
+ " plt.hist(df['Length'], bins=30, alpha=0.7, label=name, density=True)\n",
131
+ "\n",
132
+ "plt.title('UniRef50 (100k Sequences) - Train, Test, and Val Sequence Lengths')\n",
133
+ "plt.xlabel('Sequence Length')\n",
134
+ "plt.ylabel('Frequency')\n",
135
+ "plt.legend()\n",
136
+ "plt.show()"
137
+ ]
138
+ },
139
+ {
140
+ "cell_type": "code",
141
+ "execution_count": null,
142
+ "metadata": {},
143
+ "outputs": [],
144
+ "source": []
145
+ }
146
+ ],
147
+ "metadata": {
148
+ "kernelspec": {
149
+ "display_name": "Python 3",
150
+ "language": "python",
151
+ "name": "python3"
152
+ },
153
+ "language_info": {
154
+ "codemirror_mode": {
155
+ "name": "ipython",
156
+ "version": 3
157
+ },
158
+ "file_extension": ".py",
159
+ "mimetype": "text/x-python",
160
+ "name": "python",
161
+ "nbconvert_exporter": "python",
162
+ "pygments_lexer": "ipython3",
163
+ "version": "3.10.12"
164
+ }
165
+ },
166
+ "nbformat": 4,
167
+ "nbformat_minor": 2
168
+ }
data/uniref/100k_seqs/test.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/uniref/100k_seqs/train.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:baa07e6863c4d4b3fdc707b539d9520d66fc8d52be68c9d1c444fa96abc3b77f
3
+ size 20059182
data/uniref/100k_seqs/val.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/uniref/200k_seqs/check_data.ipynb ADDED
@@ -0,0 +1,168 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import pandas as pd\n",
10
+ "import seaborn as sns\n",
11
+ "import matplotlib.pyplot as plt\n",
12
+ "import numpy as np"
13
+ ]
14
+ },
15
+ {
16
+ "cell_type": "code",
17
+ "execution_count": 2,
18
+ "metadata": {},
19
+ "outputs": [],
20
+ "source": [
21
+ "path = \"/home/sg666/MDpLM/data/uniref50/200k_seqs\""
22
+ ]
23
+ },
24
+ {
25
+ "cell_type": "code",
26
+ "execution_count": 3,
27
+ "metadata": {},
28
+ "outputs": [],
29
+ "source": [
30
+ "train = pd.read_csv(path + \"/train.csv\")\n",
31
+ "test = pd.read_csv(path + \"/test.csv\")\n",
32
+ "val = pd.read_csv(path + '/val.csv')"
33
+ ]
34
+ },
35
+ {
36
+ "cell_type": "code",
37
+ "execution_count": 4,
38
+ "metadata": {},
39
+ "outputs": [],
40
+ "source": [
41
+ "valid_residues = ['A','R','N','D','C','E','Q','G','H','I','L','K','M','F','P','S','T','W','Y','V']\n",
42
+ "\n",
43
+ "for df in [train, test, val]:\n",
44
+ " df['Length'] = df['Sequence'].str.len()\n",
45
+ "\n",
46
+ " for residue in valid_residues:\n",
47
+ " df[residue] = 0\n",
48
+ "\n",
49
+ " for idx, row in df.iterrows():\n",
50
+ " sequence = row['Sequence']\n",
51
+ "\n",
52
+ " for residue in valid_residues:\n",
53
+ " df.at[idx, residue] = sequence.count(residue)"
54
+ ]
55
+ },
56
+ {
57
+ "cell_type": "code",
58
+ "execution_count": 5,
59
+ "metadata": {},
60
+ "outputs": [
61
+ {
62
+ "data": {
63
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAA/IAAAIjCAYAAACgdyAGAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABru0lEQVR4nO3deZxO9f//8ec1+z62YewzhqyDEFkyxjaTIWRLZDCViiIhfGRJWSOy5pMlldJE8iWEkFCyZCkh2RlLGCGGmfP7w2+uj8tcsxgzc83hcb/drlvN+7yvc17nXGeOeV7nfc6xGIZhCAAAAAAAmIKTowsAAAAAAAAZR5AHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHcM+6du2qoKCgbF/OrVu3NGDAABUvXlxOTk5q1apVti/zbklJSapUqZLefffdHF92aoKCgtS8eXNHl4E0/P7773JxcdHevXsdXUqmDB8+XBaLxdFlPLQaNGigBg0a5Phyc+rYnllBQUHq2rVruv3mzZsni8WiI0eOZHtNuD8Z/UwBpESQBx5QyX+Inz9/3u70SpUqZekfihaLxebl5+ensLAwLV++PNPznDNnjsaPH6+2bdvq448/1uuvvy7p9j/8dy/PYrHopZdeSjGPS5cu6cUXX1RAQIC8vb0VHh6uHTt2ZLiGzz//XMePH1evXr2sbb/88ot69eqlihUrytvbWyVKlFD79u114MABu/PYt2+fIiMj5ePjo3z58um5557TuXPnUvRLSkrSuHHjFBwcLA8PD1WuXFmff/55hmvNiKSkJM2fP1+1atVSvnz55Ovrq0ceeURdunTRTz/9lKXLephVqFBBUVFRGjp0aJbON7V9/+7XvHnzsnS5WS05aKX3yqpQuXnzZg0fPlyXLl3Kkvk52o4dO2SxWDRkyJBU+xw8eFAWi0V9+/bN8uU3aNDA5nPy9PRU5cqVNWnSJCUlJWX58szk7m1z5+uPP/5wdHkAHiAuji4AgPn897//tfvHWpMmTdSlSxcZhqGjR49qxowZatGihVasWKGIiIh7Xs7333+vokWL6v33308xrWrVqnrjjTds2h555BGbn5OSkhQVFaVdu3apf//+KlCggKZPn64GDRpo+/btKlOmTLo1jB8/Xs8884z8/f2tbWPHjtWmTZvUrl07Va5cWXFxcZo6daqqVaumn376SZUqVbL2PXHihOrXry9/f3+NGjVKV65c0Xvvvac9e/Zo69atcnNzs/b9z3/+ozFjxuiFF17QY489pm+++UbPPvusLBaLnnnmmQxvt7S89tprmjZtmlq2bKlOnTrJxcVF+/fv14oVK1SqVCk9/vjjWbIcSC+99JKaNWumQ4cOKSQkJEvmOWnSJF25csX687fffqvPP/9c77//vgoUKGBtr1Onzn0tZ8iQIRo4cOB9zSMt9evX1yeffGLT9vzzz6tmzZp68cUXrW0+Pj5ZsrzNmzdrxIgR6tq1q/LkyZMl83SkatWqqVy5cvr888/1zjvv2O2zYMECSVLnzp2zpYZixYpp9OjRkqTz589rwYIFev3113Xu3LlsG8G0f/9+OTnl/nNQd26bOxUpUsQB1eRuZvlMgVzJAPBAGjZsmCHJOHfunN3pFStWNMLCwrJseZKMnj172rT9/vvvhiTjySefzNQ8w8PDjYoVK6ZoL1mypBEVFZXu+xcuXGhIMmJjY61tZ8+eNfLkyWN07Ngx3ffv2LHDkGSsWbPGpn3Tpk3GjRs3bNoOHDhguLu7G506dbJpf/nllw1PT0/j6NGj1rbVq1cbkowPP/zQ2nbixAnD1dXVZhsmJSUZTzzxhFGsWDHj1q1b1vaMrv/d4uLiDIvFYrzwwgsppiUlJRlnzpy553kidQkJCUbevHmNt956K9uWMX78eEOScfjw4TT7XblyJdtq
yCre3t5GdHR0tsw7o9sptwgLC0v3+Dxy5EhDkrFlyxa708uWLWuUK1funpYbHR1tlCxZMkP13X1s/vfff42SJUsavr6+NscrR5g7d67DPm972yY9Zvj9BJD78BUYAEnS+vXrZbFY9OWXX+rdd99VsWLF5OHhoUaNGunPP/+06ZvR6yjLly+vAgUK6NChQzbtN27c0LBhw1S6dGm5u7urePHiGjBggG7cuCFJOnLkiCwWi9atW6fffvvNOixx/fr1NvNJSEjQ1atXU13+V199pUKFCunpp5+2tgUEBKh9+/b65ptvrMtLzZIlS+Tm5qb69evbtNepU8fmTLoklSlTRhUrVtS+ffts2hctWqTmzZurRIkS1rbGjRvrkUce0Zdffmlt++abb3Tz5k298sor1jaLxaKXX35ZJ06c0JYtW9Ks9eOPP5aLi4v69++fap/Dhw/LMAzVrVs3xTSLxaKCBQvatF26dEl9+vRR8eLF5e7urtKlS2vs2LEpRmNcunRJXbt2lb+/v/LkyaPo6Gj9+uuvKYZ4p3bdr739KSkpSZMmTVLFihXl4eGhQoUKqUePHrp48aJNv+T7Bfz444+qWbOmPDw8VKpUKc2fPz/Fci5duqTXX39dQUFBcnd3V7FixdSlSxeby0/S2zeTrV69WvXq1VOePHnk4+OjsmXLavDgwTZ9XF1d1aBBA33zzTcpaslOXbt2lY+Pjw4dOqRmzZrJ19dXnTp1kiRt3LhR7dq1U4kSJazr9/rrr+vff/+1mYe9a+QtFot69eqlJUuWqFKlSnJ3d1fFihW1cuXKbFuXkydPqnv37ipUqJB1eXPmzEnRb8qUKapYsaK8vLyUN29e1ahRw3pGevjw4dbfi+DgYOvx5F6vn75w4YL69eun0NBQ+fj4yM/PT08++aR27dpl0+9ejqWSNGvWLIWEhMjT01M1a9bUxo0bM1RP8meavJ532r59u/bv32/t88033ygqKkpFihSRu7u7QkJCNHLkSCUmJt7TNkiLh4eHHnvsMf3zzz86e/aszbRPP/1U1atXl6enp/Lly6dnnnlGx48ft+lz8OBBtWnTRoGBgfLw8FCxYsX0zDPPKD4+3trH3vXUv/32mxo2bChPT08VK1ZM77zzjt0RYxaLRcOHD0/Rbm+eGT32ZUZav58ZPe4ZhqF33nlHxYoVk5eXl8LDw/Xbb7+lWJfU7nWR2j0EVqxYoSeeeELe3t7y9fVVVFSUfvvtN7v1nzx5Uq1atZKPj48CAgLUr1+/FPtTUlKSJk+erNDQUHl4eCggIECRkZHatm2btc/9bP8vvvhC1atXl6+vr/z8/BQaGqrJkyenuf2BBwlD6wHYGDNmjJycnNSvXz/Fx8dr3Lhx6tSpk37++ed7nld8fLwuXrxoM6w4KSlJTz31lH788Ue9+OKLKl++vPbs2aP3339fBw4c0JIlSxQQEKBPPvlE7777rq5cuWIdoli+fHnrfL7//nt5eXkpMTFRJUuW1Ouvv67evXvbLH/nzp2qVq1aimF7NWvW1KxZs3TgwAGFhoamWv/mzZtVqVIlubq6pruuhmHozJkzqlixorXt5MmTOnv2rGrUqJGif82aNfXtt9/a1Ort7W2zjsn9kqfXq1fP7rJnzZqll156SYMHD051mK0klSxZUpIUGxurdu3aycvLK9W+165dU1hYmE6ePKkePXqoRIkS2rx5swYNGqTTp09r0qRJ1vVu2bKlfvzxR7300ksqX768vv76a0VHR6c674zo0aOH5s2bp27duum1117T4cOHNXXqVO3cuVObNm2y+Uz+/PNPtW3bVjExMYqOjtacOXPUtWtXVa9e3fp5XLlyRU888YT27dun7t27q1q1ajp//ryWLl2qEydOqECBAhnaN6XbwaF58+aqXLmy3n77bbm7u+vPP//Upk2bUqxH9erV9c033+jy5cvy8/O7r21yL27duqWIiAjVq1dP7733nvWzjo2N1bVr1/Tyyy8rf/78
2rp1q6ZMmaITJ04oNjY23fn++OOPWrx4sV555RX5+vrqgw8+UJs2bXTs2DHlz58/S9fhzJkzevzxx61fIAQEBGjFihWKiYnR5cuX1adPH0m3L/V57bXX1LZtW/Xu3VvXr1/X7t279fPPP+vZZ5/V008/rQMHDqS4BCEgIOCe6vnrr7+0ZMkStWvXTsHBwTpz5ow+/PBDhYWF6ffff08xbDojx9LZs2erR48eqlOnjvr06aO//vpLTz31lPLly6fixYunWU9wcLDq1KmjL7/8Uu+//76cnZ2t05LD/bPPPivpdnDz8fFR37595ePjo++//15Dhw7V5cuXNX78+HvaDmlJ/hL2zssX3n33Xb311ltq3769nn/+eZ07d05TpkxR/fr1tXPnTuXJk0cJCQmKiIjQjRs39OqrryowMFAnT57UsmXLdOnSJZtLm+4UFxen8PBw3bp1SwMHDpS3t7dmzZolT0/PTK9DRo99aUlMTExxfxoPDw/rpSKp/X5m9Lg3dOhQvfPOO2rWrJmaNWumHTt2qGnTpkpISMj0en/yySeKjo5WRESExo4dq2vXrmnGjBmqV6+edu7cafNla2JioiIiIlSrVi299957WrNmjSZMmKCQkBC9/PLL1n4xMTGaN2+ennzyST3//PO6deuWNm7cqJ9++snuv4tSxrf/6tWr1bFjRzVq1Ehjx46VdPt+NJs2bUrxtwDwwHLsgAAA2eVeh9avW7fOkGSUL1/eZtj45MmTDUnGnj17rG32hl9KMmJiYoxz584ZZ8+eNbZt22ZERkYakozx48db+33yySeGk5OTsXHjRpv3z5w505BkbNq0ydqW2hDFFi1aGGPHjjWWLFlizJ4923jiiScMScaAAQNs+nl7exvdu3dP8f7ly5cbkoyVK1fa3TbJihUrZrRp0ybNPneulyRj9uzZ1rZffvnFkGTMnz8/Rf/+/fsbkozr168bhmEYUVFRRqlSpVL0u3r1qiHJGDhwoLXtzqH1kydPNiwWizFy5MgM1dmlSxdDkpE3b16jdevWxnvvvWfs27cvRb+RI0ca3t7exoEDB2zaBw4caDg7OxvHjh0zDMMwlixZYkgyxo0bZ+1z69Yt62cyd+5ca3tqw4Xv3p82btxoSDI+++wzm34rV65M0V6yZElDkvHDDz9Y286ePWu4u7sbb7zxhrVt6NChhiRj8eLFKZaflJRkGEbG9833338/zd+tOy1YsMCQZPz888/p9s0Me0PGo6OjU+wzya5du5aibfTo0YbFYrG5/CP5+HEnSYabm5vx559/Wtt27dplSDKmTJly3+ty99D6mJgYo3Dhwsb58+dt+j3zzDOGv7+/dV1atmyZ7lDmrBhaf/36dSMxMdGm7fDhw4a7u7vx9ttvW9syeixNSEgwChYsaFStWtWm36xZswxJGbr0adq0aYYkY9WqVda2xMREo2jRokbt2rWtbfY+9x49ehheXl7WY5Bh3NvQ+nLlyhnnzp0zzp07Z/zxxx/WY9qdl/0cOXLEcHZ2Nt59912b9+/Zs8dwcXGxtu/cuTPFZVD2lCxZ0mYf6dOnT4rfr7Nnzxr+/v4pPm9JxrBhw9KdZ0aPfakJCwszJKV4JS8jtd/PjB73zp49a7i5uRlRUVHWY5dhGMbgwYNtlmMY9n+PDSPlpQf//POPkSdPnhSXXcXFxRn+/v427cn137nPG4ZhPProo0b16tWtP3///feGJOO1115Lsfw7687s9u/du7fh5+fn8Ms4AEdiaD0AG926dbMZNv7EE09Iun02Kj2zZ89WQECAChYsqBo1amjt2rUaMGCAzV2TY2NjVb58eZUrV07nz5+3vho2bChJWrduXbrLWbp0qQYMGKCWLVuqe/fu2rBhgyIiIjRx4kSdOHHC2u/ff/+Vu7t7ivd7eHhYp6fl77//Vt68edOt548//lDPnj1Vu3ZtmzPRyfPPSA2ZqXXcuHHq3bu3xo4dm+bdq+80d+5cTZ06VcHBwfr6
66/Vr18/lS9fXo0aNdLJkyet/WJjY/XEE08ob968Np9T48aNlZiYqB9++EHS7Zutubi42JyFcXZ21quvvpqheuyJjY2Vv7+/mjRpYrPs6tWry8fHJ8U+UqFCBet+Kt0+01q2bFmbfXbRokWqUqWKWrdunWJ5yUNPM7pvJp9t/Oabb9Idapu8/6T29IjsdOdnkuzOM5VXr17V+fPnVadOHRmGoZ07d6Y7z8aNG9uMsKlcubL8/PwydHy4F4ZhaNGiRWrRooUMw7D5PCIiIhQfH299+kSePHl04sQJ/fLLL1law93c3d2to3sSExP1999/Wy+rsPckjPSOpdu2bdPZs2f10ksv2fRLvkwlIzp06CBXV1eb4fUbNmzQyZMnrcO1JdvP/Z9//tH58+f1xBNP6Nq1a5m+k/off/yhgIAABQQEqFy5cho/fryeeuopm8tpFi9erKSkJLVv397mMwwMDFSZMmWsv1PJ67tq1Spdu3YtwzV8++23evzxx60jl6Tbv/93rvu9yuixLy1BQUFavXq1zWvAgAE2fe7+/czocW/NmjVKSEjQq6++ajNsPnmESmasXr1aly5dUseOHW2W7ezsrFq1atn9d/nup8Q88cQTKY65FotFw4YNS/HetB5tmdHtnydPHl29elWrV6/O7GoDpsfQeuAhZu8f0zuv5Zb+F0TuvkbPnpYtW6pXr15KSEjQL7/8olGjRunatWs2Q9sPHjyoffv2pTqs9e5rKzPCYrHo9ddf16pVq7R+/XrrXZo9PT3tXgd//fp16/T0GIaR5vS4uDhFRUXJ399fX331lc3w1uT5Z6SGe611w4YNWr58ud588800r4u/m5OTk3r27KmePXvq77//1qZNmzRz5kytWLFCzzzzjPX63IMHD2r37t3pfk5Hjx5V4cKFU9xdvGzZshmu6W4HDx5UfHx8imv27152srv3Wen2fnvnPnvo0CG1adMm3eVmZN/s0KGDPvroIz3//PMaOHCgGjVqpKefflpt27ZNcRlH8v6T1h+uCQkJunDhgk1bQECAzb50r1xcXFSsWLEU7ceOHdPQoUO1dOnSFL/Td16LnJqMbOuscO7cOV26dEmzZs3SrFmz7PZJ/jzefPNNrVmzRjVr1lTp0qXVtGlTPfvss3bvBXE/kq/3nT59ug4fPmxzPbC9ywrSO5YePXpUklI8PcPV1VWlSpXKUE358+dXRESEvv76a82cOVMeHh5asGCBXFxc1L59e2u/3377TUOGDNH333+vy5cv28wjI5+7PUFBQdYnmBw6dEjvvvuuzp07Z/3yUbr9O2UYRqpPCEkeKh4cHKy+fftq4sSJ+uyzz/TEE0/oqaeeUufOndP8UuPo0aOqVatWivb7Pf5k5NiXFm9vbzVu3DjV6fZ+PzN63EttvwkICMjQF8/2HDx4UJKsX1re7e7LgpKvd7+TvWNukSJFlC9fvnuuJSPb/5VXXtGXX36pJ598UkWLFlXTpk3Vvn17RUZG3tPyADMjyAMPqPTOOl+7ds3mD65kqYWH9AKtdPuRO8l/vDRr1kwFChRQr169FB4ebr3hXFJSkkJDQzVx4kS780jvutDUJL/vzkBUuHBhnT59OkXf5Lb0HgWUP3/+NANKfHy8nnzySV26dEkbN25MMb/ChQvbLO/uGvLly2c9C1+4cGGtW7dOhmHYhL7Uaq1YsaIuXbqkTz75RD169FBwcHCa65La+j311FN66qmn1KBBA23YsEFHjx5VyZIllZSUpCZNmqQ4i5Ts7kf9ZYTFYrG7H9m7QVLBggX12Wef2Z3P3X/g3c8+e/dyM7Jvenp66ocfftC6deu0fPlyrVy5UgsXLlTDhg313Xff2dSTvP/c+Wi4u23evFnh4eE2bYcPH76vZ6jfefY4WWJiopo0aaILFy7ozTffVLly5eTt7a2TJ0+qa9euGbqRV1Zt6/Qk19K5c+dU77dQuXJlSbfvnbF//34tW7ZMK1eu1KJFizR9+nQNHTpU
I0aMyLKaRo0apbfeekvdu3fXyJEjlS9fPjk5OalPnz52t11ObavOnTtr2bJlWrZsmZ566iktWrRITZs2tf6eXLp0SWFhYfLz89Pbb7+tkJAQeXh4aMeOHXrzzTczfQO3u8Nq3bp1Va1aNQ0ePFgffPCBpNufo8Vi0YoVK+xujzu/AJwwYYK6du2qb775Rt99951ee+01jR49Wj/99JPdL6Wyir3jT1Yf++5m7/fzXo97GZHaF4j21lm6fZ18YGBgiv4uLrZx4X6+ZExPRrd/wYIF9euvv2rVqlVasWKFVqxYoblz56pLly76+OOPs60+IDchyAMPqOQbm+3fvz9FOL527ZqOHz+upk2bZmsNPXr00Pvvv68hQ4aodevWslgsCgkJ0a5du9SoUaM0z1Leq+QhfXf+sVO1alVt3LhRSUlJNn80/fzzz/Ly8kr3D7Jy5crp8OHDdqddv35dLVq00IEDB7RmzRpVqFAhRZ+iRYsqICDA5g69ybZu3aqqVava1PrRRx9p3759NvNKvjHWnX2l28Hwq6++Ur169dSoUSP9+OOP9/WM4ho1amjDhg06ffq0SpYsqZCQEF25ciXNs0rS7f1s7dq1unLlis0f5fv370/RN2/evHaHYCefYUoWEhKiNWvWqG7duvd106q757l37950+2R033RyclKjRo3UqFEjTZw4UaNGjdJ//vMfrVu3zmabHT58WE5OTmnua1WqVEkxPNTeH9P3a8+ePTpw4IA+/vhjdenSxdqeG4emBgQEyNfXV4mJienug9LtUNmhQwd16NBBCQkJevrpp/Xuu+9q0KBB8vDwyJJjzVdffaXw8HDNnj3bpv3SpUtpflGTmuRj9MGDB23OhN68eVOHDx9WlSpVMjSfp556Sr6+vlqwYIFcXV118eJFm6Hl69ev199//63FixfbPIEjtWNbZlWuXFmdO3fWhx9+qH79+qlEiRIKCQmRYRgKDg7OUAAODQ1VaGiohgwZos2bN6tu3bqaOXNmqjfxLFmypPVs8p1SO/5cunTJpi0hISHFF60ZPfZltYwe9+7cb+4cuXHu3LkUXzwnn6G/dOmSzQ0I7R1zpdvhOKvWOyQkRKtWrdKFCxfu6az8vWx/Nzc3tWjRQi1atFBSUpJeeeUVffjhh3rrrbdUunTp+ykfMAWukQceUI0aNZKbm5tmzJiR4ozLrFmzdOvWLT355JPZWoOLi4veeOMN7du3z/oIrvbt2+vkyZP673//m6L/v//+m+bj5KTbZ9zvPptw8+ZNjRkzRm5ubjZnNtu2baszZ85o8eLF1rbz588rNjZWLVq0sHtN+p1q166tvXv3phjynpiYqA4dOmjLli2KjY1V7dq1U51HmzZttGzZMptHLa1du1YHDhxQu3btrG0tW7aUq6urpk+fbm0zDEMzZ85U0aJFVadOnRTzLlasmNasWaN///1XTZo00d9//53m+sTFxen3339P0Z6QkKC1a9fKycnJ+sdP+/bttWXLFq1atSpF/0uXLunWrVuSbo+8uHXrlmbMmGGdnpiYqClTpqR4X0hIiP744w+dO3fO2rZr164Ud3tv3769EhMTNXLkyBTzuHXrVoo/xjOiTZs22rVrl77++usU05LPkGZ037x7GLz0vy9a7t5Xtm/frooVK6Y5PDhv3rxq3LixzcveaJn7lXwW7c4zwoZh5MrHNTk7O6tNmzZatGiR3S9g7tyH7t7v3dzcVKFCBRmGoZs3b0q6HfQlZWrfubOmu8+mx8bG2txb4l7UqFFDAQEBmjlzps3dxufNm3dPdXp6eqp169b69ttvNWPGDHl7e6tly5Y2dUu2n3tCQoLNsSarDBgwQDdv3rSOann66afl7OysESNGpNh2hmFYP7vLly9bjynJQkND5eTklOZjQps1a6affvpJW7dutbadO3fO7lntkJCQFNe3z5o1K8W/Jxk99mW1jB73GjduLFdXV02ZMsVmm9q7m35yQL9zva9evZrijHVE
RIT8/Pw0atQo6+/Mne78fcuoNm3ayDAMu6Ni0hqVktHtf/fvvZOTk3WUTnqPlgUeFJyRBx5QBQsW1NChQzVkyBDVr19fTz31lLy8vLR582Z9/vnnatq0qVq0aJHtdXTt2lVDhw7V2LFj1apVKz333HP68ssv9dJLL2ndunWqW7euEhMT9ccff+jLL7/UqlWrUn0sjXT7RnfvvPOO2rZtq+DgYF24cEELFizQ3r17NWrUKJszmW3bttXjjz+ubt266ffff1eBAgU0ffp0JSYmZmjIbcuWLTVy5Eht2LDBZvTCG2+8oaVLl6pFixa6cOGCPv30U5v3JV+jL0mDBw9WbGyswsPD1bt3b125ckXjx49XaGiounXrZu1XrFgx9enTR+PHj9fNmzf12GOPacmSJdq4caM+++yzVIcyli5dWt99950aNGigiIgIff/996k+5uzEiROqWbOmGjZsqEaNGikwMFBnz57V559/rl27dqlPnz7WM4v9+/fX0qVL1bx5c+uj3K5evao9e/boq6++0pEjR1SgQAG1aNFCdevW1cCBA3XkyBFVqFBBixcvtnvdbffu3TVx4kRFREQoJiZGZ8+e1cyZM1WxYkWb63bDwsLUo0cPjR49Wr/++quaNm0qV1dXHTx4ULGxsZo8ebLatm2b7ud3p/79++urr75Su3bt1L17d1WvXl0XLlzQ0qVLNXPmTFWpUiXD++bbb7+tH374QVFRUSpZsqTOnj2r6dOnq1ixYjaPCLx586Y2bNigV1555Z5qzS7lypVTSEiI+vXrp5MnT8rPz0+LFi3K8uvbpdtDesPCwrR+/fpMz2PMmDFat26datWqpRdeeEEVKlTQhQsXtGPHDq1Zs8b6hUrTpk0VGBiounXrqlChQtq3b5+mTp2qqKgo+fr6Srr9GEBJ+s9//qNnnnlGrq6uatGihby9vTV8+HCNGDFC69atU4MGDVKtp3nz5nr77bfVrVs31alTR3v27NFnn32W4evZ7+bq6qp33nlHPXr0UMOGDdWhQwcdPnxYc+fOved5du7cWfPnz9eqVavUqVMn6xcXklSnTh3lzZtX0dHReu2112SxWPTJJ59k+RB/6faNJ5s1a6aPPvpIb731lkJCQvTOO+9o0KBBOnLkiFq1aiVfX18dPnxYX3/9tV588UX169dP33//vXr16qV27drpkUce0a1bt/TJJ59Yv9BJzYABA/TJJ58oMjJSvXv3tj5+rmTJktq9e7dN3+eff14vvfSS2rRpoyZNmmjXrl1atWpVitEUGT32ZbWMHveSn9k+evRoNW/eXM2aNdPOnTu1YsWKFHU1bdpUJUqUUExMjPr37y9nZ2fNmTNHAQEBOnbsmLWfn5+fZsyYoeeee07VqlXTM888Y+2zfPly1a1bV1OnTr2n9QkPD9dzzz2nDz74QAcPHlRkZKSSkpK0ceNGhYeHq1evXnbfl9Ht//zzz+vChQtq2LChihUrpqNHj2rKlCmqWrVqise4Ag+snLo9PgDH+PTTT43HH3/c8Pb2Ntzd3Y1y5coZI0aMsHnkkGH875FJdz/+5/DhwykeI5ba4+d69uxpt4bhw4cbkox169YZhnH7sUtjx441KlasaLi7uxt58+Y1qlevbowYMcKIj4+3vs/e4+e2bdtmtGjRwihatKjh5uZm+Pj4GPXq1TO+/PJLu8u+cOGCERMTY+TPn9/w8vIywsLCjF9++SWtTWajcuXKRkxMjE1bao8XSn7dbe/evUbTpk0NLy8vI0+ePEanTp2MuLi4FP0SExONUaNGGSVLljTc3NyMihUrGp9++mmKfnc+fi7Zzz//bPj6+hr169e3+6gpwzCMy5cvG5MnTzYiIiKMYsWKGa6uroavr69Ru3Zt47///a/NI4EM4/YjiQYNGmSULl3acHNzMwoUKGDUqVPHeO+994yEhARrv7///tt47rnnDD8/P8Pf39947rnnrI+TunO/MYzb+2OpUqUMNzc3o2rVqsaq
VatSfeTVrFmzjOrVqxuenp6Gr6+vERoaagwYMMA4depUmtvCMOw/6u7vv/82evXqZd13ihUrZkRHR9s83iwj++batWuNli1bGkWKFDHc3NyMIkWKGB07dkzxuKQVK1YYkoyDBw/a/TyyQmqPn/P29rbb//fffzcaN25s+Pj4GAUKFDBeeOEF6yPk7vysUnv8nL3f8bsfH/XPP/8Ykoxnnnnmntbl7sfPGYZhnDlzxujZs6dRvHhxw9XV1QgMDDQaNWpkzJo1y9rnww8/NOrXr2/kz5/fcHd3N0JCQoz+/fvbHEsM4/ZjrYoWLWo4OTnZbLM33njDsFgsdh/DeKfr168bb7zxhlG4cGHD09PTqFu3rrFly5YU+9q9HEsNwzCmT59uBAcHG+7u7kaNGjWMH374IdVHNabm1q1bRuHChQ1Jxrfffpti+qZNm4zHH3/c8PT0NIoUKWIMGDDAWLVqlc1x2TDu7fFzqT3yb/369Ske9bZo0SKjXr16hre3t+Ht7W2UK1fO6Nmzp7F//37DMAzjr7/+Mrp3726EhIQYHh4eRr58+Yzw8HBjzZo1NvO+e18zDMPYvXu3ERYWZnh4eBhFixY1Ro4cacyePTvF70ViYqLx5ptvGgUKFDC8vLyMiIgI488//7Q7z4we++512xhG2r+fhpGx415iYqIxYsQI677YoEEDY+/evXbXZfv27UatWrUMNzc3o0SJEsbEiRNTPH4u2bp164yIiAjD39/f8PDwMEJCQoyuXbsa27ZtS7d+e8eMW7duGePHjzfKlStnuLm5GQEBAcaTTz5pbN++3dons9v/q6++Mpo2bWoULFjQum49evQwTp8+neq2BR40FsPIhq9kAeAB8cknn6hnz546duyYzTWGSNuRI0cUHBysuXPnqmvXro4uxyFatWoli8Vidzj/g+zbb79V8+bNtWvXLoWGhjq6nHTVrFlTJUuWVGxsrKNLAe5LUFCQGjRoYPMIQAAPLq6RB4A0dOrUSSVKlNC0adMcXQpMZN++fVq2bJnd610fdOvWrdMzzzxjihB/+fJl7dq1S2+//bajSwEA4J5wjTwApMHJySndu50Ddytfvny23RQrtxs/fryjS8gwPz8/bowFADAlzsgDAAAAAGAiXCMPAAAAAICJcEYeAAAAAAATIcgDAAAAAGAi3OzOjqSkJJ06dUq+vr6yWCyOLgcAAAAA8IAzDEP//POPihQpIientM+5E+TtOHXqlIoXL+7oMgAAAAAAD5njx4+rWLFiafYhyNvh6+sr6fYG9PPzc3A1AAAAAIAH3eXLl1W8eHFrHk0LQd6O5OH0fn5+BHkAAAAAQI7JyOXd3OwOAAAAAAATIcgDAAAAAGAiDg/y06ZNU1BQkDw8PFSrVi1t3bo11b6//fab2rRpo6CgIFksFk2aNOm+5wkAAAAAgJk49Br5hQsXqm/fvpo5c6Zq1aqlSZMmKSIiQvv371fBggVT9L927ZpKlSqldu3a6fXXX8+SeQIAAAAA0peYmKibN286ugzTcnZ2louLS5Y84txiGIaRBTVlSq1atfTYY49p6tSpkm4/v7148eJ69dVXNXDgwDTfGxQUpD59+qhPnz5ZNs9kly9flr+/v+Lj47nZHQAAAICH3pUrV3TixAk5MD4+ELy8vFS4cGG5ubmlmHYvOdRhZ+QTEhK0fft2DRo0yNrm5OSkxo0ba8uWLTk6zxs3bujGjRvWny9fvpyp5QMAAADAgyYxMVEnTpyQl5eXAgICsuSM8sPGMAwlJCTo3LlzOnz4sMqUKSMnp8xf6e6wIH/+/HklJiaqUKFCNu2FChXSH3/8kaPzHD16tEaMGJGpZQIAAADAg+zmzZsyDEMBAQHy9PR0dDmm5enpKVdXVx09elQJCQny8PDI9LwcfrO73GDQoEGKj4+3vo4fP+7okgAAAAAgV+FM/P27n7Pwd3LYGfkCBQrI2dlZZ86csWk/c+aMAgMDc3Se7u7ucnd3z9QyAQAA
AADISQ47I+/m5qbq1atr7dq11rakpCStXbtWtWvXzjXzBAAAAAAgN3Ho4+f69u2r6Oho1ahRQzVr1tSkSZN09epVdevWTZLUpUsXFS1aVKNHj5Z0+2Z2v//+u/X/T548qV9//VU+Pj4qXbp0huYJAAAAALh/QQOX5+jyjoyJytHl2ZPa09NymkODfIcOHXTu3DkNHTpUcXFxqlq1qlauXGm9Wd2xY8dsriE4deqUHn30UevP7733nt577z2FhYVp/fr1GZonAAAAAODBlt71/MOGDdPw4cPveb6//PKLvL29M1lV1nHoc+RzK54jDwAAAAC3Xb9+XYcPH1ZwcLDNndZz8xn5uLg46/8vXLhQQ4cO1f79+61tPj4+8vHxkXT70XCJiYlyccn+89ypbUvp3nIod60HAAAAADxQAgMDrS9/f39ZLBbrz3/88Yd8fX21YsUKVa9eXe7u7vrxxx916NAhtWzZUoUKFZKPj48ee+wxrVmzxma+QUFBmjRpkvVni8Wijz76SK1bt5aXl5fKlCmjpUuXZvv6EeQBAAAAAA+dgQMHasyYMdq3b58qV66sK1euqFmzZlq7dq127typyMhItWjRQseOHUtzPiNGjFD79u21e/duNWvWTJ06ddKFCxeytXaCPAAAAADgofP222+rSZMmCgkJUb58+VSlShX16NFDlSpVUpkyZTRy5EiFhISke4a9a9eu6tixo0qXLq1Ro0bpypUr2rp1a7bWTpAHAAAAADx0atSoYfPzlStX1K9fP5UvX1558uSRj4+P9u3bl+4Z+cqVK1v/39vbW35+fjp79my21JzMoXetBwAAAADAEe6++3y/fv20evVqvffeeypdurQ8PT3Vtm1bJSQkpDkfV1dXm58tFouSkpKyvN47EeQBAAAAAA+9TZs2qWvXrmrdurWk22fojxw54tiiUkGQBwAAOS69RxYd8Xg2/ZkMj8+iagAAkMqUKaPFixerRYsWslgseuutt7L9zHpmEeQBAIAphX4cmm6fPdF7cqASAHg43ctz3c1g4sSJ6t69u+rUqaMCBQrozTff1OXLlx1dll0WwzAMRxeR21y+fFn+/v6Kj4+Xn5+fo8sBAOCBkxVn5EODS6TbhyAPAPfv+vXrOnz4sIKDg+Xh4eHockwtrW15LzmUu9YDAAAAAGAiBHkAAAAAAEyEIA8AAAAAgIkQ5AEAAAAAMBGCPAAAAAAAJkKQBwAAAADARAjyAAAAAACYCEEeAAAAAAATIcgDAAAAAGAiLo4uAAAAAABgQsP9c3h58Tm7vFyMM/IAAAAAgAeKxWJJ8zV8+PD7mveSJUuyrNbM4Iw8AAAAAOCBcvr0aev/L1y4UEOHDtX+/futbT4+Po4oK8twRh4AAAAA8EAJDAy0vvz9/WWxWGzavvjiC5UvX14eHh4qV66cpk+fbn1vQkKCevXqpcKFC8vDw0MlS5bU6NGjJUlBQUGSpNatW8tisVh/zmmckQcAAAAAPDQ+++wzDR06VFOnTtWjjz6qnTt36oUXXpC3t7eio6P1wQcfaOnSpfryyy9VokQJHT9+XMePH5ck/fLLLypYsKDmzp2ryMhIOTs7O2QdCPIAAAAAgIfGsGHDNGHCBD399NOSpODgYP3+++/68MMPFR0drWPHjqlMmTKqV6+eLBaLSpYsaX1vQECAJClPnjwKDAx0SP0SQR4AAAAA8JC4evWqDh06pJiYGL3wwgvW9lu3bsnf//Zd+Lt27aomTZqobNmyioyMVPPmzdW0aVNHlWwXQR4AAAAA8FC4cuWKJOm///2vatWqZTMteZh8tWrVdPjwYa1YsUJr1qxR+/bt1bhxY3311Vc5Xm9qCPIAAAAAgIdCoUKFVKRIEf3111/q1KlTqv38/PzUoUMHdejQQW3btlVkZKQuXLigfPnyydXVVYmJiTlYdUoEeQAAAADAQ2PEiBF67bXX5O/vr8jISN24cUPbtm3TxYsX1bdvX02cOFGFCxfWo48+
KicnJ8XGxiowMFB58uSRdPvO9WvXrlXdunXl7u6uvHnz5vg6EOQBAAAAAPdueLyjK8iU559/Xl5eXho/frz69+8vb29vhYaGqk+fPpIkX19fjRs3TgcPHpSzs7Mee+wxffvtt3Jyuv309gkTJqhv377673//q6JFi+rIkSM5vg4WwzCMHF9qLnf58mX5+/srPj5efn5+ji4HAIAHTtDA5WlOP+LxbLrzCA0ukW6fPdF7MlwTAMC+69ev6/DhwwoODpaHh4ejyzG1tLblveRQp+wsEgAAAAAAZC2CPAAAAAAAJkKQBwAAAADARAjyAAAAAACYCEEeAAAAAAATIcgDAAAAAGAiBHkAAAAAAEyEIA8AAAAAgIkQ5AEAAAAAMBEXRxcAAAAAADCf0I9Dc3R5e6L35OjyGjRooKpVq2rSpEk5utyM4Iw8AAAAAOCB0qJFC0VGRtqdtnHjRlksFu3evTuHq8o6BHkAAAAAwAMlJiZGq1ev1okTJ1JMmzt3rmrUqKHKlSs7oLKsQZAHAAAAADxQmjdvroCAAM2bN8+m/cqVK4qNjVWrVq3UsWNHFS1aVF5eXgoNDdXnn3/umGIzgSAPAAAAAHiguLi4qEuXLpo3b54Mw7C2x8bGKjExUZ07d1b16tW1fPly7d27Vy+++KKee+45bd261YFVZxxBHgAAAADwwOnevbsOHTqkDRs2WNvmzp2rNm3aqGTJkurXr5+qVq2qUqVK6dVXX1VkZKS+/PJLB1accQR5AAAAAMADp1y5cqpTp47mzJkjSfrzzz+1ceNGxcTEKDExUSNHjlRoaKjy5csnHx8frVq1SseOHXNw1RlDkAcAAAAAPJBiYmK0aNEi/fPPP5o7d65CQkIUFham8ePHa/LkyXrzzTe1bt06/frrr4qIiFBCQoKjS84QgjwAAAAA4IHUvn17OTk5acGCBZo/f766d+8ui8WiTZs2qWXLlurcubOqVKmiUqVK6cCBA44uN8MI8gAAAACAB5KPj486dOigQYMG6fTp0+rataskqUyZMlq9erU2b96sffv2qUePHjpz5oxji70HLo4uAAAAAABgPnui9zi6hAyJiYnR7Nmz1axZMxUpUkSSNGTIEP3111+KiIiQl5eXXnzxRbVq1Urx8fEOrjZjCPIPg+H+Gehjjh0WAAAAAO5F7dq1bR5BJ0n58uXTkiVL0nzf+vXrs6+o+0SQN7mggcvT7XPEI/35hH4cmuZ0s3zbBgAAAAAPOq6RBwAAAADARAjyAAAAAACYCEEeAAAAAAATIcgDAAAAANJ19w3jcO+yahsS5AEAAAAAqXJ2dpYkJSQkOLgS87t27ZokydXV9b7mw13rAQAAAACpcnFxkZeXl86dOydXV1c5OXE++F4ZhqFr167p7NmzypMnj/XLkcwiyAMAAAAAUmWxWFS4cGEdPnxYR48edXQ5ppYnTx4FBgbe93wI8gAAAACANLm5ualMmTIMr78Prq6u930mPhlBHgAAAACQLicnJ3l4eDi6DIib3QEAAAAAYCoEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATMThQX7atGkKCgqSh4eHatWqpa1bt6bZPzY2VuXKlZOHh4dCQ0P17bff2ky/cuWKevXqpWLFisnT01MVKlTQzJkzs3MVAAAAAADIMQ4N8gsXLlTfvn01bNgw7dixQ1WqVFFERITOnj1rt//mzZvVsWNHxcTEaOfOnWrVqpVatWqlvXv3Wvv07dtXK1eu1Keffqp9+/apT58+6tWrl5YuXZpTqwUAAAAAQLZxaJCfOHGiXnjhBXXr1s165tzLy0tz5syx23/y5MmKjIxU//79
Vb58eY0cOVLVqlXT1KlTrX02b96s6OhoNWjQQEFBQXrxxRdVpUqVdM/0AwAAAABgBg4L8gkJCdq+fbsaN278v2KcnNS4cWNt2bLF7nu2bNli01+SIiIibPrXqVNHS5cu1cmTJ2UYhtatW6cDBw6oadOmqdZy48YNXb582eYFAAAAAEBu5LAgf/78eSUmJqpQoUI27YUKFVJcXJzd98TFxaXbf8qUKapQoYKKFSsmNzc3RUZGatq0aapfv36qtYwePVr+/v7WV/Hixe9jzQAAAAAAyD4Ov9ldVpsyZYp++uknLV26VNu3b9eECRPUs2dPrVmzJtX3DBo0SPHx8dbX8ePHc7BiAAAAAAAyzsVRCy5QoICcnZ115swZm/YzZ84oMDDQ7nsCAwPT7P/vv/9q8ODB+vrrrxUVFSVJqly5sn799Ve99957KYblJ3N3d5e7u/v9rhIAAAAAANnOYWfk3dzcVL16da1du9balpSUpLVr16p27dp231O7dm2b/pK0evVqa/+bN2/q5s2bcnKyXS1nZ2clJSVl8RoAAAAAAJDzHHZGXrr9qLjo6GjVqFFDNWvW1KRJk3T16lV169ZNktSlSxcVLVpUo0ePliT17t1bYWFhmjBhgqKiovTFF19o27ZtmjVrliTJz89PYWFh6t+/vzw9PVWyZElt2LBB8+fP18SJEx22ngAAAAAAZBWHBvkOHTro3LlzGjp0qOLi4lS1alWtXLnSekO7Y8eO2Zxdr1OnjhYsWKAhQ4Zo8ODBKlOmjJYsWaJKlSpZ+3zxxRcaNGiQOnXqpAsXLqhkyZJ699139dJLL+X4+gEAAAAAkNUshmEYji4it7l8+bL8/f0VHx8vPz8/R5eTpqCBy9Ptc8Tj2XT7hAaXSHP6nug9Ga4JAID0pPfvV1b82yXx7xcAwDzuJYc+cHetBwAAAADgQUaQBwAAAADARAjyAAAAAACYCEEeAAAAAAATIcgDAAAAAGAiBHkAAAAAAEyEIA8AAAAAgIkQ5AEAAAAAMBGCPAAAAAAAJkKQBwAAAADARAjyAAAAAACYCEEeAAAAAAATIcgDAAAAAGAiBHkAAAAAAEyEIA8AAAAAgIkQ5AEAAAAAMBGCPAAAAAAAJkKQBwAAAADARAjyAAAAAACYCEEeAAAAAAATIcgDAAAAAGAiBHkAAAAAAEyEIA8AAAAAgIkQ5AEAAAAAMBEXRxcAAAAAIGsEDVyebp8jY6LS7RP6cWia0/dE78lwTQCyHkEeAAAAeJgM90+/T3CJ7K8DQKYxtB4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJiIi6MLAAA8XIIGLk+3z5ExUen2Cf04NM3pe6L3ZLgmAAAAM+GMPAAAAAAAJsIZeQBA7jPcP/0+wSWyvw4AAIBciDPyAAAAAACYCEEeAAAAAAATIcgDAAAAAGAiBHkAAAAAAEyEIA8AAAAAgIkQ5AEAAAAAMBGCPAAAAAAAJkKQBwAAAADARAjyAAAAAACYCEEeAAAAAAATIcgDAAAAAGAiBHkAAAAAAEyEIA8AAAAAgIm4OLoAAAAAAAAyK/Tj0HT77InekwOV5ByCPAAAAADAIYIGLk+3zxGPZ9PuEFwii6oxD4bWAwAAAABgIgR5AAAAAABMhCAPAAAAAICJEOQBAAAAADARgjwAAAAAACZCkAcAAAAAwEQI8gAAAAAAmAhBHgAAAAAAEyHIAwAAAABgIgR5AAAAAABMhCAPAAAAAICJEOQBAAAAADARgjwAAAAAACZCkAcAAAAAwEQI8gAAAAAAmAhBHgAAAAAAEyHIAwAAAABgIgR5AAAAAABMxMXRBQCAqQ33T2d6fM7UAQAAgIcG
QR65S3qhSCIYwVRCPw5Nt8+e6D05UAkAAAAeFA4fWj9t2jQFBQXJw8NDtWrV0tatW9PsHxsbq3LlysnDw0OhoaH69ttvU/TZt2+fnnrqKfn7+8vb21uPPfaYjh07ll2rAAAAAABAjnHoGfmFCxeqb9++mjlzpmrVqqVJkyYpIiJC+/fvV8GCBVP037x5szp27KjRo0erefPmWrBggVq1aqUdO3aoUqVKkqRDhw6pXr16iomJ0YgRI+Tn56fffvtNHh4eOb16uEvQwOXp9jnCx4RchH0WAAAAuZFDg/zEiRP1wgsvqFu3bpKkmTNnavny5ZozZ44GDhyYov/kyZMVGRmp/v37S5JGjhyp1atXa+rUqZo5c6Yk6T//+Y+aNWumcePGWd8XEhKSA2uDnJLeUGWGKQMAAAB4kDlsaH1CQoK2b9+uxo0b/68YJyc1btxYW7ZssfueLVu22PSXpIiICGv/pKQkLV++XI888ogiIiJUsGBB1apVS0uWLEmzlhs3bujy5cs2LwAAAAAAciOHBfnz588rMTFRhQoVsmkvVKiQ4uLi7L4nLi4uzf5nz57VlStXNGbMGEVGRuq7775T69at9fTTT2vDhg2p1jJ69Gj5+/tbX8WLF7/PtQMAAAAAIHs4/GZ3WSkpKUmS1LJlS73++uuqWrWqBg4cqObNm1uH3tszaNAgxcfHW1/Hjx/PqZIBAAAAALgnDrtGvkCBAnJ2dtaZM2ds2s+cOaPAwEC77wkMDEyzf4ECBeTi4qIKFSrY9Clfvrx+/PHHVGtxd3eXu7t7ZlYDAAAAAIAc5bAz8m5ubqpevbrWrl1rbUtKStLatWtVu3Ztu++pXbu2TX9JWr16tbW/m5ubHnvsMe3fv9+mz4EDB1SyZMksXgMAAAAAAHKeQ+9a37dvX0VHR6tGjRqqWbOmJk2apKtXr1rvYt+lSxcVLVpUo0ePliT17t1bYWFhmjBhgqKiovTFF19o27ZtmjVrlnWe/fv3V4cOHVS/fn2Fh4dr5cqV+r//+z+tX7/eEasIAAAAAECWcmiQ79Chg86dO6ehQ4cqLi5OVatW1cqVK603tDt27JicnP43aKBOnTpasGCBhgwZosGDB6tMmTJasmSJ9RnyktS6dWvNnDlTo0eP1muvvaayZctq0aJFqlevXo6vHwAAAAAAWc2hQV6SevXqpV69etmdZu8sert27dSuXbs059m9e3d17949K8oDAAAAACBXeaDuWg8AAAAAwIMuU0H+r7/+yuo6AAAAAABABmQqyJcuXVrh4eH69NNPdf369ayuCQAAAAAApCJTQX7Hjh2qXLmy+vbtq8DAQPXo0UNbt27N6toAAAAAAMBdMhXkq1atqsmTJ+vUqVOaM2eOTp8+rXr16qlSpUqaOHGizp07l9V1AgAAAAAA3efN7lxcXPT0008rNjZWY8eO1Z9//ql+/fqpePHi6tKli06fPp1VdQIAAAAAAN1nkN+2bZteeeUVFS5cWBMnTlS/fv106NAhrV69WqdOnVLLli2zqk4AAAAAAKBMPkd+4sSJmjt3rvbv369mzZpp/vz5atasmZycbn8vEBwcrHnz5ikoKCgrawUAAAAA4KGXqSA/Y8YMde/eXV27dlXhwoXt9ilYsKBmz559X8UBAAAAAABbmQryBw8eTLePm5uboqOjMzN7AAAAAA+4oIHL0+1zZExUun1CPw5Nc/qe6D0Zrgkwi0wF+blz58rHx0ft2rWzaY+NjdW1a9cI8AAAAADu33D/9PsEl8j+OoBcJlM3uxs9erQKFCiQor1gwYIaNWrUfRcFAAAAAADsy1SQP3bsmIKDg1O0lyxZUseOHbvvogAAAAAAgH2ZCvIFCxbU7t27U7Tv2rVL+fPnv++iAAAAAACAfZm6Rr5jx4567bXX5Ovrq/r160uSNmzYoN69e+uZZ57J0gIBAEDO4aZRAADkfpkK8iNHjtSRI0fUqFEjubjcnkVSUpK6dOnCNfIAADhIeneA
PuLxbPoz4aZRAADkepkK8m5ublq4cKFGjhypXbt2ydPTU6GhoSpZsmRW1wcAAAAAAO6QqSCf7JFHHtEjjzySVbUAAAAAAIB0ZCrIJyYmat68eVq7dq3Onj2rpKQkm+nff/99lhQHAAAAAABsZSrI9+7dW/PmzVNUVJQqVaoki8WS1XUBuAdZcV1saAaui+UmVwAAAIDjZSrIf/HFF/ryyy/VrFmzrK4HAAAAAACkIVPPkXdzc1Pp0qWzuhYAAAAAAJCOTAX5N954Q5MnT5ZhGFldDwAAAAAASEOmhtb/+OOPWrdunVasWKGKFSvK1dXVZvrixYuzpDgAAAAAAGArU0E+T548at26dVbXAgAAAAAA0pGpID937tysrgMAAAAAAGRApq6Rl6Rbt25pzZo1+vDDD/XPP/9Ikk6dOqUrV65kWXEAAAAAAMBWps7IHz16VJGRkTp27Jhu3LihJk2ayNfXV2PHjtWNGzc0c+bMrK4TAAAAAAAok2fke/furRo1aujixYvy9PS0trdu3Vpr167NsuIAAAAAAICtTJ2R37hxozZv3iw3Nzeb9qCgIJ08eTJLCgMAAAAAACll6ox8UlKSEhMTU7SfOHFCvr6+910UAAAAAACwL1NBvmnTppo0aZL1Z4vFoitXrmjYsGFq1qxZVtUGAAAAAADukqmh9RMmTFBERIQqVKig69ev69lnn9XBgwdVoEABff7551ldIwAAAAAA+P8yFeSLFSumXbt26YsvvtDu3bt15coVxcTEqFOnTjY3vwMAAAAAAFkrU0FeklxcXNS5c+esrAUAAAAwp+H+aU4ODS6R7iz2RO/JqmoAPOAyFeTnz5+f5vQuXbpkqhgAAAAgtwkauDzdPkc8cqAQAPj/MhXke/fubfPzzZs3de3aNbm5ucnLy4sgDwAAHh7pnImVOBsLAMhamQryFy9eTNF28OBBvfzyy+rfv/99FwUAAJBbpHc2ljOxAICclqnHz9lTpkwZjRkzJsXZegAAAAAAkHWyLMhLt2+Ad+rUqaycJQAAAAAAuEOmhtYvXbrU5mfDMHT69GlNnTpVdevWzZLCAAAAAABASpkK8q1atbL52WKxKCAgQA0bNtSECROyoi4AAAAAAGBHpoJ8UlJSVtcBAAAAAAAyIEuvkQcAAAAAANkrU2fk+/btm+G+EydOzMwiAAAAAACAHZkK8jt37tTOnTt18+ZNlS1bVpJ04MABOTs7q1q1atZ+Fosla6oEAAAAAACSMhnkW7RoIV9fX3388cfKmzevJOnixYvq1q2bnnjiCb3xxhtZWiQAAAAAALgtU9fIT5gwQaNHj7aGeEnKmzev3nnnHe5aDwAAAABANspUkL98+bLOnTuXov3cuXP6559/7rsoAAAAAABgX6aCfOvWrdWtWzctXrxYJ06c0IkTJ7Ro0SLFxMTo6aefzuoaAQAAAADA/5epa+Rnzpypfv366dlnn9XNmzdvz8jFRTExMRo/fnyWFggAAAAAAP4nU0Hey8tL06dP1/jx43Xo0CFJUkhIiLy9vbO0OAAAAAAAYCtTQ+uTnT59WqdPn1aZMmXk7e0twzCyqi4AAAAAAGBHpoL833//rUaNGumRRx5Rs2bNdPr0aUlSTEwMj54DAAAAACAbZSrIv/7663J1ddWxY8fk5eVlbe/QoYNWrlyZZcUBAAAAAABbmbpG/rvvvtOqVatUrFgxm/YyZcro6NGjWVIYAAAAAABIKVNn5K9evWpzJj7ZhQsX5O7uft9FAQAAAAAA+zIV5J944gnNnz/f+rPFYlFSUpLGjRun8PDwLCsOAAAAAADYytTQ+nHjxqlRo0batm2bEhISNGDAAP3222+6cOGCNm3alNU1AgAAAACA/y9TZ+QrVaqkAwcOqF69emrZsqWuXr2qp59+Wjt37lRISEhW1wgAAAAAAP6/ez4jf/PmTUVGRmrmzJn6z3/+kx01AQAAAACAVNzzGXlXV1ft3r07O2oBAAAAAADpyNTQ
+s6dO2v27NlZXQsAAAAAAEhHpm52d+vWLc2ZM0dr1qxR9erV5e3tbTN94sSJWVIcAAAAAACwdU9B/q+//lJQUJD27t2ratWqSZIOHDhg08disWRddQAAAAAAwMY9BfkyZcro9OnTWrdunSSpQ4cO+uCDD1SoUKFsKQ4AAAAAANi6p2vkDcOw+XnFihW6evVqlhYEAAAAAABSl6mb3SW7O9gDAAAAAIDsdU9B3mKxpLgGnmviAQAAAADIOfd0jbxhGOratavc3d0lSdevX9dLL72U4q71ixcvzroKAQAAAACA1T0F+ejoaJufO3funKXFAAAAAACAtN1TkJ87d2521QEAAAAAADLgvm52BwAAAAAAchZBHgAAAAAAEyHIAwAAAABgIgR5AAAAAABMhCAPAAAAAICJEOQBAAAAADARgjwAAAAAACZCkAcAAAAAwERyRZCfNm2agoKC5OHhoVq1amnr1q1p9o+NjVW5cuXk4eGh0NBQffvtt6n2femll2SxWDRp0qQsrhoAAAAAgJzn8CC/cOFC9e3bV8OGDdOOHTtUpUoVRURE6OzZs3b7b968WR07dlRMTIx27typVq1aqVWrVtq7d2+Kvl9//bV++uknFSlSJLtXAwAAAACAHOHi6AImTpyoF154Qd26dZMkzZw5U8uXL9ecOXM0cODAFP0nT56syMhI9e/fX5I0cuRIrV69WlOnTtXMmTOt/U6ePKlXX31Vq1atUlRUVJo13LhxQzdu3LD+fPny5axYNUChH4emOX1P9J4cqgQAAADAg8KhQT4hIUHbt2/XoEGDrG1OTk5q3LixtmzZYvc9W7ZsUd++fW3aIiIitGTJEuvPSUlJeu6559S/f39VrFgx3TpGjx6tESNGZG4l8MAKGrg8zelHxqT9BREAAAAAZAeHDq0/f/68EhMTVahQIZv2QoUKKS4uzu574uLi0u0/duxYubi46LXXXstQHYMGDVJ8fLz1dfz48XtcEwAAAAAAcobDh9Znte3bt2vy5MnasWOHLBZLht7j7u4ud3f3bK4MD5zh/un3CS6R/XUAAAAAeKg4NMgXKFBAzs7OOnPmjE37mTNnFBgYaPc9gYGBafbfuHGjzp49qxIl/hegEhMT9cYbb2jSpEk6cuRI1q4EAAAA7ll6l7BJ0hGPZ9OcHpqBL8y5Hw2AB5FDh9a7ubmpevXqWrt2rbUtKSlJa9euVe3ate2+p3bt2jb9JWn16tXW/s8995x2796tX3/91foqUqSI+vfvr1WrVmXfygAAAAAAkAMcPrS+b9++io6OVo0aNVSzZk1NmjRJV69etd7FvkuXLipatKhGjx4tSerdu7fCwsI0YcIERUVF6YsvvtC2bds0a9YsSVL+/PmVP39+m2W4uroqMDBQZcuWzdmVAwAAAAAgizk8yHfo0EHnzp3T0KFDFRcXp6pVq2rlypXWG9odO3ZMTk7/GzhQp04dLViwQEOGDNHgwYNVpkwZLVmyRJUqVXLUKgAAAAAAkGMcHuQlqVevXurVq5fdaevXr0/R1q5dO7Vr1y7D8+e6eAAAAADAg8Kh18gDAAAAAIB7Q5AHAAAAAMBEcsXQegDA/cmKxzhpeHwWVQMAAIDsRJAHAEiSQj8OTbcPz2MGAABwPIbWAwAAAABgIgR5AAAAAABMhCAPAAAAAICJEOQBAAAAADARgjwAAAAAACZCkAcAAAAAwEQI8gAAAAAAmAhBHgAAAAAAEyHIAwAAAABgIgR5AAAAAABMhCAPAAAAAICJEOQBAAAAADARgjwAAAAAACZCkAcAAAAAwEQI8gAAAAAAmAhBHgAAAAAAEyHIAwAAAABgIgR5AAAAAABMhCAPAAAAAICJEOQBAAAAADARgjwAAAAAACZCkAcAAAAAwEQI8gAAAAAAmAhBHgAAAAAAEyHIAwAAAABgIgR5AAAAAABMhCAPAAAAAICJEOQBAAAAADARgjwAAAAAACZCkAcAAAAAwEQI8gAAAAAAmAhB
HgAAAAAAEyHIAwAAAABgIgR5AAAAAABMhCAPAAAAAICJEOQBAAAAADARF0cXAAAAAADIOkEDl6c5/ciYqHTnEfpxaLp99kTvyXBNyFoEeQAAAAB4mAz3T79PcInsrwOZxtB6AAAAAABMhCAPAAAAAICJEOQBAAAAADARgjwAAAAAACZCkAcAAAAAwEQI8gAAAAAAmAhBHgAAAAAAEyHIAwAAAABgIgR5AAAAAABMhCAPAAAAAICJEOQBAAAAADARgjwAAAAAACZCkAcAAAAAwEQI8gAAAAAAmAhBHgAAAAAAEyHIAwAAAABgIgR5AAAAAABMhCAPAAAAAICJEOQBAAAAADARgjwAAAAAACZCkAcAAAAAwEQI8gAAAAAAmAhBHgAAAAAAEyHIAwAAAABgIgR5AAAAAABMhCAPAAAAAICJEOQBAAAAADARgjwAAAAAACZCkAcAAAAAwEQI8gAAAAAAmIiLowsAAABADhrun870+JypAwCQaQR5AABSETRwebp9jng8m26f0OASaU7fE70nwzUBacnYPpsDhQAAshVBHgAAAFahH4em24cvnwDAsbhGHgAAAAAAEyHIAwAAAABgIgR5AAAAAABMhCAPAAAAAICJ5IogP23aNAUFBcnDw0O1atXS1q1b0+wfGxurcuXKycPDQ6Ghofr222+t027evKk333xToaGh8vb2VpEiRdSlSxedOnUqu1cDAAAAAIBs5/Agv3DhQvXt21fDhg3Tjh07VKVKFUVEROjs2bN2+2/evFkdO3ZUTEyMdu7cqVatWqlVq1bau3evJOnatWvasWOH3nrrLe3YsUOLFy/W/v379dRTT+XkagEAAAAAkC0cHuQnTpyoF154Qd26dVOFChU0c+ZMeXl5ac6cOXb7T548WZGRkerfv7/Kly+vkSNHqlq1apo6daokyd/fX6tXr1b79u1VtmxZPf7445o6daq2b9+uY8eO5eSqAQAAAACQ5Rwa5BMSErR9+3Y1btzY2ubk5KTGjRtry5Ytdt+zZcsWm/6SFBERkWp/SYqPj5fFYlGePHnsTr9x44YuX75s8wIAAAAAIDdyaJA/f/68EhMTVahQIZv2QoUKKS4uzu574uLi7qn/9evX9eabb6pjx47y8/Oz22f06NHy9/e3vooXL56JtQEAAAAAIPs5fGh9drp586bat28vwzA0Y8aMVPsNGjRI8fHx1tfx48dzsEoAAAAAADLOxZELL1CggJydnXXmzBmb9jNnzigwMNDuewIDAzPUPznEHz16VN9//32qZ+Mlyd3dXe7u7plcCwAAAAAAco5Dz8i7ubmpevXqWrt2rbUtKSlJa9euVe3ate2+p3bt2jb9JWn16tU2/ZND/MGDB7VmzRrlz58/e1YAAAAAAIAc5tAz8pLUt29fRUdHq0aNGqpZs6YmTZqkq1evqlu3bpKkLl26qGjRoho9erQkqXfv3goLC9OECRMUFRWlL774Qtu2bdOsWbMk3Q7xbdu21Y4dO7Rs2TIlJiZar5/Ply+f3NzcHLOiAAAAAABkAYcH+Q4dOujcuXMaOnSo4uLiVLVqVa1cudJ6Q7tjx47Jyel/Awfq1KmjBQsWaMiQIRo8eLDKlCmjJUuWqFKlSpKkkydPaunSpZKkqlWr2ixr3bp1atCgQY6sFwAAAAAA2cHhQV6SevXqpV69etmdtn79+hRt7dq1U7t27ez2DwoKkmEYWVkeAAAAAAC5xgN913oAAAAAAB40BHkAAAAAAEyEIA8AAAAAgIkQ5AEAAAAAMBGCPAAAAAAAJkKQBwAAAADARAjyAAAAAACYCEEeAAAAAAATIcgDAAAAAGAiBHkAAAAAAEyEIA8AAAAAgIkQ5AEAAAAAMBGCPAAAAAAAJkKQBwAAAADARAjyAAAAAACYCEEeAAAAAAATIcgDAAAAAGAiBHkAAAAAAEzExdEFAHi4BA1cnub0Ix7Ppj+T4fFZVA0AAABgPgR5AKYT+nFoun32RO/JgUoA
AACAnMfQegAAAAAATIQgDwAAAACAiTC0HgAAAADSke59fsZEpTuP9C4P5NJAZBRBHgAAAADu13D/9PsEl8j+OvBQYGg9AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwEYI8AAAAAAAmQpAHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyAMAAAAAYCIEeQAAAAAATIQgDwAAAACAiRDkAQAAAAAwkVwR5KdNm6agoCB5eHioVq1a2rp1a5r9Y2NjVa5cOXl4eCg0NFTffvutzXTDMDR06FAVLlxYnp6eaty4sQ4ePJidqwAAAAAAQI5weJBfuHCh+vbtq2HDhmnHjh2qUqWKIiIidPbsWbv9N2/erI4dOyomJkY7d+5Uq1at1KpVK+3du9faZ9y4cfrggw80c+ZM/fzzz/L29lZERISuX7+eU6sFAAAAAEC2cHiQnzhxol544QV169ZNFSpU0MyZM+Xl5aU5c+bY7T958mRFRkaqf//+Kl++vEaOHKlq1app6tSpkm6fjZ80aZKGDBmili1bqnLlypo/f75OnTqlJUuW5OCaAQAAAACQ9VwcufCEhARt375dgwYNsrY5OTmpcePG2rJli933bNmyRX379rVpi4iIsIb0w4cPKy4uTo0bN7ZO9/f3V61atbRlyxY988wzKeZ548YN3bhxw/pzfHy8JOny5cuZXrecknTjWrp9LluMdPsk/puY9jyyYFuYqVYp/XoftlqlnNkPzFSrlH691JqSmY4FZqpV4rhldz4ct1Iu5wE6FlBrSmY6bpmpVoljrN355JL9IKdqzW7JNRpG+p+PDAc6efKkIcnYvHmzTXv//v2NmjVr2n2Pq6ursWDBApu2adOmGQULFjQMwzA2bdpkSDJOnTpl06ddu3ZG+/bt7c5z2LBhhiRevHjx4sWLFy9evHjx4sXLoa/jx4+nm6UdekY+txg0aJDNWf6kpCRduHBB+fPnl8VicWBlWe/y5csqXry4jh8/Lj8/P0eXkyYz1SqZq15qzR7Umj2oNXuYqVbJXPVSa/ag1uxBrdnHTPVSa+5gGIb++ecfFSlSJN2+Dg3yBQoUkLOzs86cOWPTfubMGQUGBtp9T2BgYJr9k/975swZFS5c2KZP1apV7c7T3d1d7u7uNm158uS5l1UxHT8/P9Ps+GaqVTJXvdSaPag1e1Br9jBTrZK56qXW7EGt2YNas4+Z6qVWx/P3989QP4fe7M7NzU3Vq1fX2rVrrW1JSUlau3atateubfc9tWvXtukvSatXr7b2Dw4OVmBgoE2fy5cv6+eff051ngAAAAAAmIXDh9b37dtX0dHRqlGjhmrWrKlJkybp6tWr6tatmySpS5cuKlq0qEaPHi1J6t27t8LCwjRhwgRFRUXpiy++0LZt2zRr1ixJksViUZ8+ffTOO++oTJkyCg4O1ltvvaUiRYqoVatWjlpNAAAAAACyhMODfIcOHXTu3DkNHTpUcXFxqlq1
qlauXKlChQpJko4dOyYnp/8NHKhTp44WLFigIUOGaPDgwSpTpoyWLFmiSpUqWfsMGDBAV69e1YsvvqhLly6pXr16WrlypTw8PHJ8/XIbd3d3DRs2LMWlBLmRmWqVzFUvtWYPas0e1Jo9zFSrZK56qTV7UGv2oNbsY6Z6qdV8LIaRkXvbAwAAAACA3MCh18gDAAAAAIB7Q5AHAAAAAMBECPIAAAAAAJgIQR4AAAAAABMhyD9ktmzZImdnZ0VFRTm6lFR17dpVFotFFotFrq6uCg4O1oABA3T9+nVHl5ZCcq1jxoyxaV+yZIksFouDqrLv7u1aqFAhNWnSRHPmzFFSUpKjy7MrLi5Or776qkqVKiV3d3cVL15cLVq00Nq1ax1dmo07t+2dr8jISEeXZnpxcXHq3bu3SpcuLQ8PDxUqVEh169bVjBkzdO3aNUeXZ9W1a1e7jzhdv369LBaLLl26lOM1ZURqdecmZqgxmb1av/rqK3l4eGjChAmOKeouycerl156KcW0nj17ymKxqGvXrjlfWBpSO8b++eefji4thTtrdXNzU+nSpfX222/r1q1bji7NrnPnzunll19WiRIl5O7ursDAQEVERGjTpk2OLs3K3md/52v48OGOLlEzZ86Ur6+vzed85coVubq6qkGDBjZ9k/9dOHToUA5XacswDDVu3FgREREppk2fPl158uTRiRMnHFBZSi1atEj1b6qNGzfKYrFo9+7dOVyV4xHkHzKzZ8/Wq6++qh9++EGnTp1ydDmpioyM1OnTp/XXX3/p/fff14cffqhhw4Y5uiy7PDw8NHbsWF28eNHRpaQrebseOXJEK1asUHh4uHr37q3mzZvnuj8yjhw5ourVq+v777/X+PHjtWfPHq1cuVLh4eHq2bOno8tLIXnb3vn6/PPPHV1WCsePH1f37t1VpEgRubm5qWTJkurdu7f+/vtvR5eWwl9//aVHH31U3333nUaNGqWdO3dqy5YtGjBggJYtW6Y1a9Y4ukQgTR999JE6deqkGTNm6I033nB0OVbFixfXF198oX///dfadv36dS1YsEAlSpRwYGWps3eMDQ4OdnRZdiXXevDgQb3xxhsaPny4xo8f7+iy7GrTpo127typjz/+WAcOHNDSpUvVoEGDXPVvwp2f+aRJk+Tn52fT1q9fP0eXqPDwcF25ckXbtm2ztm3cuFGBgYH6+eefbU5GrVu3TiVKlFBISIgjSrWyWCyaO3eufv75Z3344YfW9sOHD2vAgAGaMmWKihUr5sAK/ycmJkarV6+2+8XC3LlzVaNGDVWuXNkBlTmWw58jj5xz5coVLVy4UNu2bVNcXJzmzZunwYMHO7osu5K/FZZu/8HRuHFjrV69WmPHjnVwZSk1btxYf/75p0aPHq1x48Y5upw03bldixYtqmrVqunxxx9Xo0aNNG/ePD3//PMOrvB/XnnlFVksFm3dulXe3t7W9ooVK6p79+4OrMy+O7dtbvXXX3+pdu3aeuSRR/T5558rODhYv/32m/r3768VK1bop59+Ur58+RxdptUrr7wiFxcXbdu2zWYfKFWqlFq2bCmenorcbNy4cRo2bJi++OILtW7d2tHl2KhWrZoOHTqkxYsXq1OnTpKkxYsXq0SJErk2HJvhGJvszlpffvllff3111q6dKkGDRrk4MpsXbp0SRs3btT69esVFhYmSSpZsqRq1qzp4Mps3fm5+/v7y2Kx5Lp9oWzZsipcuLDWr1+vxx9/XNLtM+8tW7bU999/r59++sl6Zn79+vUKDw93YLX/U7x4cU2ePFm9evVS06ZNFRQUpJiYGDVt2lTPPfeco8uzat68uQICAjRv3jwNGTLE2n7lyhXFxsbm2i/Kshtn5B8iX375pcqVK6eyZcuqc+fOmjNnjin+EN67d682b94sNzc3R5dil7Ozs0aNGqUpU6bkmiFI96Jhw4aqUqWKFi9e7OhSrC5cuKCVK1eqZ8+eNgEuWZ48eXK+qAdAz549
5ebmpu+++05hYWEqUaKEnnzySa1Zs0YnT57Uf/7zH0eXaPX333/ru+++S3UfkJTrLl8Bkr355psaOXKkli1blutCfLLu3btr7ty51p/nzJmjbt26ObCiB5enp6cSEhIcXUYKPj4+8vHx0ZIlS3Tjxg1Hl2N64eHhWrdunfXndevWqUGDBgoLC7O2//vvv/r5559zTZCXpOjoaDVq1Ejdu3fX1KlTtXfvXpsz9LmBi4uLunTponnz5tlkl9jYWCUmJqpjx44OrM5xCPIPkdmzZ6tz586Sbg/7io+P14YNGxxclX3Lli2Tj4+PPDw8FBoaqrNnz6p///6OLitVrVu3VtWqVXPt8P/0lCtXTkeOHHF0GVZ//vmnDMNQuXLlHF1KhiXvs3e+Ro0a5eiyrC5cuKBVq1bplVdekaenp820wMBAderUSQsXLsw1X+4l7wNly5a1aS9QoIB1+7755psOqs4+e/vAk08+6eiykMNWrFihcePG6ZtvvlGjRo0cXU6qOnfurB9//FFHjx7V0aNHtWnTJuvfCLnR3b9f7dq1c3RJ6TIMQ2vWrNGqVavUsGFDR5eTgouLi+bNm6ePP/5YefLkUd26dTV48OCH8lrjrBAeHq5Nmzbp1q1b+ueff7Rz506FhYWpfv36Wr9+vaTb96q6ceNGrgrykjRr1izt3btXffr00axZsxQQEODoklLo3r27Dh06ZJNd5s6dqzZt2sjf39+BlTkOQ+sfEvv379fWrVv19ddfS7p98O7QoYNmz56d4iYcuUF4eLhmzJihq1ev6v3335eLi4vatGnj6LLSNHbsWDVs2DBXXKt1rwzDyFVnN3NLmLwXyfvsnXLTMPWDBw/KMAyVL1/e7vTy5cvr4sWLOnfunAoWLJjD1WXc1q1blZSUpE6dOuW6M0j29oGff/45V4cjZL3KlSvr/PnzGjZsmGrWrCkfHx9Hl2RXQECAoqKirGe4oqKiVKBAAUeXlaq7f79SG6mTGyR/6XDz5k0lJSXp2WefzRU3ZLOnTZs2ioqK0saNG/XTTz9Zv4j66KOPct1ND3O7Bg0a6OrVq/rll1908eJFPfLIIwoICFBYWJi6deum69eva/369SpVqlSuuxdFwYIF1aNHDy1ZsiTX3ly0XLlyqlOnjubMmaMGDRrozz//1MaNG/X22287ujSHIcg/JGbPnq1bt26pSJEi1jbDMOTu7q6pU6fmum+yvL29Vbp0aUm3h/tVqVJFs2fPVkxMjIMrS139+vUVERGhQYMGme4fv3379uWq6yLLlCkji8WiP/74w9GlZNid+2xult6XJLnlEpbSpUvLYrFo//79Nu2lSpWSpBSjCnIDe/uAGS+3wf0pWrSovvrqK4WHhysyMlIrVqyQr6+vo8uyq3v37urVq5ckadq0aQ6uJm1mOcZK//vSwc3NTUWKFJGLS+7+c9vDw0NNmjRRkyZN9NZbb+n555/XsGHDTPe3jKOVLl1axYoV07p163Tx4kXrfQeKFCmi4sWLa/PmzVq3bl2uHJ0h3T7Jl9v31ZiYGL366quaNm2a5s6dq5CQEOt2fhgxtP4hcOvWLc2fP18TJkzQr7/+an3t2rVLRYoUyZV31r6Tk5OTBg8erCFDhtjcYTc3GjNmjP7v//5PW7ZscXQpGfb9999rz549uWrEQ758+RQREaFp06bp6tWrKabn1kd55WbJwXjfvn12p+/bt08BAQG55v4D+fPnV5MmTTR16lS7+wCQm5UsWVIbNmxQXFycIiMj9c8//zi6JLsiIyOVkJCgmzdv2n0EFTIn+UuHEiVK5PpgZE+FChU47mZSeHi41q9fr/Xr19uMeK1fv75WrFihrVu35rph9WbSvn17OTk5acGCBZo/f766d++eq0aU5jSC/ENg2bJlunjxomJiYlSpUiWbV5s2bTR79mxHl5iudu3aydnZOdefMQgNDVWnTp30wQcfOLoUu27cuKG4uDidPHlSO3bs0KhRo9SyZUs1b95c
Xbp0cXR5NqZNm6bExETVrFlTixYt0sGDB7Vv3z598MEHql27tqPLSyF52975On/+vKPLskoOxtOnT0/xhVhcXJw+++yzXHf2Zfr06bp165Zq1KihhQsXat++fdq/f78+/fRT/fHHH3J2dnZ0icgh8fHxNl9E//rrrzp+/Lijy0pT8eLFtX79ep09e1YRERG6fPmyo0tKwdnZWfv27dPvv//O79ND6O+//1bDhg316aefavfu3Tp8+LBiY2M1btw4tWzZ0tHlmVJ4eLh+/PFH/frrrzZnisPCwvThhx8qISGBIH8ffHx81KFDBw0aNEinT5/OdX+35DSC/ENg9uzZaty4sd3h823atNG2bdty/Y1NXFxc1KtXL40bNy7Xf0v89ttvKykpydFl2LVy5UoVLlxYQUFBioyM1Lp16/TBBx/om2++yXV/xJUqVUo7duxQeHi43njjDVWqVElNmjTR2rVrU1yHnBskb9s7X/Xq1XN0WTamTp2qGzduKCIiQj/88IOOHz+ulStXqkmTJnrkkUc0dOhQR5doIyQkRDt37lTjxo01aNAgValSRTVq1NCUKVPUr18/jRw50tElIoesX79ejz76qM1rxIgRji4rXcWKFdP69et1/vz5XBvm/fz85Ofn5+gy4AA+Pj6qVauW3n//fdWvX1+VKlXSW2+9pRdeeEFTp051dHmmFB4ern///VelS5dWoUKFrO1hYWH6559/rI+pQ+bFxMTo4sWLioiIsLlk+GFkMcx4VykAQKYcOXJEw4cP18qVK3X27FkZhqGnn35an3zyiby8vBxdHgAAADKAIA8AD7Fhw4Zp4sSJWr16tR5//HFHlwMAAIAMIMgDwENu7ty5io+P12uvvSYnJ664AgAAyO0I8gAAAAAAmAinXgAAAAAAMBGCPAAAAAAAJkKQBwAAAADARAjyAAAAAACYCEEeAAAAAAATIcgDAIAUGjRooD59+ji6jHTNmzdPefLkSbPP8OHDVbVq1RypBwCAnECQBwDA5LZs2SJnZ2dFRUVl2TwXL16skSNHZtn80hMRESFnZ2f98ssv9/S+Dh066MCBA9lUFQAAuRNBHgAAk5s9e7ZeffVV/fDDDzp16lSWzDNfvnzy9fXNknml59ixY9q8ebN69eqlOXPm3NN7PT09VbBgwWyqDACA3IkgDwCAiV25ckULFy7Uyy+/rKioKM2bN89m+vr162WxWLRq1So9+uij8vT0VMOGDXX27FmtWLFC5cuXl5+fn5599lldu3bN+r67h9YHBQVp1KhR6t69u3x9fVWiRAnNmjXLZll79uxRw4YN5enpqfz58+vFF1/UlStX0l2HuXPnqnnz5nr55Zf1+eef699//7WZfunSJfXo0UOFChWSh4eHKlWqpGXLlkmyP7R+zJgxKlSokHx9fRUTE6Pr169nYEsCAGAeBHkAAEzsyy+/VLly5VS2bFl17txZc+bMkWEYKfoNHz5cU6dO1ebNm3X8+HG1b99ekyZN0oIFC7R8+XJ99913mjJlSprLmjBhgmrUqKGdO3fqlVde0csvv6z9+/dLkq5evaqIiAjlzZtXv/zyi2JjY7VmzRr16tUrzXkahqG5c+eqc+fOKleunEqXLq2vvvrKOj0pKUlPPvmkNm3apE8//VS///67xowZI2dn51S3x/DhwzVq1Cht27ZNhQsX1vTp09PbjAAAmApBHgAAE5s9e7Y6d+4sSYqMjFR8fLw2bNiQot8777yjunXr6tFHH1VMTIw2bNigGTNm6NFHH9UTTzyhtm3bat26dWkuq1mzZnrllVdUunRpvfnmmypQoID1PQsWLND169c1f/58VapUSQ0bNtTUqVP1ySef6MyZM6nOc82aNbp27ZoiIiIkSZ07d9bs2bNtpm/dulWLFy9WkyZNVKpUKTVv3lxPPvmk3flNmjRJMTExiomJUdmyZfXOO++oQoUKaW9EAABMhiAPAIBJ7d+/X1u3blXHjh0lSS4uLurQoYNN
EE5WuXJl6/8XKlRIXl5eKlWqlE3b2bNn01zenfOwWCwKDAy0vmffvn2qUqWKvL29rX3q1q2rpKQk61l7e+bMmaMOHTrIxcVFktSxY0dt2rRJhw4dkiT9+uuvKlasmB555JE0a0u2b98+1apVy6atdu3aGXovAABmQZAHAMCkZs+erVu3bqlIkSJycXGRi4uLZsyYoUWLFik+Pt6mr6urq/X/LRaLzc/JbUlJSWkuLzPvScuFCxf09ddfa/r06db6ixYtqlu3bllveufp6Znp+QMA8KAiyAMAYEK3bt3S/PnzNWHCBP3666/W165du1SkSBF9/vnnOVpP+fLltWvXLl29etXatmnTJjk5Oals2bJ23/PZZ5+pWLFi2rVrl806TJgwQfPmzVNiYqIqV66sEydOZPgRc+XLl9fPP/9s0/bTTz9lfsUAAMiFCPIAAJjQsmXLdPHiRcXExKhSpUo2rzZt2tgdXp+dOnXqJA8PD0VHR2vv3r1at26dXn31VT333HMqVKiQ3ffMnj1bbdu2TVF/TEyMzp8/r5UrVyosLEz169dXmzZttHr1ah0+fFgrVqzQypUr7c6zd+/emjNnjubOnasDBw5o2LBh+u2337Jz1QEAyHEEeQAATGj27Nlq3Lix/P39U0xr06aNtm3bpt27d+dYPV5eXlq1apUuXLigxx57TG3btlWjRo00depUu/23b9+uXbt2qU2bNimm+fv7q1GjRtYvIxYtWqTHHntMHTt2VIUKFTRgwAAlJibanW+HDh301ltvacCAAapevbqOHj2ql19+OetWFACAXMBi2HtGDQAAAAAAyJU4Iw8AAAAAgIkQ5AEAAAAAMBGCPAAAAAAAJkKQBwAAAADARAjyAAAAAACYCEEeAAAAAAATIcgDAAAAAGAiBHkAAAAAAEyEIA8AAAAAgIkQ5AEAAAAAMBGCPAAAAAAAJvL/AEDgNCp53Pm5AAAAAElFTkSuQmCC",
64
+ "text/plain": [
65
+ "<Figure size 1200x600 with 1 Axes>"
66
+ ]
67
+ },
68
+ "metadata": {},
69
+ "output_type": "display_data"
70
+ }
71
+ ],
72
+ "source": [
73
+ "amino_acid_frequencies = {}\n",
74
+ "\n",
75
+ "datasets = {'Train': train, 'Test': test, 'Val': val}\n",
76
+ "\n",
77
+ "\n",
78
+ "for name, df in datasets.items():\n",
79
+ " # Count total occurrences of each amino acid in each dataset\n",
80
+ " amino_acid_frequencies[name] = df[valid_residues].sum() / df['Length'].sum()\n",
81
+ "\n",
82
+ "# Convert frequencies to a dataframe for easier manipulation\n",
83
+ "freq_df = pd.DataFrame(amino_acid_frequencies)\n",
84
+ "\n",
85
+ "# Plot the histogram comparing amino acid frequencies\n",
86
+ "plt.figure(figsize=(12, 6))\n",
87
+ "\n",
88
+ "# Set bar width\n",
89
+ "bar_width = 0.2\n",
90
+ "\n",
91
+ "# Generate positions for the bars\n",
92
+ "amino_acids = list(valid_residues)\n",
93
+ "x = np.arange(len(amino_acids)) # positions for the amino acids\n",
94
+ "\n",
95
+ "# Plot the bars for each dataset with an offset\n",
96
+ "plt.bar(x - bar_width, freq_df['Train'], width=bar_width, label='Train', align='center')\n",
97
+ "plt.bar(x, freq_df['Test'], width=bar_width, label='Test', align='center')\n",
98
+ "plt.bar(x + bar_width, freq_df['Val'], width=bar_width, label='Val', align='center')\n",
99
+ "\n",
100
+ "plt.xticks(x, amino_acids)\n",
101
+ "\n",
102
+ "plt.title('UniRef50 (200k Sequences) - Train, Test, and Val Residue Frequencies')\n",
103
+ "plt.xlabel('Amino Acid')\n",
104
+ "plt.ylabel('Frequency')\n",
105
+ "\n",
106
+ "plt.legend()\n",
107
+ "plt.show()"
108
+ ]
109
+ },
110
+ {
111
+ "cell_type": "code",
112
+ "execution_count": 6,
113
+ "metadata": {},
114
+ "outputs": [
115
+ {
116
+ "data": {
117
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAA/8AAAIjCAYAAABViau2AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABzUklEQVR4nO3de3zP9f//8ft7Zpuxg9NOYWbkfD41csphWHIsoVhWUpQz+SSRiggpISXrQGUlidDMmSWnOeWUJjkMkc3Ixvb6/dF3r5+3Ddtse8/b7Xq5vC8X7+fr8X4+H6/3Xnvb4/18vl4vi2EYhgAAAAAAgN1ysHUCAAAAAAAgd1H8AwAAAABg5yj+AQAAAACwcxT/AAAAAADYOYp/AAAAAADsHMU/AAAAAAB2juIfAAAAAAA7R/EPAAAAAICdo/gHAAAAAMDOUfwDyDdCQ0NVtmzZXB/n+vXrGjlypEqXLi0HBwd16tQp18e8WWpqqqpVq6a33norz8e+lbJly+rRRx+1dRq4jd9++02Ojo7at2+frVPJlnHjxslisdg6jftW8+bN1bx58zwfN68+22GfLBaLBg4caOs0ALtA8Q8gS9L+eP/7778z3F6tWrUc/ePSYrFYPdzd3dWsWTMtX748231++umnmjJlirp166bPPvtMQ4YMkfRf8XvzeBaLRf3790/Xx8WLF9WvXz+VLFlShQsXVosWLbRz585M5/DVV1/pr7/+svqDZtu2bRo4cKCqVq2qwoULq0yZMnriiSd0+PDhDPs4cOCA2rZtqyJFiqhYsWJ6+umnde7cuXRxqampmjx5sgICAuTi4qIaNWroq6++ynSumZGamqrPP/9cDRs2VLFixeTm5qYHH3xQvXv31i+//JKjY93PqlSpopCQEI0dOzZH+73VsX/zIzw8PEfHzWnh4eGZ2o+cKkS3bNmicePG6eLFiznSn63t3LlTFotFY8aMuWXMkSNHZLFYNHTo0Bwfn8+R3BMaGqoiRYrYOo1bsrffJSC/crR1AgCQ5uOPP1Zqamq69tatW6t3794yDEN//vmnZs+erQ4dOmjFihUKDg7O8jhr1qzRAw88oOnTp6fbVqtWLQ0bNsyq7cEHH7R6npqaqpCQEO3evVsjRoxQiRIlNGvWLDVv3lw7duxQhQoV7pjDlClT9OSTT8rDw8Nse+edd7R582Y9/vjjqlGjhuLi4jRz5kzVqVNHv/zyi6pVq2bGnjhxQk2bNpWHh4fefvttJSYm6t1339XevXv166+/ysnJyYx99dVXNWnSJD333HOqX7++fvjhB/Xs2VMWi0VPPvlkpt+323n55Zf14YcfqmPHjurVq5ccHR116NAhrVixQuXKldNDDz2UI+NA6t+/v9q3b6+jR48qMDAwR/p87733lJiYaD7/6aef9NVXX2n69OkqUaKE2d6oUaO7GmfMmDF65ZVX7qqP22natKm++OILq7Znn31WDRo0UL9+/cy2nCqCtmzZovHjxys0NFSenp450qct1alTR5UqVdJXX32lN998M8OYhQsXSpKeeuqpHB+fz5H7l739LgH5lgEAWfD6668bkoxz585luL1q1apGs2bNcmw8ScaAAQOs2n777TdDktGuXbts9dmiRQujatWq6dr9/f2NkJCQO77+m2++MSQZERERZtvZs2cNT09Po0ePHnd8/c6dOw1JxurVq63aN2/ebCQlJVm1HT582HB2djZ69epl1f7CCy8YhQoVMv7880+zLTIy0pBkfPTRR2bbiRMnjIIFC1q9h6mpqUaTJk2MUqVKGdevXzfbM7v/N4uLizMsFovx3HPPpduWmppqnDlzJst94taSk5ONokWLGq+99lqujTFlyhRDkhEbG3vbuMTExFzLIacULlzY6NOnT670ndn3Kb9o1qzZHT+fJ0yYYEgyoqOjM9xesWJFo1KlSlkat0+fPoa/v/9tY/gcyV19+vQxChcubOs0bul2v0sZ/R0A
IHtY9g8gV61bt04Wi0WLFi3SW2+9pVKlSsnFxUUtW7bU77//bhWb2fNCK1eurBIlSujo0aNW7UlJSXr99ddVvnx5OTs7q3Tp0ho5cqSSkpIkSceOHZPFYtHatWu1f/9+cwnwunXrrPpJTk7W5cuXbzn+t99+K29vb3Xp0sVsK1mypJ544gn98MMP5ni3smTJEjk5Oalp06ZW7Y0aNbKasZekChUqqGrVqjpw4IBV+3fffadHH31UZcqUMdtatWqlBx98UIsWLTLbfvjhB127dk0vvvii2WaxWPTCCy/oxIkTio6Ovm2un332mRwdHTVixIhbxsTGxsowDDVu3DjdNovFIi8vL6u2ixcvavDgwSpdurScnZ1Vvnx5vfPOO+lWfVy8eFGhoaHy8PCQp6en+vTpo5iYmHTLz291HnNGx1Nqaqree+89Va1aVS4uLvL29tbzzz+vf/75xyou7foHmzZtUoMGDeTi4qJy5crp888/TzfOxYsXNWTIEJUtW1bOzs4qVaqUevfubXVqzJ2OzTSRkZF6+OGH5enpqSJFiqhixYr63//+ZxVTsGBBNW/eXD/88EO6XHJT2rLho0ePqn379nJzc1OvXr0kSRs3btTjjz+uMmXKmPs3ZMgQ/fvvv1Z9ZHTOf9r5vEuWLFG1atXk7OysqlWrauXKlbm2LydPnlTfvn3l7e1tjvfpp5+mi/vggw9UtWpVubq6qmjRoqpXr5458z1u3Djz9yIgIMD8PDl27FiWcrlw4YKGDx+u6tWrq0iRInJ3d1e7du20e/duq7isfJZK0ty5cxUYGKhChQqpQYMG2rhxY6bySfuZpu3njXbs2KFDhw6ZMT/88INCQkLk5+cnZ2dnBQYGasKECUpJScnSeyDxOZKTnyN3Y+vWrWrbtq08PDzk6uqqZs2aafPmzVYxab/Hv//+uzlT7+HhoWeeeUZXrlyxiv3333/18ssvq0SJEnJzc9Njjz2mkydPymKxaNy4cWZ/mfldutNnxKVLlzR48GDzPfTy8lLr1q2zdEoeYO9Y9g8gT0yaNEkODg4aPny44uPjNXnyZPXq1Utbt27Ncl/x8fH6559/rJY8p6am6rHHHtOmTZvUr18/Va5cWXv37tX06dN1+PBhLVmyRCVLltQXX3yht956S4mJiZo4caKk/75MSLNmzRq5uroqJSVF/v7+GjJkiAYNGmQ1/q5du1SnTh05OFh/f9qgQQPNnTtXhw8fVvXq1W+Z/5YtW1StWjUVLFjwjvtqGIbOnDmjqlWrmm0nT57U2bNnVa9evXTxDRo00E8//WSVa+HCha32MS0ubfvDDz+c4dhz585V//799b///e+WS4Alyd/fX5IUERGhxx9/XK6urreMvXLlipo1a6aTJ0/q+eefV5kyZbRlyxaNHj1ap0+f1nvvvWfud8eOHbVp0yb1799flStX1vfff68+ffrcsu/MeP755xUeHq5nnnlGL7/8smJjYzVz5kzt2rVLmzdvtvqZ/P777+rWrZvCwsLUp08fffrppwoNDVXdunXNn0diYqKaNGmiAwcOqG/fvqpTp47+/vtvLV26VCdOnFCJEiUydWxK0v79+/Xoo4+qRo0aeuONN+Ts7Kzff/893R/eklS3bl398MMPSkhIkLu7+129J1lx/fp1BQcH6+GHH9a7775r/qwjIiJ05coVvfDCCypevLh+/fVXffDBBzpx4oQiIiLu2O+mTZu0ePFivfjii3Jzc9P777+vrl276vjx4ypevHiO7sOZM2f00EMPmV86lCxZUitWrFBYWJgSEhI0ePBgSf+dhvTyyy+rW7duGjRokK5evao9e/Zo69at6tmzp7p06aLDhw+nOz2iZMmSWcrnjz/+0JIlS/T4448rICBAZ86c0UcffaRmzZrpt99+k5+fn1V8Zj5L582bp+eff16NGjXS4MGD9ccff+ixxx5TsWLFVLp06dvmExAQoEaNGmnRokWaPn26ChQoYG5L+0KgZ8+e
kv67xkKRIkU0dOhQFSlSRGvWrNHYsWOVkJCgKVOmZOl94HMkZz5H7saaNWvUrl071a1bV6+//rocHBw0f/58PfLII9q4caP5/0aaJ554QgEBAZo4caJ27typTz75RF5eXnrnnXfMmNDQUC1atEhPP/20HnroIa1fv14hISFW/WTmdykznxH9+/fXt99+q4EDB6pKlSo6f/68Nm3apAMHDqhOnTp3/f4AdsGm6w4A3HOyuux/7dq1hiSjcuXKVkvaZ8yYYUgy9u7da7ZltDRUkhEWFmacO3fOOHv2rLF9+3ajbdu2hiRjypQpZtwXX3xhODg4GBs3brR6/Zw5cwxJxubNm822Zs2aZbjsv0OHDsY777xjLFmyxJg3b57RpEkTQ5IxcuRIq7jChQsbffv2Tff65cuXG5KMlStXZvjepClVqpTRtWvX28bcuF+SjHnz5plt27ZtMyQZn3/+ebr4ESNGGJKMq1evGoZhGCEhIUa5cuXSxV2+fNmQZLzyyitm243L/mfMmGFYLBZjwoQJmcqzd+/ehiSjaNGiRufOnY13333XOHDgQLq4CRMmGIULFzYOHz5s1f7KK68YBQoUMI4fP24YhmEsWbLEkGRMnjzZjLl+/br5M5k/f77ZfqulzDcfTxs3bjQkGQsWLLCKW7lyZbp2f39/Q5KxYcMGs+3s2bOGs7OzMWzYMLNt7NixhiRj8eLF6cZPTU01DCPzx+b06dNv+7t1o4ULFxqSjK1bt94xNjsyWoLbp0+fdMdMmitXrqRrmzhxomGxWKxOTUn7/LiRJMPJycn4/fffzbbdu3cbkowPPvjgrvfl5mX/YWFhhq+vr/H3339bxT355JOGh4eHuS8dO3bM8HPiRjmx7P/q1atGSkqKVVtsbKzh7OxsvPHGG2ZbZj9Lk5OTDS8vL6NWrVpWcXPnzjUkZeq0rA8//NCQZKxatcpsS0lJMR544AEjKCjIbMvo5/78888brq6u5meQYWRu2b9h8DmSE58jt3KnZf+pqalGhQoVjODgYHNMw/jvZxwQEGC0bt3abEv7Pb75/8HOnTsbxYsXN5/v2LHDkGQMHjzYKi40NNSQZLz++utm252W/WfmM8LDw4PTA4A7YNk/gDzxzDPPWC1pb9KkiaT/Zr3uZN68eSpZsqS8vLxUr149RUVFaeTIkVZXm46IiFDlypVVqVIl/f333+bjkUcekSStXbv2juMsXbpUI0eOVMeOHdW3b1+tX79ewcHBmjZtmk6cOGHG/fvvv3J2dk73ehcXF3P77Zw/f15Fixa9Yz4HDx7UgAEDFBQUZDVTldZ/ZnLITq6TJ0/WoEGD9M4779z2qt83mj9/vmbOnKmAgAB9//33Gj58uCpXrqyWLVvq5MmTZlxERISaNGmiokWLWv2cWrVqpZSUFG3YsEHSfxecc3R01AsvvGC+tkCBAnrppZcylU9GIiIi5OHhodatW1uNXbduXRUpUiTdMVKlShXzOJX+m4WqWLGi1TH73XffqWbNmurcuXO68dKWt2f22Ey7yNUPP/yQ4YUvb5R2/Nzqrhu56cafSZpChQqZ/758+bL+/vtvNWrUSIZhaNeuXXfss1WrVlYreWrUqCF3d/dMfT5khWEY+u6779ShQwcZhmH18wgODlZ8fLy5RNjT01MnTpzQtm3bcjSHmzk7O5uriFJSUnT+/HnzlI+Mlivf6bN0+/btOnv2rPr3728Vl7b0PTO6d++uggULWi39X79+vU6ePGku+Zesf+6XLl3S33//rSZNmujKlSs6ePBgpsa6EZ8jd/85kl0xMTE6cuSIevbsqfPnz5v9X758WS1bttSGDRvSfS7dfCecJk2a6Pz580pISJAkc1n+jaedScrW+5+ZzwhPT09t3bpVp06dynL/wP2CZf8AclxG9/G+8dx06f8XLzefI5mRjh07auDAgUpOTta2bdv09ttv68qVK1bL7o8cOaIDBw7ccsnt2bNns7ILkv7bjyFDhmjVqlVa
t26deXXrQoUKZXiO5dWrV83td2IYxm23x8XFKSQkRB4eHvr222+tlt6m9Z+ZHLKa6/r167V8+XKNGjXqtuf538zBwUEDBgzQgAEDdP78eW3evFlz5szRihUr9OSTT5rnGx85ckR79uy548/pzz//lK+vb7qrslesWDHTOd3syJEjio+PT3fu8M1jp7n5mJX+O25vPGaPHj2qrl273nHczByb3bt31yeffKJnn31Wr7zyilq2bKkuXbqoW7du6U4xSTt+MvpdS5OcnKwLFy5YtZUsWdLqWMoqR0dHlSpVKl378ePHNXbsWC1dujTd73R8fPwd+83Me50Tzp07p4sXL2ru3LmaO3duhjFpP49Ro0Zp9erVatCggcqXL682bdqoZ8+eGZ6TfjdSU1M1Y8YMzZo1S7GxsVbny2d0ysOdPkv//PNPSUp315GCBQuqXLlymcqpePHiCg4O1vfff685c+bIxcVFCxculKOjo5544gkzbv/+/RozZozWrFljFnxpMvNzvxmfI7cfN6f/j7u5f0m3PSUiPj7e6ovr2x2L7u7u+vPPP+Xg4KCAgACruPLly2c5v8y8j5MnT1afPn1UunRp1a1bV+3bt1fv3r0zfdwD9wOKfwBZcqfZ7StXrpgxN7pVwXGnIliSSpUqpVatWkmS2rdvrxIlSmjgwIFq0aKFedG91NRUVa9eXdOmTcuwjzud53oraa+7sYjy9fXV6dOn08Wmtd18ju7NihcvftuiJj4+Xu3atdPFixe1cePGdP35+vpajXdzDsWKFTNn+319fbV27VoZhmFVKN4q16pVq+rixYv64osv9Pzzz6f7oy0zihcvrscee0yPPfaYmjdvrvXr1+vPP/+Uv7+/UlNT1bp1a40cOTLD1958W8XMsFgsGR5HN190LDU1VV5eXlqwYEGG/dz8R/XdHLM3j5uZY7NQoULasGGD1q5dq+XLl2vlypX65ptv9Mgjj+jnn3+2yift+LnxNnw327Jli1q0aGHVFhsbe1f3uL9xljpNSkqKWrdurQsXLmjUqFGqVKmSChcurJMnTyo0NPSOqxiknHuv7yQtl6eeeuqWRU6NGjUk/XctkEOHDmnZsmVauXKlvvvuO82aNUtjx47V+PHjcyynt99+W6+99pr69u2rCRMmqFixYnJwcNDgwYMzfO/y6r166qmntGzZMi1btkyPPfaYvvvuO7Vp08b8Pbl48aKaNWsmd3d3vfHGGwoMDJSLi4t27typUaNGZernfjt8jljLrf/jbuxf+u82tLVq1cow5uYvUfLqWMzsWE888YSaNGmi77//Xj///LOmTJmid955R4sXL1a7du1yPCfgXkTxDyBL0i7KdOjQoXR/bFy5ckV//fWX2rRpk6s5PP/885o+fbrGjBmjzp07y2KxKDAwULt371bLli1vOxuaVWlLCm/8g65WrVrauHGjUlNTrQqhrVu3ytXV9Y5/eFaqVEmxsbEZbrt69ao6dOigw4cPa/Xq1apSpUq6mAceeEAlS5bU9u3b02379ddfrf5wq1Wrlj755BMdOHDAqq+0i4Pd/EdeiRIl9O233+rhhx9Wy5YttWnTpjt+mXE79erV0/r163X69Gn5+/srMDBQiYmJ5pc5t+Lv76+oqCglJiZa/cF56NChdLFFixbNcHl42gxomsDAQK1evVqNGzfO1OqMzAgMDNS+ffvuGJPZY9PBwUEtW7ZUy5YtNW3aNL399tt69dVXtXbtWqv3LDY2Vg4ODrc91mrWrKnIyEirNh8fn0zsVdbs3btXhw8f1meffabevXub7TePnR+ULFlSbm5uSklJueMxKEmFCxdW9+7d1b17dyUnJ6tLly566623NHr0aLm4uOTIZ823336rFi1aaN68eVbtFy9evO2XO7eS9hl95MgRc0m4JF27dk2xsbGqWbNmpvp57LHH5ObmpoULF6pgwYL6559/rJb8r1u3TufPn9fixYut7lxyq8+2u8HnSNY+R7KbgyS5
u7tn6ncjM9K+qImNjbVaiZLR3Slyap98fX314osv6sUXX9TZs2dVp04dvfXWWxT/wP/hnH8AWdKyZUs5OTlp9uzZ6WZ25s6dq+vXr+f6f7KOjo4aNmyYDhw4YN7u7IknntDJkyf18ccfp4v/999/b3vrPum/mf2bZ3iuXbumSZMmycnJyWoGtVu3bjpz5owWL15stv3999+KiIhQhw4dMjzH/kZBQUHat29fuuX4KSkp6t69u6KjoxUREaGgoKBb9tG1a1ctW7ZMf/31l9kWFRWlw4cP6/HHHzfbOnbsqIIFC2rWrFlmm2EYmjNnjh544AE1atQoXd+lSpXS6tWr9e+//6p169Y6f/78bfcnLi5Ov/32W7r25ORkRUVFycHBwVzm+cQTTyg6OlqrVq1KF3/x4kVdv35d0n8rPK5fv67Zs2eb21NSUvTBBx+ke11gYKAOHjyoc+fOmW27d+9Od5X8J554QikpKZowYUK6Pq5fv66LFy/edj8z0rVrV+3evVvff/99um1pM1KZPTZvXqIv/f8vZ24+Vnbs2KGqVave9hzuokWLqlWrVlaPjFbl3K20GbkbZ+AMw9CMGTNyfKy7VaBAAXXt2lXfffddhsXWjcfQzce9k5OTqlSpIsMwdO3aNUn/fTkgKVvHzo053TxTGhERYXWOe1bUq1dPJUuW1Jw5c5ScnGy2h4eHZynPQoUKqXPnzvrpp580e/ZsFS5cWB07drTKW7L+uScnJ1t91mQFnyM58zmSXXXr1lVgYKDeffddJSYmptt+4/uSWcHBwZKU7pjI6P2/29+llJSUdKeaeHl5yc/PL0dvhQjc65j5B5AlXl5eGjt2rMaMGaOmTZvqsccek6urq7Zs2aKvvvpKbdq0UYcOHXI9j9DQUI0dO1bvvPOOOnXqpKefflqLFi1S//79tXbtWjVu3FgpKSk6ePCgFi1apFWrVmV4a7w0S5cu1Ztvvqlu3bopICBAFy5c0MKFC7Vv3z69/fbbVjOm3bp100MPPaRnnnlGv/32m0qUKKFZs2YpJSUlU8uBO3bsqAkTJmj9+vVWqySGDRumpUuXqkOHDrpw4YK+/PJLq9elXXNAkv73v/8pIiJCLVq00KBBg5SYmKgpU6aoevXqeuaZZ8y4UqVKafDgwZoyZYquXbum+vXra8mSJdq4caMWLFhwy6WU5cuX188//6zmzZsrODhYa9asueUt5U6cOKEGDRrokUceUcuWLeXj46OzZ8/qq6++0u7duzV48GBzBnPEiBFaunSpHn30UfN2V5cvX9bevXv17bff6tixYypRooQ6dOigxo0b65VXXtGxY8dUpUoVLV68OMPziPv27atp06YpODhYYWFhOnv2rObMmaOqVatanYfcrFkzPf/885o4caJiYmLUpk0bFSxYUEeOHFFERIRmzJihbt263fHnd6MRI0bo22+/1eOPP66+ffuqbt26unDhgpYuXao5c+aoZs2amT4233jjDW3YsEEhISHy9/fX2bNnNWvWLJUqVcrqdozXrl3T+vXr011Ey1YqVaqkwMBADR8+XCdPnpS7u7u+++67HD9fX/pvdrBZs2Zat25dtvuYNGmS1q5dq4YNG+q5555TlSpVdOHCBe3cuVOrV682v4Rp06aNfHx81LhxY3l7e+vAgQOaOXOmQkJC5ObmJum/gkmSXn31VT355JMqWLCgOnTooMKFC2vcuHEaP3681q5dm+H949M8+uijeuONN/TMM8+oUaNG2rt3rxYsWJDt85QLFiyoN998U88//7weeeQRde/eXbGxsZo/f36W+3zqqaf0+eefa9WqVerVq5dZoElSo0aNVLRoUfXp00cvv/yyLBaLvvjii2wv+eZzJGc+R27n2rVrGd62tVixYnrxxRf1ySefqF27dqpataqeeeYZPfDAAzp58qTWrl0rd3d3/fjjj1nar7p166pr16567733dP78efNWf4cPH5ZkPdt/u9+lzLh06ZJKlSqlbt26
qWbNmipSpIhWr16tbdu2aerUqVnKG7BreXdjAQD25MsvvzQeeugho3Dhwoazs7NRqVIlY/z48Va3dzKM/397qoiICKv22NjYdLdautWt/m51655x48YZkoy1a9cahvHfLa7eeecdo2rVqoazs7NRtGhRo27dusb48eON+Ph483UZ3epv+/btRocOHYwHHnjAcHJyMooUKWI8/PDDxqJFizIc+8KFC0ZYWJhRvHhxw9XV1WjWrJmxbdu2271lVmrUqGGEhYVZtTVr1syQdMvHzfbt22e0adPGcHV1NTw9PY1evXoZcXFx6eJSUlKMt99+2/D39zecnJyMqlWrGl9++WW6uBtv9Zdm69athpubm9G0adMMb+tlGIaRkJBgzJgxwwgODjZKlSplFCxY0HBzczOCgoKMjz/+2Oq2UYZhGJcuXTJGjx5tlC9f3nBycjJKlChhNGrUyHj33XeN5ORkM+78+fPG008/bbi7uxseHh7G008/bezatSvdcWMY/x2P5cqVM5ycnIxatWoZq1atuuXtxebOnWvUrVvXKFSokOHm5mZUr17dGDlypHHq1KnbvheGkfHtwM6fP28MHDjQPHZKlSpl9OnTx+pWcpk5NqOiooyOHTsafn5+hpOTk+Hn52f06NEj3e3MVqxYYUgyjhw5kuHPIyfc6lZ/t7pV2G+//Wa0atXKKFKkiFGiRAnjueeeM2/FdePP6la3+svod9zf39/qFn2XLl0yJBlPPvlklvbl5lv9GYZhnDlzxhgwYIBRunRpo2DBgoaPj4/RsmVLY+7cuWbMRx99ZDRt2tQoXry44ezsbAQGBhojRoyw+iwxjP9uO/fAAw8YDg4OVu/ZsGHDDIvFkuGt6m509epVY9iwYYavr69RqFAho3HjxkZ0dHS6Yy0rn6WGYRizZs0yAgICDGdnZ6NevXrGhg0bbnk7u1u5fv264evra0gyfvrpp3TbN2/ebDz00ENGoUKFDD8/P2PkyJHGqlWrrD6XDSNzt/rjcyRnPkduJe1WnRk9AgMDzbhdu3YZXbp0MY97f39/44knnjCioqLMmFvd8nf+/PnpPjcuX75sDBgwwChWrJhRpEgRo1OnTsahQ4cMScakSZOsXn+r36XMfEYkJSUZI0aMMGrWrGm4ubkZhQsXNmrWrGnMmjXrtu8LcL+xGEYuXJUDAHBbX3zxhQYMGKDjx4+bt3jDnR07dkwBAQGaP3++QkNDbZ2OTXTq1EkWiyXDJcL27KefftKjjz6q3bt3q3r16rZO544aNGggf39/RURE2DoV3ITPEduKiYlR7dq19eWXX1pdRwJA7uOcfwCwgV69eqlMmTL68MMPbZ0K7iEHDhzQsmXLMjzf2N6tXbtWTz755D1R+CckJGj37t164403bJ0KYFMZ3Rnovffek4ODg9WFIgHkDc75BwAbcHBwuOPVnYGbVa5c2byY2f1mypQptk4h09zd3bnIGCBp8uTJ2rFjh1q0aCFHR0etWLFCK1asUL9+/e769oQAso7iHwAAAECOa9SokSIjIzVhwgQlJiaqTJkyGjdunF599VVbpwbclzjnHwAAAAAAO8c5/wAAAAAA2DmKfwAAAAAA7Bzn/OeQ1NRUnTp1Sm5ubrJYLLZOBwAAAABg5wzD0KVLl+Tn5ycHh9vP7VP855BTp05x1VIAAAAAQJ7766+/VKpUqdvGUPznEDc3N0n/venu7u42zgYAAAAAYO8SEhJUunRpsx69HYr/HJK21N/d3Z3iHwAAAACQZzJz6jkX/AMAAAAAwM5R/AMAAAAAYOco/gEAAAAAsHOc8w8AAAAAyBUpKSm6du2ardO4ZxUoUECOjo45cjt5in8AAAAAQI5LTEzUiRMnZBiGrVO5p7m6usrX11dOTk531Q/FPwAAAAAgR6WkpOjEiRNydXVVyZIlc2Tm+n5jGIaSk5N17tw5xcbGqkKFCnJwyP6Z+xT/AAAAAIAcde3aNRmGoZIlS6pQoUK2TueeVahQ
IRUsWFB//vmnkpOT5eLiku2+uOAfAAAAACBXMON/9+5mtt+qnxzpBQAAAAAA5FsU/wAAAAAA2DnO+QcAAAAA5Imw8G15Ot680Pp5Ol5GypYtq8GDB2vw4ME2zYOZfwAAAADAfc9isdz2MW7cuGz1u23bNvXr1y9nk80GZv4BAAAAAPe906dPm//+5ptvNHbsWB06dMhsK1KkiPlvwzCUkpIiR8c7l9QlS5bM2USziZl/AAAAAMB9z8fHx3x4eHjIYrGYzw8ePCg3NzetWLFCdevWlbOzszZt2qSjR4+qY8eO8vb2VpEiRVS/fn2tXr3aqt+yZcvqvffeM59bLBZ98skn6ty5s1xdXVWhQgUtXbo01/eP4h8AAAAAgEx45ZVXNGnSJB04cEA1atRQYmKi2rdvr6ioKO3atUtt27ZVhw4ddPz48dv2M378eD3xxBPas2eP2rdvr169eunChQu5mjvFPwAAAAAAmfDGG2+odevWCgwMVLFixVSzZk09//zzqlatmipUqKAJEyYoMDDwjjP5oaGh6tGjh8qXL6+3335biYmJ+vXXX3M1d4p/AAAAAAAyoV69elbPExMTNXz4cFWuXFmenp4qUqSIDhw4cMeZ/xo1apj/Lly4sNzd3XX27NlcyTkNF/wDAAAAACATChcubPV8+PDhioyM1Lvvvqvy5curUKFC6tatm5KTk2/bT8GCBa2eWywWpaam5ni+N6L4BwAAAAAgGzZv3qzQ0FB17txZ0n8rAY4dO2bbpG6B4h92KSx8W56ONy+0fp6OBwAAAMD2KlSooMWLF6tDhw6yWCx67bXXcn0GP7so/gEAAAAAecLeJs2mTZumvn37qlGjRipRooRGjRqlhIQEW6eVIYthGIatk7AHCQkJ8vDwUHx8vNzd3W2dzn2PmX8AAADAdq5evarY2FgFBATIxcXF1unc0273XmalDrXp1f4nTpyo+vXry83NTV5eXurUqZMOHTpkFXP16lUNGDBAxYsXV5EiRdS1a1edOXPGKub48eMKCQmRq6urvLy8NGLECF2/ft0qZt26dapTp46cnZ1Vvnx5hYeHp8vnww8/VNmyZeXi4qKGDRvm+q0WAAAAAADICzYt/tevX68BAwbol19+UWRkpK5du6Y2bdro8uXLZsyQIUP0448/KiIiQuvXr9epU6fUpUsXc3tKSopCQkKUnJysLVu26LPPPlN4eLjGjh1rxsTGxiokJEQtWrRQTEyMBg8erGeffVarVq0yY7755hsNHTpUr7/+unbu3KmaNWsqODg412+3AAAAAABAbstXy/7PnTsnLy8vrV+/Xk2bNlV8fLxKliyphQsXqlu3bpKkgwcPqnLlyoqOjtZDDz2kFStW6NFHH9WpU6fk7e0tSZozZ45GjRqlc+fOycnJSaNGjdLy5cu1b98+c6wnn3xSFy9e1MqVKyVJDRs2VP369TVz5kxJUmpqqkqXLq2XXnpJr7zyyh1zZ9l//sKyfwAAAMB2WPafc+xi2f/N4uPjJUnFihWTJO3YsUPXrl1Tq1atzJhKlSqpTJkyio6OliRFR0erevXqZuEvScHBwUpISND+/fvNmBv7SItJ6yM5OVk7duywinFwcFCrVq3MmJslJSUpISHB6gEAAAAAQH6Ub4r/1NRUDR48WI0bN1a1atUkSXFxcXJycpKnp6dVrLe3t+Li4syYGwv/tO1p224Xk5CQoH///Vd///23UlJSMoxJ6+NmEydOlIeHh/koXbp09nYcAAAAAIBclm+K/wEDBmjfvn36+uuvbZ1KpowePVrx8fHm46+//rJ1SgAAAAAAZMjR1glI0sCBA7Vs2TJt2LBBpUqVMtt9fHyUnJysixcvWs3+nzlzRj4+PmbMzVflT7sbwI0xN98h4MyZM3J3d1ehQoVUoEABFShQIMOYtD5u5uzsLGdn5+ztMAAAAAAAecimM/+GYWjgwIH6/vvvtWbNGgUEBFhtr1u3rgoWLKioqCiz7dChQzp+
/LiCgoIkSUFBQdq7d6/VVfkjIyPl7u6uKlWqmDE39pEWk9aHk5OT6tataxWTmpqqqKgoMwYAAAAAgHuVTWf+BwwYoIULF+qHH36Qm5ubeX69h4eHChUqJA8PD4WFhWno0KEqVqyY3N3d9dJLLykoKEgPPfSQJKlNmzaqUqWKnn76aU2ePFlxcXEaM2aMBgwYYM7M9+/fXzNnztTIkSPVt29frVmzRosWLdLy5cvNXIYOHao+ffqoXr16atCggd577z1dvnxZzzzzTN6/MQAAAAAA5CCbFv+zZ8+WJDVv3tyqff78+QoNDZUkTZ8+XQ4ODuratauSkpIUHBysWbNmmbEFChTQsmXL9MILLygoKEiFCxdWnz599MYbb5gxAQEBWr58uYYMGaIZM2aoVKlS+uSTTxQcHGzGdO/eXefOndPYsWMVFxenWrVqaeXKlekuAggAAAAAyKaF3fN2vJ7f5O14+ZjFMAzD1knYg6zcXxG5Lyx8W56ONy+0fp6OBwAAAORnt7w3fT4u/i0Wy223v/766xo3bly20rBYLPr+++/VqVOnLL/2lu+lslaH5osL/gEAAAAAYEunT582//3NN99o7NixOnTokNlWpEgRW6SVY/LNrf4AAAAAALAVHx8f8+Hh4SGLxWLV9vXXX6ty5cpycXFRpUqVrE5HT05O1sCBA+Xr6ysXFxf5+/tr4sSJkqSyZctKkjp37iyLxWI+z2vM/AMAAAAAcBsLFizQ2LFjNXPmTNWuXVu7du3Sc889Z15z7v3339fSpUu1aNEilSlTRn/99Zf++usvSdK2bdvk5eWl+fPnq23btipQoIBN9oHiHwAAAACA23j99dc1depUdenSRdJ/F5X/7bff9NFHH6lPnz46fvy4KlSooIcfflgWi0X+/v7ma0uWLClJ8vT0lI+Pj03ylyj+AQAAAAC4pcuXL+vo0aMKCwvTc889Z7Zfv35dHh4ekqTQ0FC1bt1aFStWVNu2bfXoo4+qTZs2tko5QxT/AAAAAADcQmJioiTp448/VsOGDa22pS3hr1OnjmJjY7VixQqtXr1aTzzxhFq1aqVvv/02z/O9FYp/AAAAAABuwdvbW35+fvrjjz/Uq1evW8a5u7ure/fu6t69u7p166a2bdvqwoULKlasmAoWLKiUlJQ8zDo9in8AAAAAAG5j/Pjxevnll+Xh4aG2bdsqKSlJ27dv1z///KOhQ4dq2rRp8vX1Ve3ateXg4KCIiAj5+PjI09NT0n9X/I+KilLjxo3l7OysokWL5vk+UPwDAAAAAPJGz29snUG2PPvss3J1ddWUKVM0YsQIFS5cWNWrV9fgwYMlSW5ubpo8ebKOHDmiAgUKqH79+vrpp5/k4OAgSZo6daqGDh2qjz/+WA888ICOHTuW5/tgMQzDyPNR7VBCQoI8PDwUHx8vd3d3W6dz3wsL35an480LrZ+n4wEAAAD52dWrVxUbG6uAgAC5uLjYOp172u3ey6zUoQ65mSQAAAAAALA9in8AAAAAAOwcxT8AAAAAAHaO4h8AAAAAADtH8Q8AAAAAgJ2j+AcAAAAAwM5R/AMAAAAAYOco/gEAAAAAsHMU/wAAAAAA2DlHWycAAAAAALg/DIwamKfjzWw5M0/Ha968uWrVqqX33nsvT8fNDGb+AQAAAAD3vQ4dOqht27YZbtu4caMsFov27NmTx1nlHIp/AAAAAMB9LywsTJGRkTpx4kS6bfPnz1e9evVUo0YNG2SWMyj+AQAAAAD3vUcffVQlS5ZUeHi4VXtiYqIiIiLUqVMn9ejRQw888IBcXV1VvXp1ffXVV7ZJNhso/gEAAAAA9z1HR0f17t1b4eHhMgzDbI+IiFBKSoqeeuop1a1bV8uXL9e+ffvUr18/Pf300/r1119tmHXmccE/5Imw8G22TgEAAAAAbqtv376aMmWK1q9fr+bNm0v6b8l/165d5e/vr+HDh5uxL730klatWqVFixapQYMGNso485j5BwAAAABA
UqVKldSoUSN9+umnkqTff/9dGzduVFhYmFJSUjRhwgRVr15dxYoVU5EiRbRq1SodP37cxllnDsU/AAAAAAD/JywsTN99950uXbqk+fPnKzAwUM2aNdOUKVM0Y8YMjRo1SmvXrlVMTIyCg4OVnJxs65QzheIfAAAAAID/88QTT8jBwUELFy7U559/rr59+8pisWjz5s3q2LGjnnrqKdWsWVPlypXT4cOHbZ1uplH8AwAAAADwf4oUKaLu3btr9OjROn36tEJDQyVJFSpUUGRkpLZs2aIDBw7o+eef15kzZ2ybbBZwwT8AAAAAQJ6Y2XKmrVPIlLCwMM2bN0/t27eXn5+fJGnMmDH6448/FBwcLFdXV/Xr10+dOnVSfHy8jbPNHIp/AAAAAABuEBQUZHW7P0kqVqyYlixZctvXrVu3LveSukss+wcAAAAAwM5R/AMAAAAAYOco/gEAAAAAsHMU/wAAAAAA2DmKfwAAAABArrj5onnIupx6Dyn+AQAAAAA5qkCBApKk5ORkG2dy77ty5YokqWDBgnfVD7f6AwAAAADkKEdHR7m6uurcuXMqWLCgHByYd84qwzB05coVnT17Vp6enuYXKtlF8Q8AAAAAyFEWi0W+vr6KjY3Vn3/+aet07mmenp7y8fG5634o/gEAAAAAOc7JyUkVKlRg6f9dKFiw4F3P+Keh+AcAAAAA5AoHBwe5uLjYOg2IC/4BAAAAAGD3KP4BAAAAALBzNi3+N2zYoA4dOsjPz08Wi0VLliyx2m6xWDJ8TJkyxYwpW7Zsuu2TJk2y6mfPnj1q0qSJXFxcVLp0aU2ePDldLhEREapUqZJcXFxUvXp1/fTTT7myzwAAAAAA5DWbFv+XL19WzZo19eGHH2a4/fTp01aPTz/9VBaLRV27drWKe+ONN6ziXnrpJXNbQkKC2rRpI39/f+3YsUNTpkzRuHHjNHfuXDNmy5Yt6tGjh8LCwrRr1y516tRJnTp10r59+3JnxwEAAAAAyEM2veBfu3bt1K5du1tuv/l2Bj/88INatGihcuXKWbW7ubnd8tYHCxYsUHJysj799FM5OTmpatWqiomJ0bRp09SvXz9J0owZM9S2bVuNGDFCkjRhwgRFRkZq5syZmjNnzt3sIgAAAAAANnfPnPN/5swZLV++XGFhYem2TZo0ScWLF1ft2rU1ZcoUXb9+3dwWHR2tpk2bysnJyWwLDg7WoUOH9M8//5gxrVq1suozODhY0dHRt8wnKSlJCQkJVg8AAAAAAPKje+ZWf5999pnc3NzUpUsXq/aXX35ZderUUbFixbRlyxaNHj1ap0+f1rRp0yRJcXFxCggIsHqNt7e3ua1o0aKKi4sz226MiYuLu2U+EydO1Pjx43Ni1wAAAAAAyFX3TPH/6aefqlevXunuETl06FDz3zVq1JCTk5Oef/55TZw4Uc7OzrmWz+jRo63GTkhIUOnSpXNtPAAAAAAAsuueKP43btyoQ4cO6ZtvvrljbMOGDXX9+nUdO3ZMFStWlI+Pj86cOWMVk/Y87ToBt4q51XUEJMnZ2TlXv1wAAAAAACCn3BPn/M+bN09169ZVzZo17xgbExMjBwcHeXl5SZKCgoK0YcMGXbt2zYyJjIxUxYoVVbRoUTMmKirKqp/IyEgFBQXl4F4AAAAAAGAbNi3+ExMTFRMTo5iYGElSbGysYmJidPz4cTMmISFBERERevbZZ9O9Pjo6Wu+99552796tP/74QwsWLNCQIUP01FNPmYV9z5495eTkpLCwMO3fv1/ffPONZsyYYbVkf9CgQVq5cqWmTp2qgwcPaty4cdq+fbsGDhyYu28AAAAAAAB5wKbL/rdv364WLVqYz9MK8j59+ig8PFyS9PXXX8swDPXo0SPd652dnfX1119r3LhxSkpKUkBAgIYMGWJV2Ht4eOjnn3/WgAEDVLduXZUoUUJjx441b/MnSY0aNdLChQs1ZswY/e9//1OFChW0ZMkSVatWLZf2HAAAAACA
vGMxDMOwdRL2ICEhQR4eHoqPj5e7u7ut08l3wsK32TqFXDUvtL6tUwAAAABwn8lKHXpPnPMPAAAAAACyj+IfAAAAAAA7R/EPAAAAAICds+kF/wB7kdfXNOAaAwAAAACygpl/AAAAAADsHMU/AAAAAAB2juIfAAAAAAA7R/EPAAAAAICdo/gHAAAAAMDOUfwDAAAAAGDnKP4BAAAAALBzFP8AAAAAANg5in8AAAAAAOwcxT8AAAAAAHaO4h8AAAAAADtH8Q8AAAAAgJ2j+AcAAAAAwM5R/AMAAAAAYOco/gEAAAAAsHMU/wAAAAAA2DmKfwAAAAAA7BzFPwAAAAAAdo7iHwAAAAAAO0fxDwAAAACAnaP4BwAAAADAzlH8AwAAAABg5yj+AQAAAACwcxT/AAAAAADYOYp/AAAAAADsHMU/AAAAAAB2juIfAAAAAAA7R/EPAAAAAICdo/gHAAAAAMDOUfwDAAAAAGDnKP4BAAAAALBzFP8AAAAAANg5in8AAAAAAOwcxT8AAAAAAHaO4h8AAAAAADtH8Q8AAAAAgJ2j+AcAAAAAwM5R/AMAAAAAYOco/gEAAAAAsHMU/wAAAAAA2DmbFv8bNmxQhw4d5OfnJ4vFoiVLllhtDw0NlcVisXq0bdvWKubChQvq1auX3N3d5enpqbCwMCUmJlrF7NmzR02aNJGLi4tKly6tyZMnp8slIiJClSpVkouLi6pXr66ffvopx/cXAAAAAABbsGnxf/nyZdWsWVMffvjhLWPatm2r06dPm4+vvvrKanuvXr20f/9+RUZGatmyZdqwYYP69etnbk9ISFCbNm3k7++vHTt2aMqUKRo3bpzmzp1rxmzZskU9evRQWFiYdu3apU6dOqlTp07at29fzu80AAAAAAB5zGIYhmHrJCTJYrHo+++/V6dOncy20NBQXbx4Md2KgDQHDhxQlSpVtG3bNtWrV0+StHLlSrVv314nTpyQn5+fZs+erVdffVVxcXFycnKSJL3yyitasmSJDh48KEnq3r27Ll++rGXLlpl9P/TQQ6pVq5bmzJmT4dhJSUlKSkoynyckJKh06dKKj4+Xu7v73bwVdiksfJutU7Ar80Lr2zoFAAAAADaWkJAgDw+PTNWh+f6c/3Xr1snLy0sVK1bUCy+8oPPnz5vboqOj5enpaRb+ktSqVSs5ODho69atZkzTpk3Nwl+SgoODdejQIf3zzz9mTKtWrazGDQ4OVnR09C3zmjhxojw8PMxH6dKlc2R/AQAAAADIafm6+G/btq0+//xzRUVF6Z133tH69evVrl07paSkSJLi4uLk5eVl9RpHR0cVK1ZMcXFxZoy3t7dVTNrzO8Wkbc/I6NGjFR8fbz7++uuvu9tZAAAAAAByiaOtE7idJ5980vx39erVVaNGDQUGBmrdunVq2bKlDTOTnJ2d5ezsbNMccP/K69MoOM0AAAAAuLfl65n/m5UrV04lSpTQ77//Lkny8fHR2bNnrWKuX7+uCxcuyMfHx4w5c+aMVUza8zvFpG0HAAAAAOBedk8V/ydOnND58+fl6+srSQoKCtLFixe1Y8cOM2bNmjVKTU1Vw4YNzZgNGzbo2rVrZkxkZKQqVqyookWLmjFRUVFWY0VGRiooKCi3dwkAAAAAgFxn0+I/MTFRMTExiomJkSTFxsYqJiZGx48fV2JiokaMGKFffvlFx44dU1RUlDp27Kjy5csrODhYklS5cmW1bdtWzz33nH799Vdt3rxZAwcO1JNPPik/Pz9JUs+ePeXk5KSwsDDt379f33zzjWbMmKGhQ4eaeQwaNEgrV67U1KlTdfDgQY0bN07bt2/XwIED8/w9AQAAAAAgp9m0+N++fbtq166t2rVrS5KGDh2q2rVra+zYsSpQoID27Nmjxx57TA8++KDCwsJUt25dbdy40epc+wULFqhSpUpq2bKl2rdvr4cfflhz5841t3t4eOjnn39WbGys6tatq2HDhmns2LHq16+fGdOoUSMt
XLhQc+fOVc2aNfXtt99qyZIlqlatWt69GQAAAAAA5BKLYRiGrZOwB1m5v+L9KK8vUIecxQX/AAAAgPwnK3XoPXXOPwAAAAAAyDqKfwAAAAAA7BzFPwAAAAAAdo7iHwAAAAAAO0fxDwAAAACAnaP4BwAAAADAzlH8AwAAAABg5yj+AQAAAACwcxT/AAAAAADYOYp/AAAAAADsHMU/AAAAAAB2juIfAAAAAAA7R/EPAAAAAICdo/gHAAAAAMDOUfwDAAAAAGDnKP4BAAAAALBzFP8AAAAAANg5in8AAAAAAOwcxT8AAAAAAHaO4h8AAAAAADtH8Q8AAAAAgJ2j+AcAAAAAwM5R/AMAAAAAYOco/gEAAAAAsHMU/wAAAAAA2DmKfwAAAAAA7BzFPwAAAAAAdo7iHwAAAAAAO0fxDwAAAACAnaP4BwAAAADAzlH8AwAAAABg5yj+AQAAAACwcxT/AAAAAADYOYp/AAAAAADsHMU/AAAAAAB2juIfAAAAAAA7R/EPAAAAAICdo/gHAAAAAMDOUfwDAAAAAGDnKP4BAAAAALBzFP8AAAAAANg5in8AAAAAAOwcxT8AAAAAAHbOpsX/hg0b1KFDB/n5+clisWjJkiXmtmvXrmnUqFGqXr26ChcuLD8/P/Xu3VunTp2y6qNs2bKyWCxWj0mTJlnF7NmzR02aNJGLi4tKly6tyZMnp8slIiJClSpVkouLi6pXr66ffvopV/YZAAAAAIC8ZtPi//Lly6pZs6Y+/PDDdNuuXLminTt36rXXXtPOnTu1ePFiHTp0SI899li62DfeeEOnT582Hy+99JK5LSEhQW3atJG/v7927NihKVOmaNy4cZo7d64Zs2XLFvXo0UNhYWHatWuXOnXqpE6dOmnfvn25s+MAAAAAAOQhR1sO3q5dO7Vr1y7DbR4eHoqMjLRqmzlzpho0aKDjx4+rTJkyZrubm5t8fHwy7GfBggVKTk7Wp59+KicnJ1WtWlUxMTGaNm2a+vXrJ0maMWOG2rZtqxEjRkiSJkyYoMjISM2cOVNz5szJsN+kpCQlJSWZzxMSEjK/4wAAAAAA5KF76pz/+Ph4WSwWeXp6WrVPmjRJxYsXV+3atTVlyhRdv37d3BYdHa2mTZvKycnJbAsODtahQ4f0zz//mDGtWrWy6jM4OFjR0dG3zGXixIny8PAwH6VLl86BPQQAAAAAIOfdM8X/1atXNWrUKPXo0UPu7u5m+8svv6yvv/5aa9eu1fPPP6+3335bI0eONLfHxcXJ29vbqq+053FxcbeNSduekdGjRys+Pt58/PXXX3e9jwAAAAAA5AabLvvPrGvXrumJJ56QYRiaPXu21bahQ4ea/65Ro4acnJz0/PPPa+LEiXJ2ds61nJydnXO1fwAAAAAAckq+n/lPK/z//PNPRUZGWs36Z6Rhw4a6fv26jh07Jkny8fHRmTNnrGLSnqddJ+BWMbe6jgAAAAAAAPeSfF38pxX+R44c0erVq1W8ePE7viYmJkYODg7y8vKSJAUFBWnDhg26du2aGRMZGamKFSuqaNGiZkxUVJRVP5GRkQoKCsrBvQEAAAAAwDZsuuw/MTFRv//+u/k8NjZWMTExKlasmHx9fdWtWzft3LlTy5YtU0pKinkOfrFixeTk5KTo6Ght3bpVLVq0kJubm6KjozVkyBA99dRTZmHfs2dPjR8/XmFhYRo1apT27dunGTNmaPr06ea4gwYNUrNmzTR16lSFhITo66+/1vbt261uBwgAAAAAwL3KYhiGYavB161bpxYtWqRr79Onj8aNG6eAgIAMX7d27Vo1b95cO3fu1IsvvqiDBw8qKSlJAQEBevrppzV06FCr8/H37NmjAQMGaNu2bSpRooReeukljRo1yqrPiIgIjRkzRseOHVOFChU0efJktW/fPtP7kpCQIA8PD8XHx9/x1IT7UVj4NlungLswL7S+rVMAAAAAcJOs1KHZKv7/+OMPlStXLtsJ2iOK/9uj+L+3
UfwDAAAA+U9W6tBsnfNfvnx5tWjRQl9++aWuXr2arSQBAAAAAEDeyNbMf0xMjObPn6+vvvpKycnJ6t69u8LCwtSgQYPcyPGewMz/7THzj6xitQEAAABwe7k+81+rVi3NmDFDp06d0qeffqrTp0/r4YcfVrVq1TRt2jSdO3cuW4kDAAAAAICcd1e3+nN0dFSXLl0UERGhd955R7///ruGDx+u0qVLq3fv3jp9+nRO5QkAAAAAALLpror/7du368UXX5Svr6+mTZum4cOH6+jRo4qMjNSpU6fUsWPHnMoTAAAAAABkk2N2XjRt2jTNnz9fhw4dUvv27fX555+rffv2cnD477uEgIAAhYeHq2zZsjmZKwAAAAAAyIZsFf+zZ89W3759FRoaKl9f3wxjvLy8NG/evLtKDgAAAAAA3L1sFf9Hjhy5Y4yTk5P69OmTne4BAAAAAEAOytY5//Pnz1dERES69oiICH322Wd3nRQAAAAAAMg52Sr+J06cqBIlSqRr9/Ly0ttvv33XSQEAAAAAgJyTreL/+PHjCggISNfu7++v48eP33VSAAAAAAAg52Sr+Pfy8tKePXvSte/evVvFixe/66QAAAAAAEDOyVbx36NHD7388stau3atUlJSlJKSojVr1mjQoEF68sknczpHAAAAAABwF7J1tf8JEybo2LFjatmypRwd/+siNTVVvXv35px/AAAAAADymWwV/05OTvrmm280YcIE7d69W4UKFVL16tXl7++f0/kBAAAAAIC7lK3iP82DDz6oBx98MKdyAQAAAAAAuSBbxX9KSorCw8MVFRWls2fPKjU11Wr7mjVrciQ5AAAAAABw97JV/A8aNEjh4eEKCQlRtWrVZLFYcjovAAAAAACQQ7JV/H/99ddatGiR2rdvn9P5AAAAAACAHJatW/05OTmpfPnyOZ0LAAAAAADIBdkq/ocNG6YZM2bIMIyczgcAAAAAAOSwbC3737Rpk9auXasVK1aoatWqKliwoNX2xYsX50hyAAAAAADg7mWr+Pf09FTnzp1zOhcAAAAAAJALslX8z58/P6fzAAAAAAAAuSRb5/xL0vXr17V69Wp99NFHunTpkiTp1KlTSkxMzLHkAAAAAADA3cvWzP+ff/6ptm3b6vjx40pKSlLr1q3l5uamd955R0lJSZozZ05O5wkAAAAAALIpWzP/gwYNUr169fTPP/+oUKFCZnvnzp0VFRWVY8kBAAAAAIC7l62Z/40bN2rLli1ycnKyai9btqxOnjyZI4kBAAAAAICcka2Z/9TUVKWkpKRrP3HihNzc3O46KQAAAAAAkHOyVfy3adNG7733nvncYrEoMTFRr7/+utq3b59TuQEAAAAAgByQrWX/U6dOVXBwsKpUqaKrV6+qZ8+eOnLkiEqUKKGvvvoqp3MEAAAAAAB3IVvFf6lSpbR79259/fXX2rNnjxITExUWFqZevXpZXQAQAAAAAADYXraKf0lydHTUU089lZO5AAAAAACAXJCt4v/zzz+/7fbevXtnKxkAAAAAAJDzslX8Dxo0yOr5tWvXdOXKFTk5OcnV1ZXiHwAAAACAfCRbV/v/559/rB6JiYk6dOiQHn74YS74BwAAAABAPpOt4j8jFSpU0KRJk9KtCgAAAAAAALaVY8W/9N9FAE+dOpWTXQIAAAAAgLuUrXP+ly5davXcMAydPn1aM2fOVOPGjXMkMQAAAAAAkDOyVfx36tTJ6rnFYlHJkiX1yCOPaOrUqTmRFwAAAAAAyCHZKv5TU1NzOg8AsBIWvi1Px5sXWj9PxwMAAADyUo6e8w8AAAAAAPKfbM38Dx06NNOx06ZNy84QAAAAAAAgh2Rr5n/Xrl369NNP9dFHH2ndunVat26d5s6dq3nz5mnXrl3mIyYm5rb9bNiwQR06dJCfn58sFouWLFlitd0wDI0dO1a+vr4qVKiQWrVqpSNHjljFXLhwQb169ZK7u7s8PT0VFhamxMREq5g9e/aoSZMmcnFx
UenSpTV58uR0uURERKhSpUpycXFR9erV9dNPP2XnrQEAAAAAIN/JVvHfoUMHNW3aVCdOnNDOnTu1c+dO/fXXX2rRooUeffRRrV27VmvXrtWaNWtu28/ly5dVs2ZNffjhhxlunzx5st5//33NmTNHW7duVeHChRUcHKyrV6+aMb169dL+/fsVGRmpZcuWacOGDerXr5+5PSEhQW3atJG/v7927NihKVOmaNy4cZo7d64Zs2XLFvXo0UNhYWHatWuXOnXqpE6dOmnfvn3ZeXsAAAAAAMhXLIZhGFl90QMPPKCff/5ZVatWtWrft2+f2rRpo1OnTmU9EYtF33//vXknAcMw5Ofnp2HDhmn48OGSpPj4eHl7eys8PFxPPvmkDhw4oCpVqmjbtm2qV6+eJGnlypVq3769Tpw4IT8/P82ePVuvvvqq4uLi5OTkJEl65ZVXtGTJEh08eFCS1L17d12+fFnLli0z83nooYdUq1YtzZkzJ1P5JyQkyMPDQ/Hx8XJ3d8/y/tu7vL54G5BVXPAPAAAA95qs1KHZmvlPSEjQuXPn0rWfO3dOly5dyk6X6cTGxiouLk6tWrUy2zw8PNSwYUNFR0dLkqKjo+Xp6WkW/pLUqlUrOTg4aOvWrWZM06ZNzcJfkoKDg3Xo0CH9888/ZsyN46TFpI2TkaSkJCUkJFg9AAAAAADIj7JV/Hfu3FnPPPOMFi9erBMnTujEiRP67rvvFBYWpi5duuRIYnFxcZIkb29vq3Zvb29zW1xcnLy8vKy2Ozo6qlixYlYxGfVx4xi3iknbnpGJEyfKw8PDfJQuXTqruwgAAAAAQJ7IVvE/Z84ctWvXTj179pS/v7/8/f3Vs2dPtW3bVrNmzcrpHPOl0aNHKz4+3nz89ddftk4JAAAAAIAMZetWf66urpo1a5amTJmio0ePSpICAwNVuHDhHEvMx8dHknTmzBn5+vqa7WfOnFGtWrXMmLNnz1q97vr167pw4YL5eh8fH505c8YqJu35nWLStmfE2dlZzs7O2dgzAAAAAADyVrZm/tOcPn1ap0+fVoUKFVS4cGFl49qBtxQQECAfHx9FRUWZbQkJCdq6dauCgoIkSUFBQbp48aJ27NhhxqxZs0apqalq2LChGbNhwwZdu3bNjImMjFTFihVVtGhRM+bGcdJi0sYBAAAAAOBelq3i//z582rZsqUefPBBtW/fXqdPn5YkhYWFadiwYZnuJzExUTExMYqJiZH030X+YmJidPz4cVksFg0ePFhvvvmmli5dqr1796p3797y8/Mz7whQuXJltW3bVs8995x+/fVXbd68WQMHDtSTTz4pPz8/SVLPnj3l5OSksLAw7d+/X998841mzJihoUOHmnkMGjRIK1eu1NSpU3Xw4EGNGzdO27dv18CBA7Pz9gAAAAAAkK9kq/gfMmSIChYsqOPHj8vV1dVs7969u1auXJnpfrZv367atWurdu3akqShQ4eqdu3aGjt2rCRp5MiReumll9SvXz/Vr19fiYmJWrlypVxcXMw+FixYoEqVKqlly5Zq3769Hn74Yc2dO9fc7uHhoZ9//lmxsbGqW7euhg0bprFjx6pfv35mTKNGjbRw4ULNnTtXNWvW1LfffqslS5aoWrVq2Xl7AAAAAADIVyxGNtbq+/j4aNWqVapZs6bc3Ny0e/dulStXTn/88Ydq1KihxMTE3Mg1X8vK/RXvR2Hh22ydAnBb80Lr2zoFAAAAIEuyUodma+b/8uXLVjP+aS5cuMBF8AAAAAAAyGeyVfw3adJEn3/+ufncYrEoNTVVkydPVosWLXIsOQAAAAAAcPeydau/yZMnq2XLltq+fbuSk5M1cuRI7d+/XxcuXNDmzZtzOkcAAAAAAHAXsjXzX61aNR0+fFgPP/ywOnbsqMuXL6tLly7atWuXAgMDczpHAAAAAABwF7I883/t2jW1bdtWc+bM0auvvpobOQEAAAAAgByU5Zn/ggULas+ePbmRCwAAAAAAyAXZ
Wvb/1FNPad68eTmdCwAAAAAAyAXZuuDf9evX9emnn2r16tWqW7euChcubLV92rRpOZIcAAAAAAC4e1kq/v/44w+VLVtW+/btU506dSRJhw8ftoqxWCw5lx0AAAAAALhrWSr+K1SooNOnT2vt2rWSpO7du+v999+Xt7d3riQHAAAAAADuXpbO+TcMw+r5ihUrdPny5RxNCAAAAAAA5KxsXfAvzc1fBgAAAAAAgPwnS8W/xWJJd04/5/gDAAAAAJC/Zemcf8MwFBoaKmdnZ0nS1atX1b9//3RX+1+8eHHOZQgAAAAAAO5Klor/Pn36WD1/6qmncjQZAAAAAACQ87JU/M+fPz+38gAAAAAAALnkri74BwAAAAAA8j+KfwAAAAAA7BzFPwAAAAAAdo7iHwAAAAAAO0fxDwAAAACAncvS1f4BwF6FhW/L0/HmhdbP0/EAAABwf2PmHwAAAAAAO0fxDwAAAACAnaP4BwAAAADAzlH8AwAAAABg5yj+AQAAAACwcxT/AAAAAADYOYp/AAAAAADsHMU/AAAAAAB2juIfAAAAAAA7R/EPAAAAAICdo/gHAAAAAMDOUfwDAAAAAGDnKP4BAAAAALBzFP8AAAAAANg5in8AAAAAAOwcxT8AAAAAAHaO4h8AAAAAADtH8Q8AAAAAgJ2j+AcAAAAAwM452joBALgfhYVvy9Px5oXWz9PxAAAAkL8w8w8AAAAAgJ3L98V/2bJlZbFY0j0GDBggSWrevHm6bf3797fq4/jx4woJCZGrq6u8vLw0YsQIXb9+3Spm3bp1qlOnjpydnVW+fHmFh4fn1S4CAAAAAJCr8v2y/23btiklJcV8vm/fPrVu3VqPP/642fbcc8/pjTfeMJ+7urqa/05JSVFISIh8fHy0ZcsWnT59Wr1791bBggX19ttvS5JiY2MVEhKi/v37a8GCBYqKitKzzz4rX19fBQcH58FeAgAAAACQe/J98V+yZEmr55MmTVJgYKCaNWtmtrm6usrHxyfD1//888/67bfftHr1anl7e6tWrVqaMGGCRo0apXHjxsnJyUlz5sxRQECApk6dKkmqXLmyNm3apOnTp1P8AwAAAADuefl+2f+NkpOT9eWXX6pv376yWCxm+4IFC1SiRAlVq1ZNo0eP1pUrV8xt0dHRql69ury9vc224OBgJSQkaP/+/WZMq1atrMYKDg5WdHT0LXNJSkpSQkKC1QMAAAAAgPwo38/832jJkiW6ePGiQkNDzbaePXvK399ffn5+2rNnj0aNGqVDhw5p8eLFkqS4uDirwl+S+TwuLu62MQkJCfr3339VqFChdLlMnDhR48ePz8ndAwAAAAAgV9xTxf+8efPUrl07+fn5mW39+vUz/129enX5+vqqZcuWOnr0qAIDA3Mtl9GjR2vo0KHm84SEBJUuXTrXxgMAAAAAILvumeL/zz//1OrVq80Z/Vtp2LChJOn3339XYGCgfHx89Ouvv1rFnDlzRpLM6wT4+PiYbTfGuLu7ZzjrL0nOzs5ydnbO1r4AAAAAAJCX7plz/ufPny8vLy+FhITcNi4mJkaS5OvrK0kKCgrS3r17dfbsWTMmMjJS7u7uqlKlihkTFRVl1U9kZKSCgoJycA8AAAAAALCNe6L4T01N1fz589WnTx85Ov7/xQpHjx7VhAkTtGPHDh07dkxLly5V79691bRpU9WoUUOS1KZNG1WpUkVPP/20du/erVWrVmnMmDEaMGCAOXPfv39//fHHHxo5cqQOHjyoWbNmadGiRRoyZIhN9hcAAAAAgJx0TxT/q1ev1vHjx9W3b1+rdicnJ61evVpt2rRRpUqVNGzYMHXt2lU//vijGVOgQAEtW7ZMBQoUUFBQkJ566in17t1bb7zxhhkTEBCg5cuXKzIyUjVr1tTUqVP1ySefcJs/AAAAAIBdsBiGYdg6CXuQkJAgDw8PxcfHy93d3dbp5Dth4dtsnQJwX5sXWt/WKQAAACCHZaUOvSdm/gEAAAAA
QPZR/AMAAAAAYOco/gEAAAAAsHMU/wAAAAAA2DmKfwAAAAAA7BzFPwAAAAAAdo7iHwAAAAAAO0fxDwAAAACAnaP4BwAAAADAzlH8AwAAAABg5yj+AQAAAACwcxT/AAAAAADYOUdbJwAAyH1h4dvydLx5ofXzdDwAAADcHjP/AAAAAADYOYp/AAAAAADsHMU/AAAAAAB2juIfAAAAAAA7R/EPAAAAAICdo/gHAAAAAMDOUfwDAAAAAGDnKP4BAAAAALBzFP8AAAAAANg5in8AAAAAAOwcxT8AAAAAAHaO4h8AAAAAADtH8Q8AAAAAgJ2j+AcAAAAAwM5R/AMAAAAAYOco/gEAAAAAsHMU/wAAAAAA2DmKfwAAAAAA7BzFPwAAAAAAdo7iHwAAAAAAO0fxDwAAAACAnaP4BwAAAADAzjnaOgEAgP0JC9+Wp+PNC62fp+MBAADca5j5BwAAAADAzlH8AwAAAABg5yj+AQAAAACwcxT/AAAAAADYOYp/AAAAAADsHMU/AAAAAAB2juIfAAAAAAA7l6+L/3HjxslisVg9KlWqZG6/evWqBgwYoOLFi6tIkSLq2rWrzpw5Y9XH8ePHFRISIldXV3l5eWnEiBG6fv26Vcy6detUp04dOTs7q3z58goPD8+L3QMAAAAAIE/k6+JfkqpWrarTp0+bj02bNpnbhgwZoh9//FERERFav369Tp06pS5dupjbU1JSFBISouTkZG3ZskWfffaZwsPDNXbsWDMmNjZWISEhatGihWJiYjR48GA9++yzWrVqVZ7uJwAAAAAAucXR1gnciaOjo3x8fNK1x8fHa968eVq4cKEeeeQRSdL8+fNVuXJl/fLLL3rooYf0888/67ffftPq1avl7e2tWrVqacKECRo1apTGjRsnJycnzZkzRwEBAZo6daokqXLlytq0aZOmT5+u4ODgPN1XAAAAAAByQ76f+T9y5Ij8/PxUrlw59erVS8ePH5ck7dixQ9euXVOrVq3M2EqVKqlMmTKKjo6WJEVHR6t69ery9vY2Y4KDg5WQkKD9+/ebMTf2kRaT1setJCUlKSEhweoBAAAAAEB+lK+L/4YNGyo8PFwrV67U7NmzFRsbqyZNmujSpUuKi4uTk5OTPD09rV7j7e2tuLg4SVJcXJxV4Z+2PW3b7WISEhL077//3jK3iRMnysPDw3yULl36bncXAAAAAIBcka+X/bdr1878d40aNdSwYUP5+/tr0aJFKlSokA0zk0aPHq2hQ4eazxMSEvgCAAAAAACQL+Xrmf+beXp66sEHH9Tvv/8uHx8fJScn6+LFi1YxZ86cMa8R4OPjk+7q/2nP7xTj7u5+2y8YnJ2d5e7ubvUAAAAAACA/uqeK/8TERB09elS+vr6qW7euChYsqKioKHP7oUOHdPz4cQUFBUmSgoKCtHfvXp09e9aMiYyMlLu7u6pUqWLG3NhHWkxaHwAAAAAA3OvydfE/fPhwrV+/XseOHdOWLVvUuXNnFShQQD169JCHh4fCwsI0dOhQrV27Vjt27NAzzzyjoKAgPfTQQ5KkNm3aqEqVKnr66ae1e/durVq1SmPGjNGAAQPk7OwsSerfv7/++OMPjRw5UgcPHtSsWbO0aNEiDRkyxJa7DgAAAABAjsnX5/yfOHFCPXr00Pnz51WyZEk9/PDD+uWXX1SyZElJ0vTp0+Xg4KCuXbsqKSlJwcHBmjVrlvn6AgUKaNmyZXrhhRcUFBSkwoULq0+fPnrjjTfMmICAAC1fvlxDhgzRjBkzVKpUKX3yySfc5g8A7iFh4dvyfMx5ofXzfEwAAIDsshiGYdg6CXuQkJAgDw8PxcfHc/5/BmzxhzkA5CaKfwAAYGtZqUPz9bJ/AAAAAABw9yj+AQAAAACwcxT/AAAAAADYOYp/AAAAAADsHMU/AAAAAAB2juIfAAAAAAA7R/EPAAAAAICdo/gHAAAAAMDOUfwDAAAAAGDnKP4BAAAAALBzFP8AAAAAANg5in8A
AAAAAOwcxT8AAAAAAHaO4h8AAAAAADtH8Q8AAAAAgJ2j+AcAAAAAwM452joBAADuRWHh2/J0vHmh9fN0PAAAYF+Y+QcAAAAAwM5R/AMAAAAAYOco/gEAAAAAsHMU/wAAAAAA2DmKfwAAAAAA7BzFPwAAAAAAdo7iHwAAAAAAO0fxDwAAAACAnaP4BwAAAADAzlH8AwAAAABg5yj+AQAAAACwcxT/AAAAAADYOUdbJwAAAO4sLHxbno43L7R+no4HAAByFzP/AAAAAADYOYp/AAAAAADsHMU/AAAAAAB2jnP+70N5fd4oAAAAAMC2mPkHAAAAAMDOUfwDAAAAAGDnKP4BAAAAALBzFP8AAAAAANg5in8AAAAAAOwcxT8AAAAAAHaO4h8AAAAAADtH8Q8AAAAAgJ1ztHUCAAAg/wkL35an480LrZ+n4wEAcL/J1zP/EydOVP369eXm5iYvLy916tRJhw4dsopp3ry5LBaL1aN///5WMcePH1dISIhcXV3l5eWlESNG6Pr161Yx69atU506deTs7Kzy5csrPDw8t3cPAAAAAIA8ka+L//Xr12vAgAH65ZdfFBkZqWvXrqlNmza6fPmyVdxzzz2n06dPm4/Jkyeb21JSUhQSEqLk5GRt2bJFn332mcLDwzV27FgzJjY2ViEhIWrRooViYmI0ePBgPfvss1q1alWe7SsAAAAAALklXy/7X7lypdXz8PBweXl5aceOHWratKnZ7urqKh8fnwz7+Pnnn/Xbb79p9erV8vb2Vq1atTRhwgSNGjVK48aNk5OTk+bMmaOAgABNnTpVklS5cmVt2rRJ06dPV3BwcO7tIHLVS2fG5Ol4H3i/mafjAQAAAEBm5euZ/5vFx8dLkooVK2bVvmDBApUoUULVqlXT6NGjdeXKFXNbdHS0qlevLm9vb7MtODhYCQkJ2r9/vxnTqlUrqz6Dg4MVHR19y1ySkpKUkJBg9QAAAAAAID/K1zP/N0pNTdXgwYPVuHFjVatWzWzv2bOn/P395efnpz179mjUqFE6dOiQFi9eLEmKi4uzKvwlmc/j4uJuG5OQkKB///1XhQoVSpfPxIkTNX78+BzdR+SsqZ7n83Q8pzwdDQAAAAAy754p/gcMGKB9+/Zp06ZNVu39+vUz/129enX5+vqqZcuWOnr0qAIDA3Mtn9GjR2vo0KHm84SEBJUuXTrXxgMAwJ5xdwEAAHLXPVH8Dxw4UMuWLdOGDRtUqlSp28Y2bNhQkvT7778rMDBQPj4++vXXX61izpw5I0nmdQJ8fHzMthtj3N3dM5z1lyRnZ2c5Oztna3/uR8lJL9g6BQAAAAC4b+Xrc/4Nw9DAgQP1/fffa82aNQoICLjja2JiYiRJvr6+kqSgoCDt3btXZ8+eNWMiIyPl7u6uKlWqmDFRUVFW/URGRiooKCiH9gQAAAAAANvJ18X/gAED9OWXX2rhwoVyc3NTXFyc4uLi9O+//0qSjh49qgkTJmjHjh06duyYli5dqt69e6tp06aqUaOGJKlNmzaqUqWKnn76ae3evVurVq3SmDFjNGDAAHPmvn///vrjjz80cuRIHTx4ULNmzdKiRYs0ZMgQm+07AAAAAAA5JV8X/7Nnz1Z8fLyaN28uX19f8/HNN99IkpycnLR69Wq1adNGlSpV0rBhw9S1a1f9+OOPZh8FChTQsmXLVKBAAQUFBempp55S79699cYbb5gxAQEBWr58uSIjI1WzZk1NnTpVn3zyCbf5AwAAAADYhXx9zr9hGLfdXrp0aa1fv/6O/fj7++unn366bUzz5s21a9euLOUHAAAAAMC9IF/P/AMAAAAAgLuXr2f+gXvJS2fG5Ol4H3i/mafjAQAAALh3UfwDOWSq5/k8Hc8pT0cDAAAAcC9j2T8AAAAAAHaO4h8AAAAAADvHsn8AAHDfCQvfludjzgutn+djAgCQhpl/AAAAAADsHDP/wD2KuwsAAAAAyCyKf+Aexd0FAAAAAGQWy/4BAAAA
ALBzzPwDAADkgby+yCAXGAQA3IiZfwAAAAAA7BzFPwAAAAAAdo5l/wAy5bBm5Ol4D2pQno4HAAAA2DOKfwCZUjbpYN4O6Jy3wwEAAAD2jOIfAADADnGBQQDAjTjnHwAAAAAAO0fxDwAAAACAnWPZPwAAAO4apxkAQP5G8Q8gX+LuAgAAAEDOofgHkC9xdwEAAAAg53DOPwAAAAAAdo6ZfwAAANxzuMYAAGQNxT8ASEpOeiFPx3Nynp2n4wEAAOD+xrJ/AAAAAADsHDP/AAAAwB1wmgGAex3FPwDYQF6fZiBxqgEAAMD9jGX/AAAAAADYOYp/AAAAAADsHMv+AeA+wR0NAODekdfXGJC4zgBg7yj+AQC54rBm5Ol4D2pQno4HAPaGixoC9o1l/wAAAAAA2Dlm/gEAuaJs0sG8HdA5b4cDANwdVhoAeYuZfwAAAAAA7Bwz/wAAu8AFDQEAt8NKA9zvKP4BAAAAIIfxZQPyG4p/AACygZUGAID8hNtD4k4o/u9Tef1HKwDg7rx0ZkyejveB95t5Oh4A4N7D6oZ7C8U/AAAAACDf48uGu0PxDwDAPWCq5/m8HdAGK8Q4tQEAgNxD8Q8AAPIFrqMAAEDuofgHAAD3Jb5sAADcTyj+AQAA8gBfNgAAbIni/yYffvihpkyZori4ONWsWVMffPCBGjRoYOu0AAAAssTe7+wz7GLxPB2PO2AAuNdR/N/gm2++0dChQzVnzhw1bNhQ7733noKDg3Xo0CF5eXnZOj0AAAD8n/vhIpj2ji9wgLxF8X+DadOm6bnnntMzzzwjSZozZ46WL1+uTz/9VK+88oqNswMAAADsh71/gZPXX2685F0yT8eTpLJJB/N0PE5nujsU//8nOTlZO3bs0OjRo802BwcHtWrVStHR0enik5KSlJSUZD6Pj4+XJCUkJOR+sncp+d9EJSel2DoNAAAAwG5NdD6bp+P5Xczb8SQpOa/H+7dfno6XkLA+T8fLjrT60zCMO8ZS/P+fv//+WykpKfL29rZq9/b21sGD6b/RmjhxosaPH5+uvXTp0rmWIwAAAAAgbywa4mHrFDLt0qVL8vC4fb4U/9k0evRoDR061HyempqqCxcuqHjx4rJYLDbJKSEhQaVLl9Zff/0ld3d3m+QAZIRjE/kVxybyK45N5Gccn8iv7sdj0zAMXbp0SX5+fneMpfj/PyVKlFCBAgV05swZq/YzZ87Ix8cnXbyzs7OcnZ2t2jw9PXMzxUxzd3e/bw523Fs4NpFfcWwiv+LYRH7G8Yn86n47Nu8045/GIZfzuGc4OTmpbt26ioqKMttSU1MVFRWloKAgG2YGAAAAAMDdYeb/BkOHDlWfPn1Ur149NWjQQO+9954uX75sXv0fAAAAAIB7EcX/Dbp3765z585p7NixiouLU61atbRy5cp0FwHMr5ydnfX666+nOx0BsDWOTeRXHJvIrzg2kZ9xfCK/4ti8PYuRmXsCAAAAAACAexbn/AMAAAAAYOco/gEAAAAAsHMU/wAAAAAA2DmKfwAAAAAA7BzFvx358MMPVbZsWbm4uKhhw4b69ddfbZ0S7NjEiRNVv359ubm5ycvLS506ddKhQ4esYq5evaoBAwaoePHiKlKkiLp27aozZ85YxRw/flwhISFydXWVl5eXRowYoevXr+flrsDOTZo0SRaLRYMHDzbbODZhKydPntRTTz2l4sWLq1ChQqpevbq2b99ubjcMQ2PHjpWvr68KFSqkVq1a6ciRI1Z9XLhwQb169ZK7u7s8PT0VFhamxMTEvN4V2JGUlBS99tprCggIUKFChRQYGKgJEyboxuuCc2wir2zYsEEdOnSQn5+fLBaLlixZYrU9p47FPXv2qEmTJnJxcVHp0qU1efLk3N41m6P4txPffPONhg4dqtdff107d+5UzZo1FRwcrLNnz9o6Ndip9evXa8CAAfrll18UGRmpa9euqU2bNrp8+bIZM2TIEP3444+KiIjQ+vXrderUKXXp
0sXcnpKSopCQECUnJ2vLli367LPPFB4errFjx9pil2CHtm3bpo8++kg1atSwaufYhC38888/aty4sQoWLKgVK1bot99+09SpU1W0aFEzZvLkyXr//fc1Z84cbd26VYULF1ZwcLCuXr1qxvTq1Uv79+9XZGSkli1bpg0bNqhfv3622CXYiXfeeUezZ8/WzJkzdeDAAb3zzjuaPHmyPvjgAzOGYxN55fLly6pZs6Y+/PDDDLfnxLGYkJCgNm3ayN/fXzt27NCUKVM0btw4zZ07N9f3z6YM2IUGDRoYAwYMMJ+npKQYfn5+xsSJE22YFe4nZ8+eNSQZ69evNwzDMC5evGgULFjQiIiIMGMOHDhgSDKio6MNwzCMn376yXBwcDDi4uLMmNmzZxvu7u5GUlJS3u4A7M6lS5eMChUqGJGRkUazZs2MQYMGGYbBsQnbGTVqlPHwww/fcntqaqrh4+NjTJkyxWy7ePGi4ezsbHz11VeGYRjGb7/9Zkgytm3bZsasWLHCsFgsxsmTJ3Mvedi1kJAQo2/fvlZtXbp0MXr16mUYBscmbEeS8f3335vPc+pYnDVrllG0aFGr/9NHjRplVKxYMZf3yLaY+bcDycnJ2rFjh1q1amW2OTg4qFWrVoqOjrZhZrifxMfHS5KKFSsmSdqxY4euXbtmdVxWqlRJZcqUMY/L6OhoVa9eXd7e3mZMcHCwEhIStH///jzMHvZowIABCgkJsToGJY5N2M7SpUtVr149Pf744/Ly8lLt2rX18ccfm9tjY2MVFxdndWx6eHioYcOGVsemp6en6tWrZ8a0atVKDg4O2rp1a97tDOxKo0aNFBUVpcOHD0uSdu/erU2bNqldu3aSODaRf+TUsRgdHa2mTZvKycnJjAkODtahQ4f0zz//5NHe5D1HWyeAu/f3338rJSXF6o9USfL29tbBgwdtlBXuJ6mpqRo8eLAaN26satWqSZLi4uLk5OQkT09Pq1hvb2/FxcWZMRkdt2nbgOz6+uuvtXPnTm3bti3dNo5N2Moff/yh2bNna+jQofrf//6nbdu26eWXX5aTk5P69OljHlsZHXs3HpteXl5W2x0dHVWsWDGOTWTbK6+8ooSEBFWqVEkFChRQSkqK3nrrLfXq1UuSODaRb+TUsRgXF6eAgIB0faRtu/F0LHtC8Q/grg0YMED79u3Tpk2bbJ0KoL/++kuDBg1SZGSkXFxcbJ0OYEpNTVW9evX09ttvS5Jq166tffv2ac6cOerTp4+Ns8P9bNGiRVqwYIEWLlyoqlWrKiYmRoMHD5afnx/HJmBHWPZvB0qUKKECBQqku1L1mTNn5OPjY6OscL8YOHCgli1bprVr16pUqVJmu4+Pj5KTk3Xx4kWr+BuPSx8fnwyP27RtQHbs2LFDZ8+eVZ06deTo6ChHR0etX79e77//vhwdHeXt7c2xCZvw9fVVlSpVrNoqV66s48ePS/r/x9bt/j/38fFJdzHf69ev68KFCxybyLYRI0bolVde0ZNPPqnq1avr6aef1pAhQzRx4kRJHJvIP3LqWLxf/5+n+LcDTk5Oqlu3rqKiosy21NRURUVFKSgoyIaZwZ4ZhqGBAwfq+++/15o1a9Itnapbt64KFixodVweOnRIx48fN4/LoKAg7d271+oDOjIyUu7u7un+QAYyq2XLltq7d69iYmLMR7169dSrVy/z3xybsIXGjRunuyXq4cOH5e/vL0kKCAiQj4+P1bGZkJCgrVu3Wh2bFy9e1I4dO8yYNWvWKDU1VQ0bNsyDvYA9unLlihwcrMuCAgUKKDU1VRLHJvKPnDoWg4KCtGHDBl27ds2MiYyMVMWKFe12yb8krvZvL77++mvD2dnZCA8PN3777TejX79+hqenp9WVqoGc9MILLxgeHh7GunXrjNOnT5uPK1eumDH9+/c3ypQpY6xZs8bYvn27ERQUZAQFBZnbr1+/blSrVs1o06aNERMTY6xcudIoWbKkMXr0aFvsEuzYjVf7
NwyOTdjGr7/+ajg6OhpvvfWWceTIEWPBggWGq6ur8eWXX5oxkyZNMjw9PY0ffvjB2LNnj9GxY0cjICDA+Pfff82Ytm3bGrVr1za2bt1qbNq0yahQoYLRo0cPW+wS7ESfPn2MBx54wFi2bJkRGxtrLF682ChRooQxcuRIM4ZjE3nl0qVLxq5du4xdu3YZkoxp06YZu3btMv7880/DMHLmWLx48aLh7e1tPP3008a+ffuMr7/+2nB1dTU++uijPN/fvETxb0c++OADo0yZMoaTk5PRoEED45dffrF1SrBjkjJ8zJ8/34z5999/jRdffNEoWrSo4erqanTu3Nk4ffq0VT/Hjh0z2rVrZxQqVMgoUaKEMWzYMOPatWt5vDewdzcX/xybsJUff/zRqFatmuHs7GxUqlTJmDt3rtX21NRU47XXXjO8vb0NZ2dno2XLlsahQ4esYs6fP2/06NHDKFKkiOHu7m4888wzxqVLl/JyN2BnEhISjEGDBhllypQxXFxcjHLlyhmvvvqq1W3QODaRV9auXZvh35h9+vQxDCPnjsXdu3cbDz/8sOHs7Gw88MADxqRJk/JqF23GYhiGYZs1BwAAAAAAIC9wzj8AAAAAAHaO4h8AAAAAADtH8Q8AAAAAgJ2j+AcAAAAAwM5R/AMAAAAAYOco/gEAAAAAsHMU/wAAAAAA2DmKfwAAAAAA7BzFPwAAQC6wWCxasmSJrdMAAEASxT8AAPnWuXPn9MILL6hMmTJydnaWj4+PgoODtXnzZlunlm/khwJ73LhxqlWrlk1zAADgThxtnQAAAMhY165dlZycrM8++0zlypXTmTNnFBUVpfPnz9s6NQAAcI9h5h8AgHzo4sWL2rhxo9555x21aNFC/v7+atCggUaPHq3HHnvMKu7ZZ59VyZIl5e7urkceeUS7d++26mvSpEny9vaWm5ubwsLC9Morr1jNVDdv3lyDBw+2ek2nTp0UGhpqPk9KStLw4cP1wAMPqHDhwmrYsKHWrVtnbg8PD5enp6dWrVqlypUrq0iRImrbtq1Onz5t1e+nn36qqlWrytnZWb6+vho4cGCW9iWrPvnkE1WuXFkuLi6qVKmSZs2aZW47duyYLBaLFi9erBYtWsjV1VU1a9ZUdHS0VR8ff/yxSpcuLVdXV3Xu3FnTpk2Tp6enud/jx4/X7t27ZbFYZLFYFB4ebr7277//VufOneXq6qoKFSpo6dKld7U/AABkF8U/AAD5UJEiRVSkSBEtWbJESUlJt4x7/PHHdfbsWa1YsUI7duxQnTp11LJlS124cEGStGjRIo0bN05vv/22tm/fLl9fX6sCOLMGDhyo6Ohoff3119qzZ48ef/xxtW3bVkeOHDFjrly5onfffVdffPGFNmzYoOPHj2v48OHm9tmzZ2vAgAHq16+f9u7dq6VLl6p8+fKZ3pesWrBggcaOHau33npLBw4c0Ntvv63XXntNn332mVXcq6++quHDhysmJkYPPvigevTooevXr0uSNm/erP79+2vQoEGKiYlR69at9dZbb5mv7d69u4YNG6aqVavq9OnTOn36tLp3725uHz9+vJ544gnt2bNH7du3V69evbK9PwAA3BUDAADkS99++61RtGhRw8XFxWjUqJExevRoY/fu3eb2jRs3Gu7u7sbVq1etXhcYGGh89NFHhmEYRlBQkPHiiy9abW/YsKFRs2ZN83mzZs2MQYMGWcV07NjR6NOnj2EYhvHnn38aBQoUME6ePGkV07JlS2P06NGGYRjG/PnzDUnG77//bm7/8MMPDW9vb/O5n5+f8eqrr2a4r5nZl4xIMr7//vsMtwUGBhoLFy60apswYYIRFBRkGIZhxMbGGpKMTz75xNy+f/9+Q5Jx4MABwzAMo3v37kZISIhVH7169TI8PDzM56+//rrV+3ljbmPGjDGfJyYmGpKMFStW3HJ/AADILcz8AwCQT3Xt2lWnTp3S0qVL1bZtW61bt0516tQxl5Xv3r1biYmJ
Kl68uLlSoEiRIoqNjdXRo0clSQcOHFDDhg2t+g0KCspSHnv37lVKSooefPBBq3HWr19vjiNJrq6uCgwMNJ/7+vrq7NmzkqSzZ8/q1KlTatmyZYZjZGZfsuLy5cs6evSowsLCrPp788030/VXo0YNq5zT8pWkQ4cOqUGDBlbxNz+/nRv7Lly4sNzd3c2+AQDIS1zwDwCAfMzFxUWtW7dW69at9dprr+nZZ5/V66+/rtDQUCUmJsrX19fq3Ps0aeekZ4aDg4MMw7Bqu3btmvnvxMREFShQQDt27FCBAgWs4ooUKWL+u2DBglbbLBaL2W+hQoVum0NO7cuN/Un/na9/85cfN+/DjXlbLBZJUmpqapbHzEhG70lO9Q0AQFZQ/AMAcA+pUqWKeWu7OnXqKC4uTo6OjipbtmyG8ZUrV9bWrVvVu3dvs+2XX36xiilZsqTVhflSUlK0b98+tWjRQpJUu3ZtpaSk6OzZs2rSpEm28nZzc1PZsmUVFRVl9nujzOxLVnh7e8vPz09//PGHevXqle1+KlasqG3btlm13fzcyclJKSkp2R4DAIC8QPEPAEA+dP78eT3++OPq27evatSoITc3N23fvl2TJ09Wx44dJUmtWrVSUFCQOnXqpMmTJ+vBBx/UqVOntHz5cnXu3Fn16tXToEGDFBoaqnr16qlx48ZasGCB9u/fr3LlypljPfLIIxo6dKiWL1+uwMBATZs2TRcvXjS3P/jgg+rVq5d69+6tqVOnqnbt2jp37pyioqJUo0YNhYSEZGqfxo0bp/79+8vLy0vt2rXTpUuXtHnzZr300kuZ2pdbiY2NVUxMjFVbhQoVNH78eL388svy8PBQ27ZtlZSUpO3bt+uff/7R0KFDM5XzSy+9pKZNm2ratGnq0KGD1qxZoxUrVpgrBCSpbNmyZg6lSpWSm5ubnJ2dM9U/AAB5heIfAIB8qEiRImrYsKGmT5+uo0eP6tq1aypdurSee+45/e9//5P03xLyn376Sa+++qqeeeYZnTt3Tj4+PmratKm8vb0l/Xc1+qNHj2rkyJG6evWqunbtqhdeeEGrVq0yx+rbt692796t3r17y9HRUUOGDEk3Oz9//ny9+eabGjZsmE6ePKkSJUrooYce0qOPPprpferTp4+uXr2q6dOna/jw4SpRooS6deuW6X25lYwK+Y0bN+rZZ5+Vq6urpkyZohEjRqhw4cKqXr16utsa3k7jxo01Z84cjR8/XmPGjFFwcLCGDBmimTNnmjFdu3Y1bxd48eJFzZ8/3+o2iQAA5AcW4+aT/AAAgF0bN26clixZkm62HJnz3HPP6eDBg9q4caOtUwEAINOY+QcAALiNd999V61bt1bhwoW1YsUKffbZZ5o1a5at0wIAIEso/gEAAG7j119/1eTJk3Xp0iWVK1dO77//vp599llbpwUAQJaw7B8AAAAAADvnYOsEAAAAAABA7qL4BwAAAADAzlH8AwAAAABg5yj+AQAAAACwcxT/AAAAAADYOYp/AAAAAADsHMU/AAAAAAB2juIfAAAAAAA79/8ACZ13sxrznTkAAAAASUVORK5CYII=",
118
+ "text/plain": [
119
+ "<Figure size 1200x600 with 1 Axes>"
120
+ ]
121
+ },
122
+ "metadata": {},
123
+ "output_type": "display_data"
124
+ }
125
+ ],
126
+ "source": [
127
+ "# 2. Sequence lengths histogram\n",
128
+ "plt.figure(figsize=(12, 6))\n",
129
+ "for name, df in datasets.items():\n",
130
+ " plt.hist(df['Length'], bins=30, alpha=0.7, label=name)\n",
131
+ "\n",
132
+ "plt.title('UniRef50 (200k Sequences) - Train, Test, and Val Sequence Lengths')\n",
133
+ "plt.xlabel('Sequence Length')\n",
134
+ "plt.ylabel('Frequency')\n",
135
+ "plt.legend()\n",
136
+ "plt.show()"
137
+ ]
138
+ },
139
+ {
140
+ "cell_type": "code",
141
+ "execution_count": null,
142
+ "metadata": {},
143
+ "outputs": [],
144
+ "source": []
145
+ }
146
+ ],
147
+ "metadata": {
148
+ "kernelspec": {
149
+ "display_name": "Python 3",
150
+ "language": "python",
151
+ "name": "python3"
152
+ },
153
+ "language_info": {
154
+ "codemirror_mode": {
155
+ "name": "ipython",
156
+ "version": 3
157
+ },
158
+ "file_extension": ".py",
159
+ "mimetype": "text/x-python",
160
+ "name": "python",
161
+ "nbconvert_exporter": "python",
162
+ "pygments_lexer": "ipython3",
163
+ "version": "3.10.12"
164
+ }
165
+ },
166
+ "nbformat": 4,
167
+ "nbformat_minor": 2
168
+ }