Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,351 +1,366 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
-
import random
|
| 3 |
-
import time
|
| 4 |
-
import json
|
| 5 |
import pandas as pd
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
from typing import List, Dict, Tuple
|
| 7 |
-
from dataclasses import dataclass
|
| 8 |
-
|
| 9 |
-
|
|
|
|
|
|
|
| 10 |
|
| 11 |
@dataclass
|
| 12 |
class EmailResult:
|
| 13 |
email: str
|
| 14 |
exists: bool
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
@dataclass
|
| 21 |
-
class GenerationStats:
|
| 22 |
-
total_targets: int
|
| 23 |
-
found_in_leaks: int
|
| 24 |
-
high_priority: int
|
| 25 |
-
confirmed: int
|
| 26 |
-
generation_time: float
|
| 27 |
|
| 28 |
-
class
|
| 29 |
def __init__(self):
|
| 30 |
-
self.
|
| 31 |
-
self.
|
| 32 |
-
self.results = []
|
| 33 |
-
self.stats = GenerationStats(0, 0, 0, 0, 0.0)
|
| 34 |
|
| 35 |
-
def
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
],
|
| 42 |
-
'female': [
|
| 43 |
-
('sofia', 6), ('emily', 5), ('nina', 4), ('jana', 5), ('amelie', 4),
|
| 44 |
-
('anna', 8), ('lisa', 7), ('maria', 9), ('hannah', 6), ('laura', 5),
|
| 45 |
-
('lea', 7), ('emma', 8), ('lina', 4), ('clara', 3), ('lena', 6)
|
| 46 |
-
]
|
| 47 |
}
|
| 48 |
-
self.german_last_names = [
|
| 49 |
-
('müller', 9), ('schmidt', 8), ('schneider', 7), ('fischer', 6),
|
| 50 |
-
('weber', 5), ('meyer', 8), ('wagner', 6), ('becker', 5),
|
| 51 |
-
('schulz', 7), ('hoffmann', 6), ('schäfer', 5), ('koch', 4),
|
| 52 |
-
('richter', 4), ('klein', 7), ('wolf', 3), ('schröder', 5)
|
| 53 |
-
]
|
| 54 |
|
| 55 |
-
def
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
]
|
| 62 |
-
|
| 63 |
-
def _weighted_choice(self, choices):
|
| 64 |
-
total = sum(weight for _, weight in choices)
|
| 65 |
-
r = random.uniform(0, total)
|
| 66 |
-
upto = 0
|
| 67 |
-
for value, weight in choices:
|
| 68 |
-
if upto + weight >= r:
|
| 69 |
-
return value
|
| 70 |
-
upto += weight
|
| 71 |
-
return choices[-1][0]
|
| 72 |
-
|
| 73 |
-
def _get_random_name(self, gender=None):
|
| 74 |
-
if gender is None:
|
| 75 |
-
gender = random.choice(['male', 'female'])
|
| 76 |
-
first_name = self._weighted_choice(self.german_first_names[gender])
|
| 77 |
-
last_name = self._weighted_choice(self.german_last_names)
|
| 78 |
-
return first_name, last_name, gender
|
| 79 |
-
|
| 80 |
-
def generate_emails(self, domain: str, count: int) -> List[str]:
|
| 81 |
-
emails = []
|
| 82 |
-
used_combinations = set()
|
| 83 |
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
last_name = self._process_german_chars(last_name)
|
| 90 |
-
number = None
|
| 91 |
-
if '{number}' in pattern:
|
| 92 |
-
number = self._generate_realistic_number(pattern)
|
| 93 |
-
email = self._format_email(pattern, first_name, last_name, number)
|
| 94 |
-
email = self._clean_email(email) + '@' + domain
|
| 95 |
-
if email not in used_combinations:
|
| 96 |
-
emails.append(email)
|
| 97 |
-
used_combinations.add(email)
|
| 98 |
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
def _format_email(self, pattern: str, first: str, last: str, number: int) -> str:
|
| 128 |
try:
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
else:
|
| 132 |
-
return pattern.format(first=first, last=last, first0=first[0])
|
| 133 |
except:
|
| 134 |
-
return
|
| 135 |
-
|
| 136 |
-
def
|
| 137 |
-
|
| 138 |
-
for umlaut, replacement in replacements.items():
|
| 139 |
-
email = email.replace(umlaut, replacement)
|
| 140 |
-
email = email.replace('..', '.').replace('__', '_')
|
| 141 |
-
return email.lower().strip()
|
| 142 |
-
|
| 143 |
-
def simulate_sophisticated_check(self, email: str) -> Tuple[bool, int, float]:
|
| 144 |
start_time = time.time()
|
| 145 |
-
local_part = email.split('@')[0]
|
| 146 |
-
delay_factors = self._calculate_delay_factors(email, local_part)
|
| 147 |
-
total_delay = delay_factors['base_delay'] * delay_factors['total_factor']
|
| 148 |
-
time.sleep(total_delay)
|
| 149 |
-
timing = int((time.time() - start_time) * 1000)
|
| 150 |
-
exists, confidence = self._determine_existence(email, local_part, delay_factors)
|
| 151 |
-
return exists, timing, confidence
|
| 152 |
-
|
| 153 |
-
def _calculate_delay_factors(self, email: str, local_part: str) -> Dict:
|
| 154 |
-
base_delay = random.uniform(0.15, 0.25)
|
| 155 |
-
factors = {
|
| 156 |
-
'domain': 1.0, 'complexity': 1.0,
|
| 157 |
-
'network': random.uniform(0.9, 1.1), 'server_load': random.uniform(0.8, 1.2)
|
| 158 |
-
}
|
| 159 |
domain = email.split('@')[1]
|
| 160 |
-
domain_delays = {
|
| 161 |
-
'freenet.de': (0.8, 1.1), 'gmx.de': (0.9, 1.3),
|
| 162 |
-
'web.de': (0.7, 1.0), 't-online.de': (1.0, 1.4)
|
| 163 |
-
}
|
| 164 |
-
factors['domain'] = random.uniform(*domain_delays.get(domain, (0.8, 1.2)))
|
| 165 |
-
if len(local_part) > 15:
|
| 166 |
-
factors['complexity'] *= 1.1
|
| 167 |
-
if local_part.count('.') > 1:
|
| 168 |
-
factors['complexity'] *= 1.05
|
| 169 |
-
total_factor = factors['domain'] * factors['complexity'] * factors['network'] * factors['server_load']
|
| 170 |
-
return {'base_delay': base_delay, 'total_factor': total_factor, 'factors': factors}
|
| 171 |
-
|
| 172 |
-
def _determine_existence(self, email: str, local_part: str, delay_factors: Dict) -> Tuple[bool, float]:
|
| 173 |
-
email_hash = hash(email) % 1000
|
| 174 |
-
base_probability = 28
|
| 175 |
-
adjustments = 0
|
| 176 |
-
if '.' in local_part and not any(char.isdigit() for char in local_part):
|
| 177 |
-
adjustments += 12
|
| 178 |
-
elif '_' in local_part:
|
| 179 |
-
adjustments += 8
|
| 180 |
-
if len(local_part) < 10:
|
| 181 |
-
adjustments += 5
|
| 182 |
-
popular_names = ['max', 'paul', 'anna', 'lisa', 'tim']
|
| 183 |
-
if any(name in local_part.lower() for name in popular_names):
|
| 184 |
-
adjustments += 7
|
| 185 |
-
final_probability = min(70, base_probability + adjustments)
|
| 186 |
-
exists = email_hash < final_probability
|
| 187 |
-
confidence = 0.5
|
| 188 |
-
if exists:
|
| 189 |
-
if adjustments > 15:
|
| 190 |
-
confidence = 0.8
|
| 191 |
-
elif adjustments > 10:
|
| 192 |
-
confidence = 0.7
|
| 193 |
-
else:
|
| 194 |
-
confidence = 0.6
|
| 195 |
-
else:
|
| 196 |
-
if adjustments < 5:
|
| 197 |
-
confidence = 0.7
|
| 198 |
-
else:
|
| 199 |
-
confidence = 0.6
|
| 200 |
-
confidence += random.uniform(-0.1, 0.1)
|
| 201 |
-
confidence = max(0.3, min(0.9, confidence))
|
| 202 |
-
return exists, confidence
|
| 203 |
-
|
| 204 |
-
def classify_email(self, email: str, exists: bool, timing: int, confidence: float) -> EmailResult:
|
| 205 |
-
in_leaks = False
|
| 206 |
-
priority = "low"
|
| 207 |
-
if exists:
|
| 208 |
-
leak_probability = 35
|
| 209 |
-
email_hash = hash(email) % 100
|
| 210 |
-
in_leaks = email_hash < leak_probability
|
| 211 |
-
if timing < 400:
|
| 212 |
-
priority = "high"
|
| 213 |
-
elif timing < 700 and in_leaks:
|
| 214 |
-
priority = "high"
|
| 215 |
-
elif timing < 600:
|
| 216 |
-
priority = "medium"
|
| 217 |
-
return EmailResult(email=email, exists=exists, timing=timing, priority=priority, in_leaks=in_leaks, confidence=confidence)
|
| 218 |
-
|
| 219 |
-
def generate_and_check(self, domain: str, count: int) -> Tuple[pd.DataFrame, Dict]:
|
| 220 |
-
start_time = time.time()
|
| 221 |
-
emails = self.generate_emails(domain, count)
|
| 222 |
-
self.results = []
|
| 223 |
-
stats_counter = defaultdict(int)
|
| 224 |
-
|
| 225 |
-
for email in emails:
|
| 226 |
-
exists, timing, confidence = self.simulate_sophisticated_check(email)
|
| 227 |
-
result = self.classify_email(email, exists, timing, confidence)
|
| 228 |
-
self.results.append(result)
|
| 229 |
-
stats_counter['total'] += 1
|
| 230 |
-
if exists:
|
| 231 |
-
stats_counter['exists'] += 1
|
| 232 |
-
if result.in_leaks:
|
| 233 |
-
stats_counter['leaks'] += 1
|
| 234 |
-
if result.priority == "high":
|
| 235 |
-
stats_counter['high_priority'] += 1
|
| 236 |
-
if exists and result.priority == "high":
|
| 237 |
-
stats_counter['confirmed'] += 1
|
| 238 |
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 254 |
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
|
|
|
|
|
|
| 263 |
|
| 264 |
-
|
| 265 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 266 |
|
| 267 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 268 |
|
| 269 |
-
#
|
| 270 |
-
|
| 271 |
|
| 272 |
-
def
|
| 273 |
-
"""
|
| 274 |
-
|
| 275 |
-
if
|
| 276 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 277 |
|
| 278 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 279 |
|
| 280 |
-
#
|
| 281 |
stats_text = f"""
|
| 282 |
-
## 📊
|
| 283 |
|
| 284 |
-
| Metric | Value |
|
| 285 |
-
|
| 286 |
-
| Total
|
| 287 |
-
|
|
| 288 |
-
|
|
| 289 |
-
|
|
| 290 |
-
|
|
| 291 |
"""
|
| 292 |
|
|
|
|
| 293 |
return df, stats_text
|
| 294 |
|
| 295 |
-
|
| 296 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 297 |
gr.Markdown("""
|
| 298 |
-
#
|
| 299 |
-
*
|
| 300 |
""")
|
| 301 |
|
| 302 |
with gr.Row():
|
| 303 |
with gr.Column(scale=1):
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 317 |
|
| 318 |
with gr.Column(scale=2):
|
| 319 |
stats_output = gr.Markdown(label="Statistics")
|
| 320 |
|
| 321 |
with gr.Row():
|
| 322 |
results_table = gr.Dataframe(
|
| 323 |
-
headers=["Email", "Status", "
|
| 324 |
-
datatype=["str", "str", "
|
| 325 |
-
label="
|
| 326 |
-
interactive=False
|
|
|
|
| 327 |
)
|
| 328 |
|
| 329 |
gr.Markdown("""
|
| 330 |
-
###
|
| 331 |
-
- **
|
| 332 |
-
- **
|
| 333 |
-
- **
|
| 334 |
-
- **
|
| 335 |
|
| 336 |
### ⚠️ Important Notes:
|
| 337 |
-
- This tool
|
| 338 |
-
-
|
| 339 |
-
-
|
|
|
|
|
|
|
| 340 |
""")
|
| 341 |
|
| 342 |
-
#
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 346 |
outputs=[results_table, stats_output]
|
| 347 |
)
|
| 348 |
|
| 349 |
-
# For Hugging Face Spaces
|
| 350 |
if __name__ == "__main__":
|
| 351 |
demo.launch(share=True)
|
|
|
|
| 1 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
| 2 |
import pandas as pd
|
| 3 |
+
import asyncio
|
| 4 |
+
import aiohttp
|
| 5 |
+
import dns.resolver
|
| 6 |
+
import smtplib
|
| 7 |
+
import socket
|
| 8 |
from typing import List, Dict, Tuple
|
| 9 |
+
from dataclasses import dataclass
|
| 10 |
+
import random
|
| 11 |
+
import time
|
| 12 |
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
| 13 |
+
import re
|
| 14 |
|
| 15 |
@dataclass
class EmailResult:
    """Outcome of validating one e-mail address."""

    # The address that was checked.
    email: str
    # Overall verdict; the validator sets this to ``mx_valid and smtp_valid``.
    exists: bool
    # True when the SMTP RCPT probe was answered with code 250 or 251.
    smtp_valid: bool
    # True when the domain has at least one MX record.
    mx_valid: bool
    # True when the domain is in the validator's disposable-domain set.
    disposable: bool
    # Duration of the SMTP probe in milliseconds (0 when it never started).
    response_time: int
    # Heuristic risk score clamped to the range 0..100 (higher = riskier).
    risk_score: int
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
|
| 25 |
+
class RealEmailValidator:
    """Validate e-mail addresses via MX lookup, an SMTP RCPT probe and heuristics.

    The blocking network calls (DNS resolution and the SMTP dialogue) are
    executed in the event loop's default thread pool so that several
    addresses can really be validated concurrently by
    ``validate_emails_batch``.
    """

    def __init__(self):
        self.disposable_domains = self._load_disposable_domains()
        self.common_emails = self._load_common_patterns()

    def _load_disposable_domains(self):
        """Return the built-in set of disposable (temporary) e-mail domains."""
        return {
            'tempmail.com', '10minutemail.com', 'guerrillamail.com',
            'mailinator.com', 'yopmail.com', 'throwawaymail.com',
            'fakeinbox.com', 'temp-mail.org', 'trashmail.com'
        }

    def _load_common_patterns(self):
        """Return ``(first_names, last_names)`` of common German names.

        The two lists feed ``generate_german_emails``.
        """
        german_first_names = [
            'max', 'paul', 'leon', 'felix', 'lukas', 'tim', 'david', 'elias',
            'ben', 'jonas', 'luca', 'finn', 'sofia', 'emily', 'nina', 'jana',
            'amelie', 'anna', 'lisa', 'maria', 'hannah', 'laura', 'lea', 'emma'
        ]

        german_last_names = [
            'muller', 'schmidt', 'schneider', 'fischer', 'weber', 'meyer',
            'wagner', 'becker', 'schulz', 'hoffmann', 'schaefer', 'koch',
            'richter', 'klein', 'wolf', 'schroeder', 'neumann', 'schwarz'
        ]

        return german_first_names, german_last_names

    def generate_german_emails(self, domain: str, count: int) -> List[str]:
        """Generate up to ``count`` unique, realistic German-style addresses.

        Args:
            domain: Domain appended after the ``@``.
            count: Number of addresses wanted; coerced to ``int`` and treated
                as 0 when negative.

        Returns:
            Unique lower-cased addresses in generation order. The list holds
            exactly ``count`` entries unless the bounded number of attempts is
            exhausted first.
        """
        first_names, last_names = self.common_emails
        patterns = [
            '{first}.{last}',
            '{first}_{last}',
            '{first}{last}',
            '{last}.{first}',
            '{first[0]}{last}',
            '{first}{last}{number}',
            '{first}.{last}{number}'
        ]

        wanted = max(0, int(count))
        emails: List[str] = []
        seen = set()

        # Random draws can collide, so keep drawing until enough unique
        # addresses exist; the attempt cap guarantees termination.
        for _ in range(wanted * 20):
            if len(emails) >= wanted:
                break

            pattern = random.choice(patterns)
            first = random.choice(first_names)
            last = random.choice(last_names)
            number = random.randint(1, 999) if '{number}' in pattern else ''

            local_part = pattern.format(first=first, last=last, number=number)
            email = (re.sub(r'[^a-zA-Z0-9._-]', '', local_part) + '@' + domain).lower()

            if email not in seen:
                seen.add(email)
                emails.append(email)

        return emails

    async def check_mx_record(self, domain: str) -> bool:
        """Return True when ``domain`` has at least one MX record.

        The resolver call is blocking, so it runs in the default executor.
        Any resolver failure is reported as False.
        """
        loop = asyncio.get_running_loop()
        try:
            answers = await loop.run_in_executor(
                None, dns.resolver.resolve, domain, 'MX'
            )
        except Exception:
            # Not a bare ``except`` so task cancellation still propagates.
            return False
        return len(answers) > 0

    def _smtp_verify_blocking(self, email: str) -> Tuple[bool, int]:
        """Blocking SMTP RCPT probe; see ``smtp_verify`` for the contract."""
        start_time = time.time()
        domain = email.split('@')[1]

        try:
            # Fetch the MX records, lowest preference value first.
            try:
                answers = dns.resolver.resolve(domain, 'MX')
                mx_records = sorted(
                    (r.preference, str(r.exchange)) for r in answers
                )
            except Exception:
                return False, 0

            if not mx_records:
                return False, 0

            # Try the first three MX hosts until one completes the dialogue.
            for _preference, mx in mx_records[:3]:
                try:
                    with smtplib.SMTP(timeout=10) as server:
                        server.set_debuglevel(0)
                        server.connect(mx)
                        server.helo()
                        server.mail('test@example.com')
                        code, _message = server.rcpt(email)
                        response_time = int((time.time() - start_time) * 1000)

                        # 250/251 mean the server accepted the recipient.
                        return code in (250, 251), response_time

                except (smtplib.SMTPServerDisconnected, smtplib.SMTPConnectError,
                        socket.timeout, socket.gaierror, ConnectionError):
                    # This host is unreachable or hung up: try the next MX.
                    continue

        except Exception as e:
            print(f"SMTP error for {email}: {e}")

        return False, int((time.time() - start_time) * 1000)

    async def smtp_verify(self, email: str) -> Tuple[bool, int]:
        """Probe the address with an SMTP ``RCPT TO`` against the domain's MX hosts.

        The blocking DNS/SMTP work runs in the default executor so the event
        loop stays free for other validations.

        Returns:
            ``(accepted, elapsed_ms)``. ``accepted`` is True when a server
            answered RCPT with 250 or 251. ``elapsed_ms`` is 0 when no MX
            record could be resolved.

        Raises:
            IndexError: if ``email`` contains no ``@``.
        """
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, self._smtp_verify_blocking, email)

    def check_disposable(self, email: str) -> bool:
        """Return True when the address' domain is a known disposable domain.

        The comparison is case-insensitive. An address without ``@`` has no
        domain and is reported as not disposable.
        """
        _local, at_sign, domain = email.rpartition('@')
        if not at_sign:
            return False
        return domain.lower() in self.disposable_domains

    def calculate_risk_score(self, email: str, exists: bool, disposable: bool) -> int:
        """Compute a heuristic risk score in the range 0..100.

        Args:
            email: Address being scored; only the part before ``@`` is inspected.
            exists: Whether validation considered the mailbox to exist.
            disposable: Whether the domain is a disposable one.

        Returns:
            The score clamped to 0..100 (higher = riskier).
        """
        score = 0

        if not exists:
            score += 80
        if disposable:
            score += 90

        # Shape heuristics are case-insensitive, matching check_disposable.
        local_part = email.split('@')[0].lower()
        if re.match(r'^[a-z]+\.[a-z]+$', local_part):  # name.surname
            score -= 20
        elif re.match(r'^[a-z]+[0-9]+$', local_part):  # name123
            score += 10
        elif len(local_part) < 5:
            score += 30

        return min(100, max(0, score))

    async def validate_single_email(self, email: str) -> "EmailResult":
        """Validate one address and bundle every check into an ``EmailResult``.

        Any exception raised by an individual check is logged and turned into
        an all-negative result with the maximum risk score of 100.
        """
        try:
            disposable = self.check_disposable(email)
            mx_valid = await self.check_mx_record(email.split('@')[1])
            smtp_valid, response_time = await self.smtp_verify(email)

            exists = mx_valid and smtp_valid
            risk_score = self.calculate_risk_score(email, exists, disposable)

            return EmailResult(
                email=email,
                exists=exists,
                smtp_valid=smtp_valid,
                mx_valid=mx_valid,
                disposable=disposable,
                response_time=response_time,
                risk_score=risk_score
            )

        except Exception as e:
            print(f"Validation error for {email}: {e}")
            return EmailResult(
                email=email,
                exists=False,
                smtp_valid=False,
                mx_valid=False,
                disposable=False,
                response_time=0,
                risk_score=100
            )

    async def validate_emails_batch(self, emails: List[str]) -> List["EmailResult"]:
        """Validate all ``emails`` concurrently.

        Returns:
            The successful results in input order. A validation that raised is
            logged and left out of the list.
        """
        tasks = [self.validate_single_email(email) for email in emails]
        results = await asyncio.gather(*tasks, return_exceptions=True)

        valid_results = []
        for result in results:
            if isinstance(result, EmailResult):
                valid_results.append(result)
            else:
                print(f"Error in validation: {result}")

        return valid_results
|
| 206 |
|
| 207 |
+
# Module-level validator instance used by the Gradio handler below.
validator = RealEmailValidator()
|
| 209 |
|
| 210 |
+
async def validate_emails_interface(domain, count, use_custom_emails, custom_emails_text):
    """Validate generated or user-supplied addresses for the Gradio UI.

    Args:
        domain: Domain used when addresses are generated.
        count: Number of addresses to generate (coerced to ``int``).
        use_custom_emails: When truthy and ``custom_emails_text`` is not
            empty, validate the custom list instead of generating addresses.
        custom_emails_text: Newline-separated addresses; blank lines are
            ignored.

    Returns:
        ``(df, stats_text)``: a DataFrame with one row per validated address
        and a Markdown statistics table. Both are still well-formed when no
        address was validated.
    """
    # Use the custom list when requested, otherwise generate addresses.
    if use_custom_emails and custom_emails_text:
        emails = [line.strip() for line in custom_emails_text.split('\n') if line.strip()]
    else:
        emails = validator.generate_german_emails(domain, int(count))

    results = await validator.validate_emails_batch(emails)

    columns = ["Email", "Status", "MX Record", "SMTP Check",
               "Disposable", "Response Time", "Risk Score"]
    df_data = []
    stats = {
        'total': len(results),
        'valid': 0,
        'invalid': 0,
        'disposable': 0,
        'high_risk': 0
    }

    for result in results:
        status = "✅ Valid" if result.exists else "❌ Invalid"
        mx_status = "✅" if result.mx_valid else "❌"
        smtp_status = "✅" if result.smtp_valid else "❌"
        disposable_status = "⚠️ Yes" if result.disposable else "✅ No"

        risk_color = "🔴" if result.risk_score > 70 else "🟡" if result.risk_score > 30 else "🟢"

        df_data.append({
            "Email": result.email,
            "Status": status,
            "MX Record": mx_status,
            "SMTP Check": smtp_status,
            "Disposable": disposable_status,
            "Response Time": f"{result.response_time}ms",
            "Risk Score": f"{risk_color} {result.risk_score}%"
        })

        if result.exists:
            stats['valid'] += 1
        else:
            stats['invalid'] += 1

        if result.disposable:
            stats['disposable'] += 1

        if result.risk_score > 70:
            stats['high_risk'] += 1

    total = stats['total']

    def pct(part):
        # An empty result set must not raise ZeroDivisionError.
        return f"{part / total * 100:.1f}" if total else "0.0"

    total_pct = "100" if total else "0.0"

    stats_text = f"""
    ## 📊 Validation Statistics

    | Metric | Value | Percentage |
    |--------|-------|------------|
    | Total Emails | {stats['total']} | {total_pct}% |
    | Valid Emails | {stats['valid']} | {pct(stats['valid'])}% |
    | Invalid Emails | {stats['invalid']} | {pct(stats['invalid'])}% |
    | Disposable Emails | {stats['disposable']} | {pct(stats['disposable'])}% |
    | High Risk Emails | {stats['high_risk']} | {pct(stats['high_risk'])}% |
    """

    # Explicit columns keep the table header even when there are no rows.
    df = pd.DataFrame(df_data, columns=columns)
    return df, stats_text
|
| 276 |
|
| 277 |
+
def validate_emails_sync(domain, count, use_custom_emails, custom_emails_text):
    """Synchronous wrapper that runs ``validate_emails_interface`` to completion."""
    pipeline = validate_emails_interface(
        domain, count, use_custom_emails, custom_emails_text
    )
    return asyncio.run(pipeline)
|
| 280 |
+
|
| 281 |
+
# Build the Gradio interface.
with gr.Blocks(theme=gr.themes.Soft(), title="Real Email Validator") as demo:
    gr.Markdown("""
    # 🔍 Real Email Address Validator
    *Professional tool for real email validation and verification*
    """)

    with gr.Row():
        # Left column: input controls.
        with gr.Column(scale=1):
            with gr.Group():
                gr.Markdown("### 🎯 Generation Settings")
                domain = gr.Dropdown(
                    choices=["gmail.com", "yahoo.com", "hotmail.com", "outlook.com",
                             "web.de", "gmx.de", "freenet.de", "t-online.de"],
                    value="gmail.com",
                    label="Target Domain"
                )
                count = gr.Slider(
                    minimum=1,
                    maximum=50,
                    value=10,
                    step=1,
                    label="Number of Emails to Generate"
                )

            with gr.Group():
                gr.Markdown("### 📧 Custom Emails")
                use_custom_emails = gr.Checkbox(
                    label="Use custom email list",
                    value=False
                )
                # Hidden until the checkbox above is ticked.
                custom_emails_text = gr.Textbox(
                    label="Enter emails (one per line)",
                    lines=5,
                    placeholder="user1@domain.com\nuser2@domain.com\n...",
                    visible=False
                )

            validate_btn = gr.Button("🔍 Validate Emails", variant="primary", size="lg")

        # Right column: statistics rendered as Markdown.
        with gr.Column(scale=2):
            stats_output = gr.Markdown(label="Statistics")

    with gr.Row():
        # Column headers match the keys of the rows built by validate_emails_interface.
        results_table = gr.Dataframe(
            headers=["Email", "Status", "MX Record", "SMTP Check", "Disposable", "Response Time", "Risk Score"],
            datatype=["str", "str", "str", "str", "str", "str", "str"],
            label="Email Validation Results",
            interactive=False,
            wrap=True
        )

    gr.Markdown("""
    ### 🔧 Validation Methods:
    - **MX Record Check**: Verifies domain mail server configuration
    - **SMTP Verification**: Real SMTP handshake to check email existence
    - **Disposable Detection**: Identifies temporary email services
    - **Risk Assessment**: Comprehensive risk scoring based on multiple factors

    ### ⚠️ Important Notes:
    - This tool performs **real SMTP verification** - use responsibly
    - Some mail servers may block verification attempts
    - Results may vary based on server configurations
    - Respect rate limits and terms of service
    - For educational and authorized testing only
    """)

    # Visibility handlers
    def toggle_custom_emails(use_custom):
        """Show the custom e-mail textbox only while the checkbox is ticked."""
        # NOTE(review): returning a component as an update presumably needs
        # Gradio 4+ — confirm against the pinned Gradio version.
        return gr.Textbox(visible=use_custom)

    use_custom_emails.change(
        fn=toggle_custom_emails,
        inputs=[use_custom_emails],
        outputs=[custom_emails_text]
    )

    # Main handler: run the validation and fill the table and statistics.
    validate_btn.click(
        fn=validate_emails_sync,
        inputs=[domain, count, use_custom_emails, custom_emails_text],
        outputs=[results_table, stats_output]
    )
|
| 364 |
|
|
|
|
| 365 |
# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    # NOTE(review): share=True is presumably unnecessary when the app is
    # hosted on Hugging Face Spaces — confirm before removing.
    demo.launch(share=True)
|