from flask import Flask, render_template, request, jsonify
import numpy as np
import pandas as pd
import joblib
import os
from sklearn.svm import SVR, SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn import svm
from sklearn.naive_bayes import GaussianNB
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.cluster import KMeans, DBSCAN
from sklearn.datasets import make_classification
from textblob import TextBlob
import traceback
from flask_cors import CORS
from werkzeug.utils import secure_filename  # for secure file names
import io  # to read CSV from memory
import re
import pickle
import json
import string
import requests
from PIL import Image
import matplotlib.pyplot as plt
from joblib import load
import plotly.graph_objs as go
# from transformers import pipeline
from dotenv import load_dotenv
from urllib.parse import urlparse
import tldextract

# chatbot code
import zipfile
import gdown
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

# login (currently disabled)
# from flask import Flask
# from flask_jwt_extended import JWTManager
# from flask_login import LoginManager, login_required
# from flask_mail import Mail
# from flask_sqlalchemy import SQLAlchemy
# from auth.models import db, User
# from auth.routes import auth

# chatbot fact-checker (currently disabled)
# from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
# model_name = "microsoft/deberta-v3-small"
# tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
# model = AutoModelForSequenceClassification.from_pretrained(model_name)
# bert_checker = pipeline("text-classification", model=model, tokenizer=tokenizer)

# Load environment variables from .env
load_dotenv()

# spam-URL related imports
import nltk
# Tell NLTK to also check the local nltk_data folder
nltk.data.path.append(os.path.join(os.path.dirname(__file__), "nltk_data"))
from nltk.corpus import words

# Load the words corpus
valid_words = set(words.words())
print("engineering" in valid_words)       # ✅ should be True
print("engineerigfnnxng" in valid_words)  # ❌ should be False

import wordninja  # splits concatenated strings into valid word parts
from spellchecker import SpellChecker
# end urlspam

import google.generativeai as genai

# Streamlit file-download snippet (currently disabled)
# import streamlit as st
# from load_file import load_file
# st.title("Download HuggingFace Repo Files in Streamlit")
# filename = st.text_input("Enter filename from repo:", "model.safetensors")
# if st.button("Download"):
#     try:
#         local_path = load_file(filename)
#         st.success(f"✅ File downloaded to: {local_path}")
#         st.write("You can now use this file in your app.")
#     except Exception as e:
#         st.error(f"❌ Error: {str(e)}")

# OpenAI scientific-validation helper (currently disabled)
# Set API key (no need to assign OpenAI() to client like that)
# openai.api_key = os.getenv("OPENAI_API_KEY")
# def ask_openai_scientific_validation(statement):
#     prompt = f"""Assess the scientific accuracy of: "{statement}"\nRespond with ✅ (possible) or ❌ (impossible), and explain simply."""
#     try:
#         client = OpenAI()
# This is correct placement # response = client.chat.completions.create( # model="gpt-3.5-turbo", # messages=[ # {"role": "system", "content": "You are a scientific fact-checker."}, # {"role": "user", "content": prompt} # ], # temperature=0.7, # max_tokens=150 # ) # return response.choices[0].message.content.strip() # except Exception as e: # return f"⚠️ Could not verify:\n\n{str(e)}" #huggung face code start REPO_ID = "deedrop1140/nero-ml" MODEL_DIR = "Models" def load_file(filename): """Try to load model from local folder; if missing, download from Hugging Face Hub.""" local_path = os.path.join(MODEL_DIR, filename) # 1️⃣ Check if file exists locally if os.path.exists(local_path): file_path = local_path else: # 2️⃣ Download from Hugging Face (Render case) file_path = hf_hub_download(repo_id=REPO_ID, filename=filename) # 3️⃣ Load based on file extension if filename.endswith((".pkl", ".joblib")): return joblib.load(file_path) elif filename.endswith(".npy"): return np.load(file_path, allow_pickle=True) elif filename.endswith((".pt", ".pth")): return torch.load(file_path, map_location="cpu") else: return file_path # # ===================== # # Replace your old model loads with this: # # ===================== # # Models # knn_model = load_file("Models/knn_model.pkl") # lasso_model = load_file("Models/lasso_model.pkl") # liar_model = load_file("Models/liar_model.joblib") # linear_model = load_file("Models/linear_model.pkl") # logistic_model = load_file("Models/logistic_model.pkl") # nb_url_model = load_file("Models/nb_url_model.pkl") # poly_model = load_file("Models/poly_model.pkl") # rf_model = load_file("Models/rf_model.pkl") # ridge_model = load_file("Models/ridge_model.pkl") # supervised_model = load_file("Models/supervised_model.pkl") # svr_model = load_file("Models/svr_model.pkl") # voting_url_model = load_file("Models/voting_url_model.pkl") # # Vectorizers / Encoders / Scalers # label_classes = load_file("Models/label_classes.npy") # label_encoder = load_file("Models/label_encoder.pkl") # lasso_scaler = load_file("Models/lasso_scaler.pkl") # liar_vectorizer = load_file("Models/liar_vectorizer.joblib") # nb_url_vectorizer = load_file("Models/nb_url_vectorizer.pkl") # poly_transform = load_file("Models/poly_transform.pkl") # ridge_scaler = load_file("Models/ridge_scaler.pkl") # svr_scaler_X = load_file("Models/svr_scaler_X.pkl") # svr_scaler_y = load_file("Models/svr_scaler_y.pkl") # tfidf_vectorizer = load_file("Models/tfidf_vectorizer.pkl") # url_vectorizer = load_file("Models/url_vectorizer.pkl") # vectorizer_joblib = load_file("Models/vectorizer.joblib") # vectorizer_pkl = load_file("Models/vectorizer.pkl") # # huggung face code end MODEL_DIR = "Models" DATA_DIR = "housedata" # Assuming your house data is here UPLOAD_FOLDER = 'static/uploads' # NEW: Folder for temporary user uploads app = Flask(__name__) app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER CORS(app) #flasklogin # app.config["JWT_SECRET_KEY"] = "jwt-secret-key" # jwt = JWTManager(app) #authstart # app.config["SECRET_KEY"] = "super-secret" # app.config["SQLALCHEMY_DATABASE_URI"] = "sqlite:///users.db" # Mail # app.config["MAIL_SERVER"] = "smtp.gmail.com" # app.config["MAIL_PORT"] = 587 # app.config["MAIL_USE_TLS"] = True # app.config["MAIL_USERNAME"] = "your_email@gmail.com" # app.config["MAIL_PASSWORD"] = "app_password" # mail = Mail(app) # login_manager = LoginManager(app) # login_manager.login_view = "auth.login" # db.init_app(app) # app.register_blueprint(auth) # jwt = JWTManager(app) # mail = Mail(app) # 
# @login_manager.user_loader
# def load_user(user_id):
#     return User.query.get(int(user_id))

# with app.app_context():
#     db.create_all()
# authend

# chatbot code — deedrop1140/qwen-ml-tutor-assets
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    StoppingCriteria,
    StoppingCriteriaList
)
from peft import PeftModel
from huggingface_hub import hf_hub_download
import zipfile
from transformers import TextIteratorStreamer
import threading
from flask import Response

# ======================
# CONFIG
# ======================
BASE_MODEL = "Qwen/Qwen2.5-1.5B"
DATASET_REPO = "deedrop1140/qwen-ml-tutor-assets"
ZIP_NAME = "qwen-ml-tutor-best-20251213T015537Z-1-001.zip"
# Local directory for the LoRA adapter; kept separate from MODEL_DIR ("Models"),
# which load_file() uses for the scikit-learn artifacts.
ADAPTER_DIR = "qwen-ml-tutor-best"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# ======================
# FLASK APP
# ======================
# The Flask app (with CORS and the upload folder configured) is created above;
# the chatbot routes below are registered on that same app instance.

# ======================
# DOWNLOAD MODEL ASSETS
# ======================
if not os.path.exists(ADAPTER_DIR):
    print("⬇️ Downloading LoRA adapter...")
    zip_path = hf_hub_download(
        repo_id=DATASET_REPO,
        filename=ZIP_NAME,
        repo_type="dataset"
    )
    print("📦 Extracting adapter...")
    with zipfile.ZipFile(zip_path, "r") as z:
        z.extractall(".")
    print("✅ Adapter ready")

# ======================
# LOAD TOKENIZER (FROM LORA MODEL)
# ======================
tokenizer = AutoTokenizer.from_pretrained(
    ADAPTER_DIR,
    trust_remote_code=True
)
if tokenizer.pad_token_id is None:
    tokenizer.pad_token = tokenizer.eos_token

# ======================
# LOAD BASE MODEL
# ======================
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    trust_remote_code=True
)

# 🔥 THIS LINE IS THE FIX (DO NOT SKIP)
base_model.resize_token_embeddings(len(tokenizer))

# MOVE MODEL TO DEVICE
device = "cuda" if torch.cuda.is_available() else "cpu"
base_model = base_model.to(device)

# ======================
# LOAD LORA ADAPTER
# ======================
llm_model = PeftModel.from_pretrained(
    base_model,
    ADAPTER_DIR,
    is_trainable=False
)
llm_model.eval()
print("✅ Model loaded successfully")

# ======================
# STOPPING CRITERIA
# ======================
class StopOnStrings(StoppingCriteria):
    def __init__(self, tokenizer, stop_strings):
        self.tokenizer = tokenizer
        self.stop_ids = [
            tokenizer.encode(s, add_special_tokens=False)
            for s in stop_strings
        ]

    def __call__(self, input_ids, scores, **kwargs):
        for stop in self.stop_ids:
            if len(input_ids[0]) >= len(stop):
                if input_ids[0][-len(stop):].tolist() == stop:
                    return True
        return False

stop_criteria = StoppingCriteriaList([
    StopOnStrings(
        tokenizer,
        stop_strings=["User:", "Instruction:", "Question:"]
    )
])

# =============================
# ROUTES
# =============================
@app.route("/chatbot")
def chatbot():
    return render_template("chatbot.html", active_page="chatbot")

@app.route("/chat", methods=["POST"])
def chat():
    data = request.json
    user_msg = data.get("message", "").strip()
    if not user_msg:
        return jsonify({"reply": "Please ask a machine learning question."})

    prompt = f"""Instruction: Answer the following question clearly.
Do NOT ask follow-up questions.
Do NOT continue the conversation.
Question: {user_msg} Answer:""" inputs = tokenizer(prompt, return_tensors="pt").to(DEVICE) streamer = TextIteratorStreamer( tokenizer, skip_prompt=True, skip_special_tokens=True ) generation_kwargs = dict( **inputs, max_new_tokens=200, temperature=0.3, top_p=0.9, do_sample=True, eos_token_id=tokenizer.eos_token_id, pad_token_id=tokenizer.eos_token_id, stopping_criteria=stop_criteria, streamer=streamer ) # Run generation in background thread thread = threading.Thread( target=llm_model.generate, kwargs=generation_kwargs ) thread.start() def event_stream(): for token in streamer: yield f"data: {token}\n\n" yield "data: [DONE]\n\n" return Response( event_stream(), mimetype="text/event-stream" ) #chatbotcode genai.configure(api_key=os.getenv("GEMINI_API_KEY")) def ask_gemini(statement): model = genai.GenerativeModel("gemini-2.0-flash-001") response = model.generate_content(f"Verify this statement for truth: {statement}") return response.text #rfc # model = load("Models/liar_model.joblib") # vectorizer = load("Models/liar_vectorizer.joblib") # Load BERT fact-checker pipeline (local model) # bert_checker = pipeline("text-classification", model="microsoft/deberta-v3-small") #endrfc #svm # ==== SVM Setup ==== X, y = make_classification(n_samples=100, n_features=2, n_redundant=0, n_clusters_per_class=1, n_classes=2, random_state=42) scaler = StandardScaler() X = scaler.fit_transform(X) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) # Train SVM svm_model = SVC(kernel="linear") svm_model.fit(X_train, y_train) #endsvm #deision tree GEMINI_API_KEY = os.getenv("GEMINI_API_KEY") GEMINI_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent" #end deision tree # Ensure directories exist os.makedirs(MODEL_DIR, exist_ok=True) os.makedirs(DATA_DIR, exist_ok=True) os.makedirs(UPLOAD_FOLDER, exist_ok=True) # NEW: Create upload folder def clean_text(text): if pd.isnull(text): return "" text = text.lower() text = re.sub(r"http\S+|www\S+|https\S+", '', text) text = text.translate(str.maketrans('', '', string.punctuation)) text = re.sub(r'\d+', '', text) text = re.sub(r'\s+', ' ', text).strip() return text # --- Helper functions for data generation (conceptual for demo) --- def generate_linear_data(n_samples=100, noise=0.5): X = np.sort(np.random.rand(n_samples) * 10).reshape(-1, 1) y = 2 * X.squeeze() + 5 + noise * np.random.randn(n_samples) return X, y def generate_non_linear_data(n_samples=100, noise=0.5): X = np.sort(np.random.rand(n_samples) * 10).reshape(-1, 1) y = np.sin(X.squeeze()) * 10 + noise * np.random.randn(n_samples) return X, y def generate_noisy_data(n_samples=100, noise_factor=3.0): X = np.sort(np.random.rand(n_samples) * 10).reshape(-1, 1) y = 2 * X.squeeze() + 5 + noise_factor * np.random.randn(n_samples) # Increased noise return X, y # Function to generate house price data (using your existing data structure for consistency) def get_house_data(): try: df = pd.read_csv(os.path.join(DATA_DIR, 'train.csv')) # Using a subset of features for simplicity in demo features = ['GrLivArea', 'OverallQual', 'GarageCars', 'TotalBsmtSF', 'YearBuilt'] # Check if all required columns exist if not all(col in df.columns for col in features + ['SalePrice']): print("Warning: Missing one or more required columns in train.csv for house data.") return None, None X = df[features] y = df['SalePrice'] return X, y except FileNotFoundError: print(f"Error: train.csv not found in {DATA_DIR}. 
Please ensure your data is there.") return None, None except Exception as e: print(f"Error loading house data: {e}") return None, None # Dictionary to hold all loaded models loaded_models = {} # Load logistic model and vectorizer for SMS # vectorizer = joblib.load("Models/logvectorizer.pkl") # model = joblib.load("Models/logistic_model.pkl") # vectorizer = load_file("Models/logvectorizer.pkl") # model = load_file("Models/logistic_model.pkl") # # Load models once NB+DT+SVM is trained # try: # model = load_file("Models/logistic_model.pkl") # # vectorizer = joblib.load("Models/logvectorizer.pkl") # # model = joblib.load("Models/logistic_model.pkl") # vectorizer = load_file("Models/vectorizer.pkl") # print("✅ Model and vectorizer loaded into memory successfully!") # except Exception as e: # vectorizer = None # model = None # print(f"❌ Error: Could not load model or vectorizer. Please check your file paths. Error: {e}") # #END NB+DT+SVM # === Naive Bayes URL Spam Classifier (NB_spam.html) === # === Load Model & Vectorizer === # VT_API_KEY = os.getenv("VT_API_KEY") # nb_model = load_file("Models/nb_url_model.pkl") # vectorizer = load_file("Models/nb_url_vectorizer.pkl") # if nb_model is not None and vectorizer is not None: # print("✅ Loaded model and vectorizer.") # else: # print("❌ Model or vectorizer not found.") def load_all_models(): """ Loads all necessary models into the loaded_models dictionary when the app starts. """ global loaded_models # Load Supervised Model # Load Supervised Model try: supervised_model_path = load_file("linear_model.pkl") # Debug: check what load_file actually returned print("DEBUG -> supervised_model_path type:", type(supervised_model_path)) # If load_file returned a path (string), load with joblib if isinstance(supervised_model_path, str): loaded_models['supervised'] = joblib.load(supervised_model_path) else: # If load_file already returned the model object loaded_models['supervised'] = supervised_model_path print("Supervised model loaded successfully") except FileNotFoundError: print(f"Error: Supervised model file not found at {supervised_model_path}. 
" "Please run train_model.py first.") loaded_models['supervised'] = None # Mark as not loaded except Exception as e: print(f"Error loading supervised model: {e}") loaded_models['supervised'] = None # Load models when Flask app context is ready with app.app_context(): load_all_models() @app.route('/') def frontpage(): return render_template('frontpage.html') @app.route('/home') def home(): return render_template('home.html') @app.route("/about") def about(): return render_template("about.html", active_page="about") @app.route("/privacy") def privacy(): return render_template("privacy.html", active_page="privacy") @app.route("/contact") def contact(): return render_template("contact.html", active_page="contact") @app.route('/Optimization') def Optimization(): return render_template('Optimization.html', active_page='Optimization') @app.route('/supervise') def supervise(): return render_template('supervise.html', active_page='supervise') @app.route('/unsupervised') def unsupervised(): return render_template('unsupervised.html', active_page='unsupervised') # Semi-Supervised Learning page @app.route('/semi-supervised') def semi_supervised(): return render_template('semi_supervised.html', active_page='semi_supervised') # Reinforcement Learning page @app.route('/reinforcement') def reinforcement(): return render_template('reinforcement.html', active_page='reinforcement') # Ensemble Learning page @app.route('/ensemble') def ensemble(): return render_template('ensemble.html', active_page='ensemble') @app.route('/supervised', methods=['GET', 'POST']) def supervised(): prediction = None hours_studied_input = None if loaded_models['supervised'] is None: return "Error: Supervised model could not be loaded. Please check server logs.", 500 if request.method == 'POST': try: hours_studied_input = float(request.form['hours']) input_data = np.array([[hours_studied_input]]) predicted_score = loaded_models['supervised'].predict(input_data)[0] prediction = round(predicted_score, 2) except ValueError: print("Invalid input for hours studied.") prediction = "Error: Please enter a valid number." except Exception as e: print(f"An error occurred during prediction: {e}") prediction = "Error during prediction." return render_template('supervised.html', prediction=prediction, hours_studied_input=hours_studied_input) @app.route('/polynomial', methods=['GET', 'POST']) def polynomial(): if request.method == 'POST': try: hours = float(request.form['hours']) # model = joblib.load('Models/poly_model.pkl') # poly = joblib.load('Models/poly_transform.pkl') # model = load_file("Models/poly_model.pkl") # poly= load_file("Models/poly_transform.pkl") model = load_file("poly_model.pkl") poly= load_file("poly_transform.pkl") transformed_input = poly.transform([[hours]]) prediction = model.predict(transformed_input)[0] return render_template("poly.html", prediction=round(prediction, 2), hours=hours) except Exception as e: print(f"Error: {e}") return render_template("poly.html", error="Something went wrong.") return render_template("poly.html") @app.route('/random_forest', methods=['GET', 'POST']) def random_forest(): if request.method == 'POST': try: hours = float(request.form['hours']) model = load_file("rf_model.pkl") # model = joblib.load('Models/rf_model.pkl') prediction = model.predict([[hours]])[0] return render_template("rf.html", prediction=round(prediction, 2), hours=hours) except Exception as e: print(f"[ERROR] {e}") return render_template("rf.html", error="Prediction failed. 
Check your input.") return render_template("rf.html") @app.route('/prediction_flow') def prediction_flow(): return render_template('prediction_flow.html') @app.route("/lasso", methods=["GET", "POST"]) def lasso(): if request.method == "POST": try: inputs = [float(request.form.get(f)) for f in ['OverallQual', 'GrLivArea', 'GarageCars', 'TotalBsmtSF', 'YearBuilt']] # model = load_file("Models/lasso_model.pkl") # scaler = load_file("Models/lasso_scaler.pkl") # model = joblib.load("Models/lasso_model.pkl") # scaler = joblib.load("Models/lasso_scaler.pkl") model = load_file("lasso_model.pkl") scaler = load_file("lasso_scaler.pkl") scaled_input = scaler.transform([inputs]) prediction = model.predict(scaled_input)[0] return render_template("lasso.html", prediction=round(prediction, 2)) except Exception as e: return render_template("lasso.html", error=str(e)) return render_template("lasso.html") @app.route('/ridge', methods=['GET', 'POST']) def ridge(): prediction = None error = None try: # model = load_file("Models/ridge_model.pkl") # scaler = load_file("Models/ridge_scaler.pkl") # model = joblib.load(os.path.join(MODEL_DIR, 'ridge_model.pkl')) # scaler = joblib.load(os.path.join(MODEL_DIR, 'ridge_scaler.pkl')) model = load_file("ridge_model.pkl") scaler = load_file("ridge_scaler.pkl") except Exception as e: return f"❌ Error loading Ridge model: {e}", 500 if request.method == 'POST': try: features = ['OverallQual', 'GrLivArea', 'GarageCars', 'TotalBsmtSF', 'YearBuilt'] input_data = [float(request.form[feature]) for feature in features] input_scaled = scaler.transform([input_data]) prediction = model.predict(input_scaled)[0] except Exception as e: error = str(e) return render_template('ridge.html', prediction=prediction, error=error) @app.route('/dtr', methods=['GET', 'POST']) def dtr(): if request.method == 'GET': return render_template('dtr.html') if request.method == 'POST': data = request.get_json() data_points = data.get('dataPoints') if data else None print("Received data:", data_points) return jsonify({'message': 'Data received successfully!', 'receivedData': data_points}) @app.route('/dtrg') def drg(): return render_template('desiciongame.html') # --- SVR Routes --- @app.route('/svr') # This route is for the initial GET request to load the page def svr_page(): return render_template('svr.html') # @app.route('/decision-tree') # def decision_tree(): # return render_template('decision-Tree.html') # @app.route('/decision-tree-game') # def decision_tree_game(): # return render_template('Decision-Tree-Game.html') @app.route('/run_svr_demo', methods=['POST']) def run_svr_demo(): try: # Check if the request contains JSON (for predefined datasets) or FormData (for file uploads) if request.is_json: data = request.json else: # For FormData, data is accessed via request.form for fields, request.files for files data = request.form dataset_type = data.get('dataset_type', 'linear') kernel_type = data.get('kernel', 'rbf') C_param = float(data.get('C', 1.0)) gamma_param = float(data.get('gamma', 0.1)) epsilon_param = float(data.get('epsilon', 0.1)) X, y = None, None if dataset_type == 'linear': X, y = generate_linear_data() elif dataset_type == 'non_linear': X, y = generate_non_linear_data() elif dataset_type == 'noisy': X, y = generate_noisy_data() elif dataset_type == 'house_data': X_house, y_house = get_house_data() if X_house is not None and not X_house.empty: X = X_house[['GrLivArea']].values # Only GrLivArea for simple 1D plotting y = y_house.values else: X, y = generate_linear_data() # Fallback if 
house data is missing/invalid elif dataset_type == 'custom_csv': # NEW: Handle custom CSV upload uploaded_file = request.files.get('file') x_column_name = data.get('x_column_name') y_column_name = data.get('y_column_name') if not uploaded_file or uploaded_file.filename == '': return jsonify({'error': 'No file uploaded for custom CSV.'}), 400 if not x_column_name or not y_column_name: return jsonify({'error': 'X and Y column names are required for custom CSV.'}), 400 try: # Read CSV into a pandas DataFrame from in-memory BytesIO object df = pd.read_csv(io.BytesIO(uploaded_file.read())) if x_column_name not in df.columns or y_column_name not in df.columns: missing_cols = [] if x_column_name not in df.columns: missing_cols.append(x_column_name) if y_column_name not in df.columns: missing_cols.append(y_column_name) return jsonify({'error': f"Missing columns in uploaded CSV: {', '.join(missing_cols)}"}), 400 X = df[[x_column_name]].values # Ensure X is 2D for scikit-learn y = df[y_column_name].values except Exception as e: return jsonify({'error': f"Error reading or processing custom CSV: {str(e)}"}), 400 else: # Fallback for unknown dataset types X, y = generate_linear_data() if X is None or y is None or len(X) == 0: return jsonify({'error': 'Failed to generate or load dataset.'}), 500 # Scale data scaler_X = StandardScaler() scaler_y = StandardScaler() X_scaled = scaler_X.fit_transform(X) y_scaled = scaler_y.fit_transform(y.reshape(-1, 1)).flatten() X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_scaled, test_size=0.2, random_state=42) # Train SVR model svr_model = SVR(kernel=kernel_type, C=C_param, gamma=gamma_param, epsilon=epsilon_param) svr_model.fit(X_train, y_train) # Make predictions y_pred_scaled = svr_model.predict(X_test) # Inverse transform predictions to original scale for metrics y_pred = scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1)).flatten() y_test_original = scaler_y.inverse_transform(y_test.reshape(-1, 1)).flatten() # Calculate metrics mse = mean_squared_error(y_test_original, y_pred) r2 = r2_score(y_test_original, y_pred) support_vectors_count = len(svr_model.support_vectors_) # Prepare data for plotting plot_X_original = scaler_X.inverse_transform(X_scaled) plot_y_original = scaler_y.inverse_transform(y_scaled.reshape(-1, 1)).flatten() x_plot = np.linspace(plot_X_original.min(), plot_X_original.max(), 500).reshape(-1, 1) x_plot_scaled = scaler_X.transform(x_plot) y_plot_scaled = svr_model.predict(x_plot_scaled) y_plot_original = scaler_y.inverse_transform(y_plot_scaled.reshape(-1, 1)).flatten() y_upper_scaled = y_plot_scaled + epsilon_param y_lower_scaled = y_plot_scaled - epsilon_param y_upper_original = scaler_y.inverse_transform(y_upper_scaled.reshape(-1, 1)).flatten() y_lower_original = scaler_y.inverse_transform(y_lower_scaled.reshape(-1, 1)).flatten() plot_data = { 'data': [ { 'x': plot_X_original.flatten().tolist(), 'y': plot_y_original.tolist(), 'mode': 'markers', 'type': 'scatter', 'name': 'Original Data' }, { 'x': x_plot.flatten().tolist(), 'y': y_plot_original.tolist(), 'mode': 'lines', 'type': 'scatter', 'name': 'SVR Prediction', 'line': {'color': 'red'} }, { 'x': x_plot.flatten().tolist(), 'y': y_upper_original.tolist(), 'mode': 'lines', 'type': 'scatter', 'name': 'Epsilon Tube (Upper)', 'line': {'dash': 'dash', 'color': 'green'}, 'fill': 'tonexty', 'fillcolor': 'rgba(0,128,0,0.1)' }, { 'x': x_plot.flatten().tolist(), 'y': y_lower_original.tolist(), 'mode': 'lines', 'type': 'scatter', 'name': 'Epsilon Tube (Lower)', 'line': {'dash': 
'dash', 'color': 'green'} } ], 'layout': { 'title': f'SVR Regression (Kernel: {kernel_type.upper()})', 'xaxis': {'title': 'Feature Value'}, 'yaxis': {'title': 'Target Value'}, 'hovermode': 'closest' } } return jsonify({ 'mse': mse, 'r2_score': r2, 'support_vectors_count': support_vectors_count, 'plot_data': plot_data }) except Exception as e: print(f"Error in SVR demo: {e}") return jsonify({'error': str(e)}), 500 def clean_text(text): return text.lower().strip() # Gradient-desent route @app.route('/gradient-descent') def gradient_descent(): return render_template('Gradient-Descen.html') #new @app.route('/gradient-descent-three') def gradient_descent_three(): return render_template('gradient-descent-three.html') # Gradient-boosting route @app.route('/gradient-boosting') def gradient_boosting(): return render_template('Gradient-Boosting.html') #new @app.route('/gradient-boosting-three') def gradient_boosting_three(): return render_template('gradient-boosting-three.html') # Gradient-xgboost route @app.route('/xgboost-regression') def xgboost_regression(): return render_template('XGBoost-Regression.html') @app.route('/xgboost-tree-three') def xgboost_regression_three(): return render_template('xboost-tree-three.html') @app.route('/xgboost-graph-three2') def xgboost_regression_three2(): return render_template('xbost-graph-three.html') #Gradient-lightgbm route @app.route('/lightgbm') def lightgbm(): return render_template('LightGBM-Regression.html') @app.route('/Naive-Bayes-Simulator') def Naive_Bayes_Simulator(): return render_template('Naive-Bayes-Simulator.html') @app.route('/svm-model-three') def svm_model_three(): return render_template('SVM_Simulator_3D.html') #nerual network route for calssifcation @app.route('/neural-network-classification') def neural_network_classification(): return render_template('Neural-Networks-for-Classification.html') @app.route('/Neural-Networks-for-Classification-three') def Neural_Networks_for_Classification_three(): return render_template('Neural-Networks-for-Classification-three.html') #hierarchical clustering route @app.route('/hierarchical-clustering') def hierarchical_clustering(): return render_template('Hierarchical-Clustering.html') @app.route('/hierarchical-three') def hierarchical_three(): return render_template('Hierarchical-three.html') #Gaussian-mixture-models route @app.route('/gaussian-mixture-models') def gaussian_mixture_models(): return render_template('Gaussian-Mixture-Models.html') @app.route('/gaussian-mixture-three') def gaussian_mixture_three(): return render_template('gmm-threejs.html') #Principal-Component-Analysis @app.route('/pca') def pca(): return render_template('Principal-Component-Analysis.html') @app.route('/pca-three') def pca_three(): return render_template('pca-threejs.html') #t-sne @app.route('/t-sne') def tsne(): return render_template('t-SNE.html') @app.route('/t-sne-three') def tsne_three(): return render_template('t-sne-three.html') # liner-discriminant-analysis @app.route('/lda') def lda(): return render_template('Linear-Discriminant-Analysis.html') @app.route('/lda-three') def lda_three(): return render_template('lda-three.html') # Independent-Component-Analysis @app.route('/ica') def ica(): return render_template('Independent-Component-Analysis.html') @app.route('/ica-three') def ica_three(): return render_template('ica-threejs.html') #Apriori @app.route('/apriori') def apriori(): return render_template('Apriori-Algorithm.html') @app.route('/apriori-three') def apriori_three(): return 
render_template('Apriori-Simulator-three.html') # Eclat Algorithm @app.route('/eclat') def eclat(): return render_template('Eclat-Algorithm.html') @app.route('/eclat-three') def eclat_three(): return render_template('Eclat-Algorithm-three.html') #genrative models @app.route('/generative-models') def generative_models(): return render_template('Generative-Models.html') #self training @app.route('/self-training') def self_training(): return render_template('Self-Training.html') # TRANSDUCTIVE SVM @app.route('/transductive-svm') def transductive_svm(): return render_template('Transductive-SVM.html') #Graph-Based Methods @app.route('/graph-based-methods') def graph_based_methods(): return render_template('Graph-Based-Method.html') #Agent-Environment-State @app.route('/agent-environment-state') def agent_environment_state(): return render_template('Agent-Environment-State.html') #Action and Policy @app.route('/action-and-policy') def action_and_policy(): return render_template('Action-and-Policy.html') #Reward-ValueFunction @app.route('/reward-valuefunction') def reward_valuefunction(): return render_template('Reward-ValueFunction.html') #Q-Learning @app.route('/q-learning') def q_learning(): return render_template('Q-Learning.html') #Deep Reinforcement Learning @app.route('/deep-reinforcement-learning') def deep_reinforcement_learning(): return render_template('Deep-Reinforcement-Learning.html') #Bagging @app.route('/bagging') def bagging(): return render_template('Bagging.html') #Boosting @app.route('/boosting') def boosting(): return render_template('Boosting.html') # stacking @app.route('/stacking') def stacking(): return render_template('Stacking.html') # voting @app.route('/voting') def voting(): return render_template('Voting.html') import re # Load saved model and vectorizer # model = joblib.load("Models/logistic_model.pkl") # vectorizer = joblib.load("Models/logvectorizer.pkl") # Text cleaning def clean_text(text): text = text.lower() text = re.sub(r'\W', ' ', text) text = re.sub(r'\s+[a-zA-Z]\s+', ' ', text) text = re.sub(r'\s+', ' ', text) return text.strip() @app.route('/logistic', methods=['GET', 'POST']) def logistic(): prediction, confidence_percentage, cleaned, tokens, probability = None, None, None, None, None # model = load_file("Models/logistic_model.pkl") # vectorizer = load_file("Models/logvectorizer.pkl") model = load_file("logistic_model.pkl") vectorizer = load_file("logvectorizer.pkl") if request.method == "POST": msg = request.form.get('message', '') cleaned = clean_text(msg) tokens = cleaned.split() try: vector = vectorizer.transform([cleaned]) probability = model.predict_proba(vector)[0][1] prediction = "Spam" if probability >= 0.5 else "Not Spam" confidence_percentage = round(probability * 100, 2) except Exception as e: print("Error predicting:", e) prediction = "Error" confidence_percentage = 0 return render_template( "logistic.html", prediction=prediction, confidence_percentage=confidence_percentage, cleaned=cleaned, tokens=tokens, probability=round(probability, 4) if probability else None, source="sms" ) @app.route('/logistic-sms', methods=['POST']) def logistic_sms(): try: data = request.get_json() msg = data.get('message', '') cleaned = clean_text(msg) tokens = cleaned.split() vector = vectorizer.transform([cleaned]) probability = model.predict_proba(vector)[0][1] prediction = "Spam" if probability >= 0.5 else "Not Spam" confidence_percentage = round(probability * 100, 2) return jsonify({ "prediction": prediction, "confidence": confidence_percentage, 
"probability": round(probability, 4), "cleaned": cleaned, "tokens": tokens, "source": "json" }) except Exception as e: print("Error in /logistic-sms:", e) return jsonify({"error": "Internal server error", "details": str(e)}), 500 # @app.route("/logistic", methods=["GET", "POST"]) # def logistic(): # prediction = None # error = None # if request.method == "POST": # try: # input_text = request.form.get("message") # # Load the vectorizer and logistic model from Models folder # vectorizer = joblib.load("Models/vectorizer.pkl") # model = joblib.load("Models/logistic_model.pkl") # # Transform input and make prediction # input_vector = vectorizer.transform([input_text]) # result = model.predict(input_vector)[0] # prediction = "✅ Not Spam" if result == 0 else "🚨 Spam" # except Exception as e: # error = str(e) # return render_template("logistic.html", prediction=prediction, error=error) #---------- LOAD MODEL & LABELS ONCE (startup) ---------- MODEL_PATH = os.path.join("Models", "knnmodel.joblib") # adjust if your filename is different LABELS_PATH = os.path.join("Models", "label_classes.npy") try: model = joblib.load(MODEL_PATH) except Exception as e: # Keep model as None so routes can return clear error if it's missing current_app.logger if hasattr(current_app, "logger") else print print(f"Failed to load model from {MODEL_PATH}: {e}") model = None try: label_classes = np.load(LABELS_PATH, allow_pickle=True) except Exception as e: print(f"Failed to load label_classes from {LABELS_PATH}: {e}") label_classes = None # ---------- KNN VISUAL ROUTES (unchanged) ---------- @app.route("/knn") def knn_visual(): return render_template("knn.html") @app.route('/knn_visual_predict', methods=['POST']) def knn_visual_predict(): data = request.get_json() points = np.array(data['points']) # shape: (N, 3) test_point = np.array(data['test_point']) # shape: (2,) k = int(data['k']) X = points[:, :2] y = points[:, 2].astype(int) knn_local = KNeighborsClassifier(n_neighbors=k) knn_local.fit(X, y) pred = knn_local.predict([test_point])[0] dists = np.linalg.norm(X - test_point, axis=1) neighbor_indices = np.argsort(dists)[:k] neighbors = X[neighbor_indices] return jsonify({ 'prediction': int(pred), 'neighbors': neighbors.tolist() }) # ---------- IMAGE PREDICTION ROUTE (fixed) ---------- @app.route("/knn_image") def knn_image_page(): return render_template("knn_image.html") @app.route("/predict_image", methods=["POST"]) def predict_image(): if "image" not in request.files: return jsonify({"error": "No image uploaded"}), 400 file = request.files["image"] try: # Convert to grayscale exactly like MNIST image = Image.open(file.stream).convert("L") image = image.resize((28, 28)) # MNIST size img_array = np.array(image).reshape(1, -1).astype("float32") # 784 features except Exception as e: return jsonify({"error": f"Invalid image. 
{str(e)}"}), 400 # Load model & labels model = joblib.load("Models/knnmodel.joblib") label_classes = np.load("Models/label_classes.npy", allow_pickle=True) # Predict class probs = model.predict_proba(img_array)[0] pred_index = np.argmax(probs) pred_label = label_classes[pred_index] confidence = round(float(probs[pred_index]) * 100, 2) return jsonify({ "prediction": str(pred_label), "confidence": f"{confidence}%", "all_probabilities": { str(label_classes[i]): round(float(probs[i]) * 100, 2) for i in range(len(probs)) } }) @app.route("/rfc") def random_forest_page(): return render_template("Random_Forest_Classifier.html") # Your beautiful HTML goes in rfc.html @app.route('/rf_visual_predict', methods=['POST']) def rf_visual_predict(): try: data = request.get_json() print("📦 Incoming JSON data:", data) labeled_points = data.get('points') test_point = data.get('test_point') if not labeled_points or not test_point: return jsonify({"error": "Missing points or test_point"}), 400 df = pd.DataFrame(labeled_points, columns=['X1', 'X2', 'Class']) X = df[['X1', 'X2']] y = df['Class'] rf_model = RandomForestClassifier(n_estimators=100, max_depth=5, random_state=42) rf_model.fit(X, y) test_point_np = np.array(test_point).reshape(1, -1) prediction = int(rf_model.predict(test_point_np)[0]) x_min, x_max = X['X1'].min() - 1, X['X1'].max() + 1 y_min, y_max = X['X2'].min() - 1, X['X2'].max() + 1 xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100), np.linspace(y_min, y_max, 100)) Z = rf_model.predict(np.c_[xx.ravel(), yy.ravel()]) Z = Z.reshape(xx.shape) return jsonify({ 'prediction': prediction, 'decision_boundary_z': Z.tolist(), 'decision_boundary_x_coords': xx[0, :].tolist(), 'decision_boundary_y_coords': yy[:, 0].tolist() }) except Exception as e: import traceback print("❌ Exception in /rf_visual_predict:") traceback.print_exc() # Print full error stack trace return jsonify({"error": str(e)}), 500 @app.route("/liar") def liar_input_page(): return render_template("rfc_liar_predict.html") @app.route("/ref/liar/predictor", methods=["POST"]) def liar_predictor(): try: data = request.get_json() statement = data.get("statement", "") if not statement: return jsonify({"success": False, "error": "Missing statement"}), 400 try: # 🔍 LIAR Model Prediction features = vectorizer.transform([statement]) prediction = model.predict(features)[0] liar_label_map = { 0: "It can be false 🔥", 1: "False ❌", 2: "Mostly false but can be true 🤏", 3: "Half True 🌓", 4: "Mostly True 👍", 5: "True ✅" } prediction_label = liar_label_map.get(int(prediction), "Unknown") except ValueError as ve: if "features" in str(ve): # Fallback to Gemini API prediction_label = ask_gemini(statement) else: raise ve # 🧠 BERT-Based Scientific Check bert_result = bert_checker(statement)[0] bert_label = bert_result["label"] bert_score = round(bert_result["score"] * 100, 2) science_label_map = { "LABEL_0": "✅ Scientifically Possible", "LABEL_1": "❌ Scientifically Impossible" } scientific_check = f"{science_label_map.get(bert_label, bert_label)} ({bert_score:.2f}%)" return jsonify({ "success": True, "prediction": prediction_label, "reason": "Predicted from linguistic and content-based patterns, or Gemini fallback.", "scientific_check": scientific_check }) except Exception as e: traceback.print_exc() return jsonify({"success": False, "error": str(e)}), 500 #svm @app.route("/svm") def svm_page(): return render_template("svm.html") @app.route('/svm_visual_predict', methods=['POST']) def svm_visual_predict(): data = request.json labeled_points = data['points'] 
test_point = data['test_point'] svm_type = data['svm_type'] c_param = float(data['c_param']) gamma_param = float(data['gamma_param']) # Will be ignored for linear kernel df = pd.DataFrame(labeled_points, columns=['X1', 'X2', 'Class']) X = df[['X1', 'X2']] y = df['Class'] # 1. Train the SVM Classifier if svm_type == 'linear': svm_model = svm.SVC(kernel='linear', C=c_param, random_state=42) elif svm_type == 'rbf': svm_model = svm.SVC(kernel='rbf', C=c_param, gamma=gamma_param, random_state=42) else: return jsonify({'error': 'Invalid SVM type'}), 400 svm_model.fit(X, y) # 2. Predict for the test point test_point_np = np.array(test_point).reshape(1, -1) prediction = int(svm_model.predict(test_point_np)[0]) # 3. Get Support Vectors # support_vectors_ refers to indices of support vectors # svc_model.support_vectors_ gives the actual support vectors support_vectors = svm_model.support_vectors_.tolist() # 4. Generate data for the decision boundary # Create a meshgrid of points to predict across the entire plot area x_min, x_max = X['X1'].min() - 1, X['X1'].max() + 1 y_min, y_max = X['X2'].min() - 1, X['X2'].max() + 1 # Extend range slightly to ensure test point is within boundary if it's an outlier x_min = min(x_min, test_point_np[0,0] - 1) x_max = max(x_max, test_point_np[0,0] + 1) y_min = min(y_min, test_point_np[0,1] - 1) y_max = max(y_max, test_point_np[0,1] + 1) xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100), np.linspace(y_min, y_max, 100)) # Predict class for each point in the meshgrid Z = svm_model.predict(np.c_[xx.ravel(), yy.ravel()]) Z = Z.reshape(xx.shape) # Convert numpy arrays to lists for JSON serialization decision_boundary_z = Z.tolist() decision_boundary_x_coords = xx[0, :].tolist() decision_boundary_y_coords = yy[:, 0].tolist() return jsonify({ 'prediction': prediction, 'decision_boundary_z': decision_boundary_z, 'decision_boundary_x_coords': decision_boundary_x_coords, 'decision_boundary_y_coords': decision_boundary_y_coords, 'support_vectors': support_vectors }) @app.route('/api/explain', methods=['POST']) def explain(): # In a real deployed environment, you'd secure your API key. # For Canvas, it's automatically injected if GEMINI_API_KEY is empty string. # If running locally and not in Canvas, set GEMINI_API_KEY in your environment variables. 
if not GEMINI_API_KEY and not os.getenv("FLASK_ENV") == "development": # Allow empty key in dev for local testing return jsonify({'error': 'Missing API key'}), 500 payload = request.get_json() try: response = requests.post( f"{GEMINI_URL}?key={GEMINI_API_KEY}", headers={"Content-Type": "application/json"}, json=payload ) response.raise_for_status() # Raise HTTPError for bad responses (4xx or 5xx) return jsonify(response.json()) except requests.exceptions.RequestException as e: app.logger.error(f"Error calling Gemini API: {e}") # Log the error on the server side return jsonify({'error': str(e)}), 500 @app.route('/decision_tree') def decision_tree_page(): # This route serves your Decision Tree visualization page # Ensure the HTML file name matches (e.g., 'decision_tree_viz.html' or 'decision_tree.html') return render_template('decision_tree.html') # Check your actual HTML file name here @app.route('/game') def decision_tree_game(): """Renders the interactive game page for decision trees.""" return render_template('decision_tree_game.html') @app.route('/dt_visual_predict', methods=['POST']) def dt_visual_predict(): try: data = request.json labeled_points = data['points'] test_point = data['test_point'] max_depth = int(data['max_depth']) # Convert labeled_points to a pandas DataFrame df = pd.DataFrame(labeled_points, columns=['X1', 'X2', 'Class']) X = df[['X1', 'X2']] y = df['Class'] # Check if there's enough data to train if X.empty or len(X) < 2: return jsonify({'error': 'Not enough data points to train the model.'}), 400 # 1. Train the Decision Tree Classifier (This is the "model" part) dt_model = DecisionTreeClassifier(max_depth=max_depth, random_state=42) dt_model.fit(X, y) # 2. Predict for the test point test_point_np = np.array(test_point).reshape(1, -1) prediction = int(dt_model.predict(test_point_np)[0]) # 3. Generate data for the decision boundary x_min, x_max = X['X1'].min(), X['X1'].max() y_min, y_max = X['X2'].min(), X['X2'].max() # Add a buffer to the plot range to make sure points are not on the edge # And handle cases where min == max (e.g., all points have same X1 value) x_buffer = 1.0 if (x_max - x_min) == 0 else (x_max - x_min) * 0.1 y_buffer = 1.0 if (y_max - y_min) == 0 else (y_max - y_min) * 0.1 x_min -= x_buffer x_max += x_buffer y_min -= y_buffer y_max += y_buffer # Ensure test point is also comfortably within the range x_min = min(x_min, test_point_np[0,0] - 0.5) x_max = max(x_max, test_point_np[0,0] + 0.5) y_min = min(y_min, test_point_np[0,1] - 0.5) y_max = max(y_max, test_point_np[0,1] + 0.5) # Create a meshgrid for plotting the decision boundary xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100), np.linspace(y_min, y_max, 100)) # Predict class for each point in the meshgrid using the trained model Z = dt_model.predict(np.c_[xx.ravel(), yy.ravel()]) Z = Z.reshape(xx.shape) # Convert numpy arrays to lists for JSON serialization decision_boundary_z = Z.tolist() decision_boundary_x_coords = xx[0, :].tolist() decision_boundary_y_coords = yy[:, 0].tolist() return jsonify({ 'prediction': prediction, 'decision_boundary_z': decision_boundary_z, 'decision_boundary_x_coords': decision_boundary_x_coords, 'decision_boundary_y_coords': decision_boundary_y_coords }) except Exception as e: # This will print the actual error to your terminal print(f"An error occurred in /dt_visual_predict: {e}") # Return a more informative error message to the frontend return jsonify({'error': f'Backend Error: {str(e)}. 
Check server console for details.'}), 500 # --- Naive Bayes Routes --- from urllib.parse import urlparse from sklearn.naive_bayes import GaussianNB from nltk.corpus import words nb_model = load_file("nb_url_model.pkl") vectorizer = load_file("nb_url_vectorizer.pkl") # if nb_model is not None and vectorizer is not None: # print("✅ Loaded Naive Bayes URL model") # else: # nb_model, vectorizer = None, None # print("❌ vectorizer not found") @app.route('/nb_spam') def nb_spam_page(): return render_template('NB_spam.html') import re from urllib.parse import urlparse from spellchecker import SpellChecker import wordninja # ---- Whitelist (your full one, unchanged) ---- whitelist = set([ # Search Engines 'google', 'bing', 'yahoo', 'duckduckgo', 'baidu', 'ask', # Social Media 'facebook', 'instagram', 'twitter', 'linkedin', 'snapchat', 'tiktok', 'threads', 'pinterest', 'reddit', 'quora', # Communication Tools 'whatsapp', 'telegram', 'skype', 'zoom', 'meet', 'discord', 'teams', 'signal', 'messenger', # Global E-commerce 'amazon', 'ebay', 'shopify', 'alibaba', 'walmart', 'target', 'etsy', 'shein', 'bestbuy', 'costco', 'newegg', # Indian E-commerce / Services 'flipkart', 'myntra', 'ajio', 'nykaa', 'meesho', 'snapdeal', 'paytm', 'phonepe', 'mobikwik', 'zomato', 'swiggy', 'ola', 'uber', 'bookmyshow', 'ixigo', 'makemytrip', 'yatra', 'redbus', 'bigbasket', 'grofers', 'blinkit', 'universalcollegeofengineering', # Education / Productivity 'youtube', 'docs', 'drive', 'calendar', 'photos', 'gmail', 'notion', 'edx', 'coursera', 'udemy', 'khanacademy', 'byjus', 'unacademy', # News / Media / Tech 'bbc', 'cnn', 'nyt', 'forbes', 'bloomberg', 'reuters', 'ndtv', 'indiatimes', 'thehindu', 'hindustantimes', 'indiatoday', 'techcrunch', 'verge', 'wired', # Streaming / Entertainment 'netflix', 'hotstar', 'primevideo', 'spotify', 'gaana', 'wynk', 'saavn', 'voot', # Dev & Tools 'github', 'stackoverflow', 'medium', 'gitlab', 'bitbucket', 'adobe', 'figma', 'canva', # Financial / Banking 'hdfcbank', 'icicibank', 'sbi', 'axisbank', 'kotak', 'boi', 'upi', 'visa', 'mastercard', 'paypal', 'stripe', 'razorpay', 'phonepe', 'paytm', # Government / Utilities 'gov', 'nic', 'irctc', 'uidai', 'mygov', 'incometax', 'aadhar', 'rbi', # Others Common 'airtel', 'jio', 'bsnl', 'vi', 'speedtest', 'cricbuzz', 'espn', 'espncricinfo', 'wikipedia', 'mozilla', 'opera', 'chrome', 'android', 'apple', 'windows', 'microsoft' ]) # ... your full whitelist from before ... 
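# Note: nb_model and vectorizer above are the Naive Bayes URL classifier loaded via
# load_file(); the /predict route further below currently relies only on the
# spell-check heuristics. A minimal sketch of how the model itself would be applied,
# assuming the vectorizer was fitted on raw URL strings (adjust if it expects dense
# input or engineered features instead):
#
#     features = vectorizer.transform(["http://example-login-verify.xyz/free-gift"])
#     is_spam = int(nb_model.predict(features)[0])            # 1 = spam, 0 = legitimate
#     spam_proba = float(nb_model.predict_proba(features)[0][1])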
# ---- Trusted & Bad TLDs ---- trusted_tlds = [ '.gov', '.nic.in', '.edu', '.ac.in', '.mil', '.org', '.int', '.co.in', '.gov.in', '.res.in', '.net.in', '.nic.gov.in' ] # Expanded Bad TLDs (Rule 4) bad_tlds = [ '.xyz', '.tk', '.ml', '.ga', '.cf', '.top', '.gq', '.cn', '.ru', '.pw', '.bid', '.link', '.loan', '.party', '.science', '.stream', '.webcam', '.online', '.site', '.website', '.space', '.club', '.buzz', '.info' ] # Suspicious extensions (Rule 13) suspicious_extensions = ['.exe', '.zip', '.rar', '.js', '.php', '.asp', '.aspx', '.jsp', '.sh'] # Phishing keywords (Rule 11, your full list) phishing_keywords = [ 'login', 'verify', 'secure', 'account', 'update', 'confirm', 'authenticate', 'free', 'bonus', 'offer', 'prize', 'winner', 'gift', 'coupon', 'discount', 'bank', 'paypal', 'creditcard', 'mastercard', 'visa', 'amex', 'westernunion', 'signin', 'click', 'password', 'unlock', 'recover', 'validate', 'urgency', 'limitedtime', 'expires', 'suspicious', 'alert', 'important', 'actionrequired' ] # ---- Rules 5–14 ---- rules = { 5: r"https?://\d{1,3}(\.\d{1,3}){3}", 6: r"@[A-Za-z0-9.-]+\.[A-Za-z]{2,}", 7: r"(free money|win now|click here)", 8: r"https?://[^\s]*\.(ru|cn|tk)", 9: r"https?://.{0,6}\..{2,6}/.{0,6}", 10: r"[0-9]{10,}", 12: r"https?://[^\s]*@[^\s]+", 13: r"https?://[^\s]*//[^\s]+", 14: r"https?://[^\s]*\?(?:[^=]+=[^&]*&){5,}", } # ---- Gibberish Check Helper (Rule 15) ---- def is_gibberish_word(word): vowels = "aeiou" v_count = sum(c in vowels for c in word) return v_count / len(word) < 0.25 # # ---- Utility: Extract words from URL ---- # def extract_words(url): # parsed = urlparse(url if url.startswith(("http://", "https://")) else "http://" + url) # raw = parsed.netloc.replace('-', '') + parsed.path.replace('-', '') # # Split using wordninja # words = wordninja.split(raw.lower()) # # Keep only alphabetic words of length >= 3 # words = [w for w in words if w.isalpha() and len(w) >= 3] # return words # ---- Extract words from URL ---- def extract_words(url): parsed = urlparse(url if url.startswith(("http://", "https://")) else "http://" + url) parts = re.split(r'\W+', parsed.netloc + parsed.path) final_words = [] for word in parts: if len(word) > 2 and word.isalpha(): split_words = wordninja.split(word.lower()) if len(split_words) <= 1: split_words = [word.lower()] final_words.extend(split_words) return final_words # --- Your original predict function, now inside the Flask app --- @app.route("/predict", methods=["POST"]) def predict(): try: data = request.get_json() url = data.get("url", "").lower() if not url: return jsonify({'error': 'No URL provided'}), 400 parsed = urlparse(url if url.startswith(("http://", "https://")) else "http://" + url) path = parsed.path # ---- SpellChecker using built-in dictionary ---- spell = SpellChecker(distance=1) # ---- Extract words and check spelling ---- words = extract_words(url) # ignore known TLDs tlds_to_ignore = [tld.replace('.', '',"/") for tld in trusted_tlds + bad_tlds] words_for_spellcheck = [w for w in words if w not in tlds_to_ignore] misspelled = spell.unknown(words_for_spellcheck) steps = [{"word": w, "valid": (w not in misspelled) or (w in tlds_to_ignore)} for w in words] if misspelled: return jsonify({ "prediction": 1, "reason": f"🧾 Spelling errors: {', '.join(misspelled)}", "steps": steps }) else: return jsonify({ "prediction": 0, "reason": "✅ No spelling issues", "steps": steps }) except Exception as e: return jsonify({'error': f"An issue occurred during spell checking: {str(e)}"}), 500 @app.route('/naive_bayes') def 
naive_bayes_page(): return render_template('naive_bayes_viz.html') # --- New Naive Bayes Prediction Route --- @app.route('/nb_visual_predict', methods=['POST']) def nb_visual_predict(): try: data = request.json labeled_points = data['points'] test_point = data['test_point'] df = pd.DataFrame(labeled_points, columns=['X1', 'X2', 'Class']) X = df[['X1', 'X2']] y = df['Class'] # Ensure enough data and at least two classes for classification if X.empty or len(X) < 2: return jsonify({'error': 'Not enough data points to train the model.'}), 400 if len(y.unique()) < 2: return jsonify({'error': 'Need at least two different classes to classify.'}), 400 # Train Gaussian Naive Bayes Model # GaussianNB is suitable for continuous data nb_model = GaussianNB() nb_model.fit(X, y) # Predict for the test point test_point_np = np.array(test_point).reshape(1, -1) prediction = int(nb_model.predict(test_point_np)[0]) # Generate data for the decision boundary x_min, x_max = X['X1'].min(), X['X1'].max() y_min, y_max = X['X2'].min(), X['X2'].max() x_buffer = 1.0 if x_max - x_min == 0 else (x_max - x_min) * 0.1 y_buffer = 1.0 if y_max - y_min == 0 else (y_max - y_min) * 0.1 x_min -= x_buffer x_max += x_buffer y_min -= y_buffer y_max += y_buffer x_min = min(x_min, test_point_np[0,0] - 0.5) x_max = max(x_max, test_point_np[0,0] + 0.5) y_min = min(y_min, test_point_np[0,1] - 0.5) y_max = max(y_max, test_point_np[0,1] + 0.5) xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100), np.linspace(y_min, y_max, 100)) if xx.size == 0 or yy.size == 0: return jsonify({'error': 'Meshgrid could not be created. Data range too narrow.'}), 400 # Predict class for each point in the meshgrid # Use predict_proba and then argmax to get class for decision boundary coloring Z = nb_model.predict(np.c_[xx.ravel(), yy.ravel()]) Z = Z.reshape(xx.shape) decision_boundary_z = Z.tolist() decision_boundary_x_coords = xx[0, :].tolist() decision_boundary_y_coords = yy[:, 0].tolist() return jsonify({ 'prediction': prediction, 'decision_boundary_z': decision_boundary_z, 'decision_boundary_x_coords': decision_boundary_x_coords, 'decision_boundary_y_coords': decision_boundary_y_coords }) except Exception as e: print(f"An error occurred in /nb_visual_predict: {e}") return jsonify({'error': f'Backend Error: {str(e)}. 
Check server console for details.'}), 500

def check_with_virustotal(url):
    try:
        # Read the VirusTotal API key from the environment (the module-level assignment is commented out above)
        headers = {"x-apikey": os.getenv("VT_API_KEY")}
        submit_url = "https://www.virustotal.com/api/v3/urls"

        # Submit the URL for scanning
        response = requests.post(submit_url, headers=headers, data={"url": url})
        url_id = response.json()["data"]["id"]

        # Fetch result
        result = requests.get(f"{submit_url}/{url_id}", headers=headers)
        data = result.json()
        stats = data["data"]["attributes"]["last_analysis_stats"]
        malicious_count = stats.get("malicious", 0)

        if malicious_count > 0:
            return True, f"☣️ VirusTotal flagged it as malicious ({malicious_count} engines)"
        return False, None
    except Exception as e:
        print(f"⚠️ VirusTotal error: {e}")
        return False, None

@app.route('/kmeans-clustering')
def clustering():
    return render_template('clustering.html')

# image code
@app.route('/kmeans-Dbscan-image', methods=['GET', 'POST'])
def compress_and_clean():
    final_image = None
    if request.method == 'POST':
        try:
            # Get form values
            mode = request.form.get('mode', 'compress')
            k = int(request.form.get('k', 8))
            eps = float(request.form.get('eps', 0.6))
            min_samples = int(request.form.get('min_samples', 50))
            image_file = request.files.get('image')

            if image_file and image_file.filename != '':
                # Load image
                img = Image.open(image_file).convert('RGB')
                max_size = (518, 518)
                img.thumbnail(max_size, Image.Resampling.LANCZOS)
                img_np = np.array(img)
                h, w, d = img_np.shape
                pixels = img_np.reshape(-1, d)

                # Apply KMeans (quantize the image down to k colours)
                kmeans = KMeans(n_clusters=k, random_state=42, n_init=10)
                kmeans.fit(pixels)
                clustered_pixels = kmeans.cluster_centers_[kmeans.labels_].astype(np.uint8)

                # Mode 1: Just Compress
                if mode == 'compress':
                    final_pixels = clustered_pixels.reshape(h, w, d)
                # Mode 2: Compress + Clean (KMeans + DBSCAN)
                else:
                    # Sample to avoid MemoryError: use only 10,000 pixels max for DBSCAN
                    max_dbscan_pixels = 10000
                    if len(clustered_pixels) > max_dbscan_pixels:
                        idx = np.random.choice(len(clustered_pixels), max_dbscan_pixels, replace=False)
                        dbscan_input = clustered_pixels[idx]
                    else:
                        dbscan_input = clustered_pixels

                    # DBSCAN: pixels in low-density colour regions get label -1 (noise)
                    scaler = StandardScaler()
                    pixels_scaled = scaler.fit_transform(dbscan_input)
                    db = DBSCAN(eps=eps, min_samples=min_samples)
                    labels = db.fit_predict(pixels_scaled)

                    # Clean noisy pixels (noise is blacked out)
                    clean_pixels = []
                    for i in range(len(dbscan_input)):
                        label = labels[i]
                        clean_pixels.append([0, 0, 0] if label == -1 else dbscan_input[i])

                    # Fill extra if sampling was used
                    if len(clustered_pixels) > max_dbscan_pixels:
                        clean_pixels.extend([[0, 0, 0]] * (len(clustered_pixels) - len(clean_pixels)))

                    final_pixels = np.array(clean_pixels, dtype=np.uint8).reshape(h, w, d)

                # Save final image
                final_img = Image.fromarray(final_pixels)
                final_image = 'compressed_clean.jpg'
                final_img.save(os.path.join(app.config['UPLOAD_FOLDER'], final_image), optimize=True, quality=90)

        except Exception as e:
            return f"⚠️ Error: {str(e)}", 500

    return render_template('kmean-dbscan-image.html', final_image=final_image)

@app.route('/DBscan')
def dbscan_page():  # named dbscan_page so it does not shadow sklearn's DBSCAN class used above
    return render_template('DBSCAN.html')

# test routes start here
@app.route('/Test-layout')
def test():
    return render_template('Test-layout.html')

@app.route('/Test-home')
def Test_home():
    return render_template('Test-home.html', active_page='Test-home')

@app.route('/Test-supervise')
def Test_supervise():
    return render_template('Test/Test-supervise.html', active_page='Test-supervise')

@app.route('/Test-unsupervised')
def Test_unsupervised():
    return render_template('Test/Test-unsupervised.html', active_page='Test-unsupervised')

# Semi-Supervised Learning page
@app.route('/DBscan')
def dbscan_page():  # renamed from DBSCAN so it does not shadow sklearn.cluster.DBSCAN used above
    return render_template('DBSCAN.html')


# Test routes start here
@app.route('/Test-layout')
def test():
    return render_template('Test-layout.html')


@app.route('/Test-home')
def Test_home():
    return render_template('Test-home.html', active_page='Test-home')


@app.route('/Test-supervise')
def Test_supervise():
    return render_template('Test/Test-supervise.html', active_page='Test-supervise')


@app.route('/Test-unsupervised')
def Test_unsupervised():
    return render_template('Test/Test-unsupervised.html', active_page='Test-unsupervised')


# Semi-Supervised Learning page
@app.route('/Test-semi-supervised')
def Test_semi_supervised():
    return render_template('Test/Test-semi_supervised.html', active_page='Test-semi_supervised')


# Reinforcement Learning page
@app.route('/Test-reinforcement')
def Test_reinforcement():
    return render_template('Test/Test-reinforcement.html', active_page='Test-reinforcement')


# Ensemble Learning page
@app.route('/Test-ensemble')
def Test_ensemble():
    return render_template('Test/Test-ensemble.html', active_page='Test-ensemble')


# Templates/Test/Quiz-Overview-Page.html
@app.route('/linear-Quiz-Overview-Page')
def linear_Test_quiz_overview():
    return render_template('Test/linear-Quiz-Overview-Page.html', active_page='linear-Quiz-Overview-Page')


@app.route('/Quiz-test')
def Quiz_test():
    return render_template('Test/Quiz-test.html', active_page='Quiz-test')


# If the data file doesn't show or display, serve it with render_template,
# e.g. render_template('data/yourfile.json')

# @app.route('/Quiz-test/<topic>')
# def quiz_topic(topic):
#     import json, os
#     count = int(request.args.get('count', 10))
#     try:
#         json_path = os.path.join(app.root_path, 'data', f'{topic}.json')
#         with open(json_path, 'r', encoding='utf-8') as f:
#             data = json.load(f)  # This is your JSON array
#         # Transform the JSON to match frontend expectations
#         transformed = []
#         for q in data[:count]:
#             transformed.append({
#                 "id": q.get("id"),
#                 "question": q.get("questionText"),
#                 "options": q.get("options"),
#                 "answer": q.get("options")[q.get("correctAnswerIndex")],
#                 "explanation": q.get("explanation")
#             })
#         return jsonify(transformed)
#     except FileNotFoundError:
#         return "Topic not found", 404
#     except json.JSONDecodeError:
#         return "Invalid JSON file", 500

# @app.route('/Quiz-test/<topic>')
# def quiz_topic(topic):
#     import os, json
#     count = int(request.args.get('count', 10))
#     json_path = os.path.join(app.root_path, 'data', f'{topic}.json')
#     try:
#         with open(json_path, 'r', encoding='utf-8') as f:
#             data = json.load(f)
#         # If JSON is a dict with "questions" key
#         if isinstance(data, dict) and "questions" in data:
#             questions = data["questions"][:count]
#         elif isinstance(data, list):
#             questions = data[:count]
#         else:
#             return "Invalid JSON structure", 400
#         return jsonify(questions)
#     except FileNotFoundError:
#         return "Topic not found", 404
#     except json.JSONDecodeError:
#         return "Invalid JSON file", 400


# ✅ API Route: Send JSON quiz data
@app.route('/api/quiz/<topic>')
def get_quiz(topic):
    count = int(request.args.get('count', 10))
    file_path = os.path.join('data', f'{topic}.json')
    if not os.path.exists(file_path):
        return jsonify({'error': 'Topic not found'}), 404
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    questions = data.get('questions', [])[:count]
    return jsonify({'questions': questions})
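# Illustrative sketch (not part of the app): /api/quiz/<topic> above expects a file
# data/<topic>.json whose top level is {"questions": [...]}. The per-question fields
# and the 'knn' topic below are assumptions for the example, not a guaranteed schema.
#
#   data/knn.json:
#   {
#       "questions": [
#           {"question": "KNN is a ...?",
#            "options": ["lazy learner", "eager learner"],
#            "answer": "lazy learner"}
#       ]
#   }
#
#   import requests
#   resp = requests.get("http://localhost:7860/api/quiz/knn", params={"count": 5})
#   print(resp.json()["questions"])  # at most `count` questions from the file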
@app.route('/polynomial-Quiz')
def polynomial_Test_quiz():
    return render_template('Test/polynomial-Quiz.html', active_page='polynomial-Quiz')


# -------------------------------
# Regression Algorithms
# -------------------------------
@app.route('/ridge-regression-test')
def ridge_regression_test():
    return render_template('Test/ridge-regression-test.html', active_page='ridge-regression-test')


@app.route('/lasso-regression-test')
def lasso_regression_test():
    return render_template('Test/lasso-regression-test.html', active_page='lasso-regression-test')


@app.route('/svr-test')
def svr_test():
    return render_template('Test/svr-r-test.html', active_page='svr-r-test')


@app.route('/decision-tree-regression-test')
def decision_tree_regression_test():
    return render_template('Test/decision-tree-regression-test.html', active_page='decision-tree-regression-test')


@app.route('/random-forest-regression-test')
def random_forest_regression_test():
    return render_template('Test/random-forest-regression-test.html', active_page='random-forest-regression-test')


# -------------------------------
# Classification Algorithms
# -------------------------------
@app.route('/logistic-regression-test')
def logistic_regression_test():
    return render_template('Test/logistic-regression-test.html', active_page='logistic-regression-test')


@app.route('/svm-c-test')
def svm_test():
    return render_template('Test/svm-c-test.html', active_page='svm-c-test')


@app.route('/decision-trees-c-test')
def decision_trees_test():
    return render_template('Test/decision-trees-c-test.html', active_page='decision-trees-c-test')


@app.route('/random-forest-c-test')
def random_forest_test():
    return render_template('Test/random-forest-c-test.html', active_page='random-forest-c-test')


@app.route('/gradient-descent-test')
def gradient_descent_test():
    return render_template('Test/gradient-descent-test.html', active_page='gradient-descent-test')


@app.route('/gradient-boosting-test')
def gradient_boosting_test():
    return render_template('Test/gradient-boosting-test.html', active_page='gradient-boosting-test')


@app.route('/xgboost-regression-test')
def xgboost_regression_test():
    return render_template('Test/xgboost-regression-test.html', active_page='xgboost-regression-test')


@app.route('/lightgbm-test')
def lightgbm_test():
    return render_template('Test/lightgbm-test.html', active_page='lightgbm-test')


@app.route('/knn-test')
def knn_test():
    return render_template('Test/knn-test.html', active_page='knn-test')


@app.route('/naive-bayes-test')
def naive_bayes_test():
    return render_template('Test/naive-bayes-test.html', active_page='naive-bayes-test')


@app.route('/neural-networks-test')
def neural_networks_test():
    return render_template('Test/neural-networks-test.html', active_page='neural-networks-test')


# -------------------------------
# Clustering
# -------------------------------
@app.route('/k-means-test')
def k_means_test():
    return render_template('Test/k-means-test.html', active_page='k-means-test')


@app.route('/hierarchical-clustering-test')
def hierarchical_clustering_test():
    return render_template('Test/hierarchical-clustering-test.html', active_page='hierarchical-clustering-test')


@app.route('/dbscan-test')
def dbscan_test():
    return render_template('Test/dbscan-test.html', active_page='dbscan-test')


@app.route('/gmm-test')
def gmm_test():
    return render_template('Test/gmm-test.html', active_page='gmm-test')


# -------------------------------
# Dimensionality Reduction
# -------------------------------
@app.route('/pca-test')
def pca_test():
    return render_template('Test/pca-test.html', active_page='pca-test')


@app.route('/tsne-test')
def tsne_test():
    return render_template('Test/tsne-test.html', active_page='tsne-test')


@app.route('/lda-test')
def lda_test():
    return render_template('Test/lda-test.html', active_page='lda-test')


@app.route('/ica-test')
def ica_test():
    return render_template('Test/ica-test.html', active_page='ica-test')


# -------------------------------
# Association Rule Learning
# -------------------------------
@app.route('/apriori-test')
def apriori_test():
    return render_template('Test/apriori-test.html', active_page='apriori-test')


@app.route('/eclat-test')
def eclat_test():
    return render_template('Test/eclat-test.html', active_page='eclat-test')
# -------------------------------
# Semi-Supervised Learning
# -------------------------------
@app.route('/generative-models-test')
def generative_models_test():
    return render_template('Test/generative-models-test.html', active_page='generative-models-test')


@app.route('/self-training-test')
def self_training_test():
    return render_template('Test/self-training-test.html', active_page='self-training-test')


@app.route('/transductive-svm-test')
def transductive_svm_test():
    return render_template('Test/transductive-svm-test.html', active_page='transductive-svm-test')


@app.route('/graph-based-methods-test')
def graph_based_methods_test():
    return render_template('Test/graph-based-methods-test.html', active_page='graph-based-methods-test')


# -------------------------------
# Reinforcement Learning
# -------------------------------
@app.route('/agent-environment-state-test')
def agent_environment_state_test():
    return render_template('Test/agent-environment-state-test.html', active_page='agent-environment-state-test')


@app.route('/action-policy-test')
def action_policy_test():
    return render_template('Test/action-policy-test.html', active_page='action-policy-test')


@app.route('/reward-value-function-test')
def reward_value_function_test():
    return render_template('Test/reward-value-function-test.html', active_page='reward-value-function-test')


@app.route('/q-learning-test')
def q_learning_test():
    return render_template('Test/q-learning-test.html', active_page='q-learning-test')


@app.route('/deep-reinforcement-learning-test')
def deep_reinforcement_learning_test():
    return render_template('Test/deep-reinforcement-learning-test.html', active_page='deep-reinforcement-learning-test')


# -------------------------------
# Ensemble Methods
# -------------------------------
@app.route('/bagging-test')
def bagging_test():
    return render_template('Test/bagging-test.html', active_page='bagging-test')


@app.route('/boosting-test')
def boosting_test():
    return render_template('Test/boosting-test.html', active_page='boosting-test')


@app.route('/stacking-test')
def stacking_test():
    return render_template('Test/stacking-test.html', active_page='stacking-test')


@app.route('/voting-test')
def voting_test():
    return render_template('Test/voting-test.html', active_page='voting-test')


if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)