# File size: 1,899 Bytes
# cab07a0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# DataVisualization.py
# Purpose: Script to create visualizations for chat data and machine learning model results.

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load Data
# Assumes a CSV file holding the model's predictions and the actual scores
# is available at the path below.
datafile_path = "data/model_predictions.csv"
df = pd.read_csv(filepath_or_buffer=datafile_path)

# Visualization Functions

def plot_feature_importances(model, feature_names=None, top_n=10):
    """
    Plots the largest feature importances of a trained model as a horizontal bar chart.

    Args:
        model: Object exposing a ``feature_importances_`` sequence.
        feature_names: Labels for the importances, one per entry of
            ``model.feature_importances_``. When omitted, every column of the
            module-level ``df`` except the last is used (the original behaviour).
        top_n: Number of highest-importance features to display. Defaults to 10.

    Raises:
        ValueError: Raised by pandas when the number of labels does not match
            the number of importances.
    """
    if feature_names is None:
        # Backward-compatible default. NOTE(review): this only lines up with the
        # model when ``df`` has the training feature columns followed by one
        # target column - pass ``feature_names`` explicitly otherwise.
        feature_names = df.columns[:-1]

    # Build the series before opening a figure so a label/importance length
    # mismatch does not leave an empty figure behind.
    feat_importances = pd.Series(model.feature_importances_, index=feature_names)

    # Open a dedicated figure, as the other plotting functions in this file do,
    # instead of drawing onto whatever figure happens to be current.
    plt.figure(figsize=(10, 6))
    feat_importances.nlargest(top_n).plot(kind='barh')
    plt.title('Feature Importances')
    plt.show()

def plot_actual_vs_predicted(y_actual, y_pred, title='Actual vs Predicted'):
    """
    Draws predicted values against actual values as a scatter plot.

    A dashed red line running from (min, min) to (max, max) of ``y_actual`` is
    overlaid as the reference where predicted equals actual. ``y_actual`` must
    provide ``.min()`` and ``.max()``.
    """
    plt.figure(figsize=(10, 6))
    axes = sns.scatterplot(x=y_actual, y=y_pred, alpha=0.6)
    # Endpoints of the reference line are taken from the actual values only.
    lowest, highest = y_actual.min(), y_actual.max()
    axes.plot([lowest, highest], [lowest, highest], '--r')
    axes.set_xlabel('Actual')
    axes.set_ylabel('Predicted')
    axes.set_title(title)
    plt.show()

def plot_error_distribution(y_actual, y_pred, title='Error Distribution'):
    """
    Shows how the prediction errors (actual minus predicted) are distributed.

    The errors are drawn as a 20-bin histogram with a KDE curve on top.
    """
    residuals = y_actual - y_pred
    plt.figure(figsize=(10, 6))
    axes = sns.histplot(data=residuals, bins=20, kde=True)
    axes.set_xlabel('Prediction Error')
    axes.set_title(title)
    plt.show()

# Example Usage
# These calls are only examples. Uncomment them, then replace 'your_model' with your
# trained model and the 'ActualScore' / 'PredictedScore' columns with those in your data.

# plot_feature_importances(your_model)
# plot_actual_vs_predicted(df['ActualScore'], df['PredictedScore'])
# plot_error_distribution(df['ActualScore'], df['PredictedScore'])

# Note to Users:
# - Adjust the data paths, column names, and model variables as per your data and model.
# - Feel free to add more visualization functions based on your specific needs.