Commit e982cd2d authored by Dennis Willers 🏀

Cross-Validation implementiert mit einer Excel Dokumentation der Ergebnisse

parent af35f578
......@@ -9,25 +9,55 @@ from src.enum.optimierungsverfahren import Optimierungsverfahren
from src.knn.configKNN import ConfigKNN
from src.knn.createKNN import evaluate_model, fit_model
from src.knn.defineKNN import define_model
from src.result.createExcelFile import create_excel_result
from src.result.createExcelFile import create_excel_result, get_excel_workbook, get_excel_worksheet, save_excel, \
average_evaluate_cross_validation
from src.result.customCallback import CustomCallback
from src.result.plotResult import plot_values
# run the test harness for evaluating a model
def run_cross_validation():
    """Train and evaluate every model configuration once per market fold.

    Iterates over all combinations of optimisation method and the two
    activation functions. For each combination a fresh workbook is created,
    ``run_model`` is called once for every ``Markt`` member except
    ``Markt.Kein_Markt`` (with that member set as ``excluded_folder``), the
    averaged evaluation metrics are appended to the worksheet, and the
    workbook is saved.
    """
    print("Lese Configurationsdatei")
    # Read the configuration file. YAML streams are Unicode, so the encoding is
    # stated explicitly instead of relying on the platform default.
    # NOTE(review): yaml.FullLoader is kept as in the original; consider
    # yaml.safe_load if the config only holds plain data.
    with open("ressources/config/config.yaml", "r", encoding="utf-8") as file:
        config = yaml.load(file, Loader=yaml.FullLoader)

    for optimization_method in Optimierungsverfahren:
        for activation_function_1 in Aktivierungsfunktion:
            for activation_function_128 in Aktivierungsfunktion:
                # Per-configuration accumulators and a fresh result workbook.
                model_evaluate_metrics = []
                training_duration_models = []
                workbook = get_excel_workbook()
                worksheet = get_excel_worksheet(workbook)
                r = 1  # next free worksheet row

                # Model-building properties; the excluded folder is filled in per fold.
                config_knn = ConfigKNN(
                    excluded_folder=Markt.Kein_Markt,
                    activation_function_1_units=activation_function_1,
                    activation_function_128_units=activation_function_128,
                    optimization_method=optimization_method
                )

                for excluded_folder in Markt:
                    # Kein_Markt is only the placeholder value, not a fold.
                    if excluded_folder == Markt.Kein_Markt:
                        continue
                    config_knn.excluded_folder = excluded_folder
                    worksheet, r, evaluate_metrics, training_duration_model = run_model(
                        config, config_knn, worksheet, r)
                    model_evaluate_metrics.append(evaluate_metrics)
                    training_duration_models.append(training_duration_model)

                print("Berechne den Durchschnitt der Cross-Validation beim Modellbau")
                average_evaluate_cross_validation(worksheet, r, model_evaluate_metrics, training_duration_models)
                print("Speichere die Excel Tabelle")
                save_excel(workbook, config, config_knn)
# run a model definition
def run_model(config, config_knn, worksheet, r):
print("Initialisiere Callback")
# Erstellen Sie eine Instanz des benutzerdefinierten Callbacks
callback = CustomCallback()
......@@ -60,14 +90,17 @@ def run_model():
# print duration
print("Stoppe die Zeit")
end_time = datetime.now()
training_duration_model = format(end_time - start_time)
print('Die Dauer für das Erstellen des Modells beträgt: ' + training_duration_model)
training_duration_model = end_time - start_time
training_duration_model_str = format(training_duration_model)
print('Die Dauer für das Erstellen des Modells beträgt: ' + training_duration_model_str)
# learning curves
print("Erstelle den Plot Graphen")
plot_values(history, config)
print("Erstelle die Excel-Tabelle")
create_excel_result(callback, config, config_knn, evaluate_metrics, training_duration_model)
print("Inhalte in die Excel-Tabelle schreiben")
worksheet, r = create_excel_result(worksheet, r, callback, config, config_knn, evaluate_metrics,
training_duration_model)
return worksheet, r, evaluate_metrics, training_duration_model
# entry point, run the test harness
run_model()
# entry point
run_cross_validation()
......@@ -4,8 +4,8 @@ from enum import Enum
class Markt(Enum):
    """Markets that can be selected as the excluded (evaluation) folder.

    Member values are plain integers. The previous trailing commas turned the
    values into 1-tuples such as ``(0,)``, which was inconsistent with
    ``Kein_Markt`` and made value lookups like ``Markt(0)`` fail.
    """

    Markt_A = 0
    Markt_B = 1
    # Markets C-F are currently disabled; uncomment to include them again.
    # Markt_C = 2
    # Markt_D = 3
    # Markt_E = 4
    # Markt_F = 5
    # Placeholder meaning "no market excluded".
    Kein_Markt = -1
import openpyxl
import time
from datetime import datetime
import openpyxl
def create_excel_result(callback, config, config_knn, evaluate_metrics, training_duration_model):
# Öffnen Sie eine neue Excel-Arbeitsmappe
workbook = openpyxl.Workbook()
# Fügen Sie ein neues Arbeitsblatt hinzu
worksheet = workbook.active
r = 1
def create_excel_result(worksheet, r, callback, config, config_knn, evaluate_metrics, training_duration_model):
# Spaltenbreite setzen
worksheet.column_dimensions["A"].width = 10
worksheet.column_dimensions["B"].width = 20
......@@ -27,12 +22,12 @@ def create_excel_result(callback, config, config_knn, evaluate_metrics, training
worksheet.cell(row=r, column=3).value = 'Aktivierungsfunktion 128'
worksheet.cell(row=r, column=4).value = 'Aktivierungsfunktion 2'
worksheet.cell(row=r, column=5).value = 'Evaluationsmarkt'
r = r+1
r = r + 1
worksheet.cell(row=r, column=2).value = config_knn.optimization_method.name
worksheet.cell(row=r, column=3).value = config_knn.activation_function_128_units.name
worksheet.cell(row=r, column=4).value = config_knn.activation_function_1_units.name
worksheet.cell(row=r, column=5).value = config_knn.excluded_folder.name
r = r+2
r = r + 2
# Schreiben Sie die Überschriften in die erste Zeile
# TODO: Ergänzung Recall 1, Recall 2
......@@ -46,7 +41,7 @@ def create_excel_result(callback, config, config_knn, evaluate_metrics, training
worksheet.cell(row=r, column=8).value = 'test_recall_oos'
worksheet.cell(row=r, column=9).value = 'test_recall_not_oos'
worksheet.cell(row=r, column=10).value = 'duration'
r = r+1
r = r + 1
# Schreiben Sie die Ergebnisse der Epochen in die Arbeitsmappe
for i, (loss, accuracy, recall_oos, recall_not_oos,
......@@ -55,40 +50,105 @@ def create_excel_result(callback, config, config_knn, evaluate_metrics, training
callback.epoch_training_recall_oos, callback.epoch_training_recall_not_oos,
callback.epoch_test_losses, callback.epoch_test_accuracies, callback.epoch_test_recall_oos,
callback.epoch_test_recall_not_oos, callback.epoch_durations)):
worksheet.cell(row=r+i, column=1).value = i + 1
worksheet.cell(row=r+i, column=2).value = loss
worksheet.cell(row=r+i, column=3).value = accuracy
worksheet.cell(row=r+i, column=4).value = recall_oos
worksheet.cell(row=r+i, column=5).value = recall_not_oos
worksheet.cell(row=r+i, column=6).value = val_loss
worksheet.cell(row=r+i, column=7).value = val_accuracy
worksheet.cell(row=r+i, column=8).value = val_recall_oos
worksheet.cell(row=r+i, column=9).value = val_recall_not_oos
worksheet.cell(row=r+i, column=10).value = duration
r = r+config["knn"]["epochs"]+1
worksheet.cell(row=r + i, column=1).value = i + 1
worksheet.cell(row=r + i, column=2).value = loss
worksheet.cell(row=r + i, column=3).value = accuracy
worksheet.cell(row=r + i, column=4).value = recall_oos
worksheet.cell(row=r + i, column=5).value = recall_not_oos
worksheet.cell(row=r + i, column=6).value = val_loss
worksheet.cell(row=r + i, column=7).value = val_accuracy
worksheet.cell(row=r + i, column=8).value = val_recall_oos
worksheet.cell(row=r + i, column=9).value = val_recall_not_oos
worksheet.cell(row=r + i, column=10).value = duration
r = r + config["knn"]["epochs"] + 1
worksheet.cell(row=r, column=2).value = 'Trainingsdauer'
worksheet.cell(row=r, column=3).value = training_duration_model
r = r+3
r = r + 3
worksheet.cell(row=r, column=1).value = 'Evaluation'
worksheet.cell(row=r, column=2).value = 'loss'
worksheet.cell(row=r, column=3).value = 'accuracy'
worksheet.cell(row=r, column=4).value = 'recall_oos'
worksheet.cell(row=r, column=5).value = 'recall_not_oos'
r = r+1
r = r + 1
worksheet.cell(row=r, column=1).value = config_knn.excluded_folder.name
worksheet.cell(row=r, column=2).value = evaluate_metrics[0]
worksheet.cell(row=r, column=3).value = evaluate_metrics[1]
worksheet.cell(row=r, column=4).value = evaluate_metrics[2]
worksheet.cell(row=r, column=5).value = evaluate_metrics[3]
r = r + 4
return worksheet, r
# TODO: Abschluss Statistik nach Cross-Validation von einer Konfiguration: Durchschnitt Accuracy, Recall 1, Recall 2, Loss, Duration für Evaluation
def average_evaluate_cross_validation(worksheet, r, model_evaluate_metrics, training_duration_models):
    """Write the averaged cross-validation results into the worksheet.

    Three rows are written starting at row ``r``: a title row, a header row
    and one row with the number of evaluated models, the averaged metrics and
    the averaged training duration.

    Args:
        worksheet: Worksheet object offering ``cell(row=..., column=...)``.
        r: First row to write to.
        model_evaluate_metrics: One metrics sequence per evaluated model.
        training_duration_models: One training duration per evaluated model.
    """
    total_information = sum_values_into_one_array(model_evaluate_metrics, training_duration_models)

    # Title row, starting in column B.
    for column, text in enumerate(('AVERAGE', 'EVALUATE', 'METRICS'), start=2):
        worksheet.cell(row=r, column=column).value = text
    r = r + 1

    # Header row.
    header = ('Models', 'loss', 'accuracy', 'recall_oos', 'recall_not_oos', 'duration')
    for column, text in enumerate(header, start=1):
        worksheet.cell(row=r, column=column).value = text
    r = r + 1

    # Number of models that went into the average. The averaged array always
    # has five entries, so its length must not be used here.
    worksheet.cell(row=r, column=1).value = len(model_evaluate_metrics)
    for column, value in enumerate(total_information[:4], start=2):
        worksheet.cell(row=r, column=column).value = value
    # Store the average duration as its string representation.
    worksheet.cell(row=r, column=6).value = format(total_information[4])
def sum_values_into_one_array(model_evaluate_metrics, training_duration_models):
    """Average the evaluation metrics and training durations of several models.

    Args:
        model_evaluate_metrics: Sequence of per-model metric sequences; the
            first four entries of each are averaged position by position.
        training_duration_models: Sequence of ``timedelta`` objects, one per
            trained model.

    Returns:
        A list of five entries: the four averaged metrics followed by the
        average training duration as a ``timedelta``. For empty input the
        averages are zero instead of raising ``ZeroDivisionError``, so the
        caller can still write and save its results.
    """
    from datetime import timedelta  # local import: only needed in this function

    metric_count = 4
    totals = [0] * metric_count
    for evaluate_metric in model_evaluate_metrics:
        for index, value in enumerate(evaluate_metric[:metric_count]):
            totals[index] += value

    num_models = len(model_evaluate_metrics)
    averages = [total / num_models for total in totals] if num_models else totals

    # timedelta supports addition and division directly, so no detour through
    # datetime.now() is needed.
    num_durations = len(training_duration_models)
    if num_durations:
        average_duration = sum(training_duration_models, timedelta()) / num_durations
    else:
        average_duration = timedelta()

    return averages + [average_duration]
def get_excel_workbook():
    """Create and return a new, empty openpyxl workbook."""
    return openpyxl.Workbook()
def get_excel_worksheet(workbook):
    """Return the active worksheet of the given workbook."""
    return workbook.active
def save_excel(workbook, config, config_knn):
    """Save the workbook under a name derived from the model configuration.

    The file name is
    ``<optimization>_<activation_1>_<activation_128>_<dd_mm_yy__HH_MM>.xlsx``
    and is appended directly to ``config["result"]["excel_path"]``.

    Args:
        workbook: Workbook object offering ``save(path)``.
        config: Parsed configuration; ``config["result"]["excel_path"]`` is
            used as path prefix.
        config_knn: Model configuration whose ``optimization_method``,
            ``activation_function_1_units`` and
            ``activation_function_128_units`` provide the name parts.
    """
    timestamp = datetime.now().strftime("%d_%m_%y__%H_%M")
    file_name = "_".join([
        config_knn.optimization_method.name,
        config_knn.activation_function_1_units.name,
        config_knn.activation_function_128_units.name,
        timestamp,
    ]) + ".xlsx"
    # NOTE(review): plain concatenation - presumably excel_path ends with a
    # path separator; verify against the config file.
    workbook.save(config["result"]["excel_path"] + file_name)
# Abschlussbericht definieren
# TODO: Implementierung Evaluation mit Excluded Folder, Accuracy, Recall 1, Recall 2
# TODO: Abschluss Statistik nach Cross-Validation von einer Konfiguration: Durchschnitt Accuracy, Recall 1, Recall 2, Loss, Duration für Evaluation
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment