2025-11-12 17:34:59 +01:00

292 lines
8.7 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2025-11-12 17:18:24.255077: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.\n",
"2025-11-12 17:18:24.312342: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
"To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
"2025-11-12 17:18:25.689783: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.\n"
]
}
],
"source": [
"import tensorflow as tf\n",
"from sklearn.datasets import make_classification\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.preprocessing import StandardScaler\n",
"from sklearn.metrics import classification_report, confusion_matrix"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Synthetic Data Generation"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# synthetic data generation of 2000 samples\n",
"X, y = make_classification(n_samples=2000,\n",
" n_features=20, \n",
" n_classes=2, \n",
" n_informative=15, \n",
" n_redundant=5, \n",
" random_state=42)\n",
"scaler = StandardScaler()\n",
"X = scaler.fit_transform(X)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Train/Validation/Test Splits"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Split into train (64%), val (16%), test (20%)\n",
"X_trainval, X_test, y_trainval, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
"X_train, X_val, y_train, y_val = train_test_split(X_trainval, y_trainval, test_size=0.2, random_state=42)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Model Setup"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# Feed Forward Neural Network Initalization\n",
"model = tf.keras.Sequential([\n",
" tf.keras.layers.Dense(64, activation='relu', input_shape=(20,)),\n",
" tf.keras.layers.Dense(32, activation='relu'),\n",
" tf.keras.layers.Dense(1, activation='sigmoid')\n",
"])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Training Hyperparameters Setup"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"#optimizer and loss setup\n",
"model.compile(\n",
" optimizer=tf.keras.optimizers.Adam(learning_rate=0.05),\n",
" loss='binary_crossentropy',\n",
" metrics=['accuracy']\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Learning Rate Scheduler"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Learning rate scheduler\n",
"lr_scheduler = tf.keras.callbacks.ReduceLROnPlateau(\n",
" monitor='val_loss', # metric to monitor\n",
" factor=0.5, # reduce by a factor\n",
" patience=2, # wait 2 epochs before reducing LR\n",
" min_lr=1e-5, # don't reduce below this\n",
" verbose=1\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Early Stopping Logic"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# 3. Early stopping callback with patience and loss threshold\n",
"early_stop = tf.keras.callbacks.EarlyStopping(\n",
" monitor='val_loss',\n",
" patience=3,\n",
" min_delta=0.01, # minimum change to be considered an improvement\n",
" restore_best_weights=True,\n",
" verbose=1\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Model Training"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 1/100\n",
"40/40 - 1s - 29ms/step - accuracy: 0.8359 - loss: 0.3691 - val_accuracy: 0.9187 - val_loss: 0.2269 - learning_rate: 0.0500\n",
"Epoch 2/100\n",
"40/40 - 0s - 3ms/step - accuracy: 0.9102 - loss: 0.2240 - val_accuracy: 0.9438 - val_loss: 0.1643 - learning_rate: 0.0500\n",
"Epoch 3/100\n",
"40/40 - 0s - 3ms/step - accuracy: 0.9477 - loss: 0.1400 - val_accuracy: 0.9531 - val_loss: 0.1484 - learning_rate: 0.0500\n",
"Epoch 4/100\n",
"40/40 - 0s - 3ms/step - accuracy: 0.9547 - loss: 0.1338 - val_accuracy: 0.9344 - val_loss: 0.1857 - learning_rate: 0.0500\n",
"Epoch 5/100\n",
"\n",
"Epoch 5: ReduceLROnPlateau reducing learning rate to 0.02500000037252903.\n",
"40/40 - 0s - 3ms/step - accuracy: 0.9555 - loss: 0.1402 - val_accuracy: 0.9219 - val_loss: 0.1695 - learning_rate: 0.0500\n",
"Epoch 6/100\n",
"40/40 - 0s - 3ms/step - accuracy: 0.9688 - loss: 0.0904 - val_accuracy: 0.9656 - val_loss: 0.1186 - learning_rate: 0.0250\n",
"Epoch 7/100\n",
"40/40 - 0s - 3ms/step - accuracy: 0.9812 - loss: 0.0491 - val_accuracy: 0.9688 - val_loss: 0.1048 - learning_rate: 0.0250\n",
"Epoch 8/100\n",
"40/40 - 0s - 4ms/step - accuracy: 0.9922 - loss: 0.0317 - val_accuracy: 0.9563 - val_loss: 0.1213 - learning_rate: 0.0250\n",
"Epoch 9/100\n",
"\n",
"Epoch 9: ReduceLROnPlateau reducing learning rate to 0.012500000186264515.\n",
"40/40 - 0s - 3ms/step - accuracy: 0.9922 - loss: 0.0220 - val_accuracy: 0.9625 - val_loss: 0.1212 - learning_rate: 0.0250\n",
"Epoch 10/100\n",
"40/40 - 0s - 3ms/step - accuracy: 0.9953 - loss: 0.0177 - val_accuracy: 0.9563 - val_loss: 0.1283 - learning_rate: 0.0125\n",
"Epoch 10: early stopping\n",
"Restoring model weights from the end of the best epoch: 7.\n"
]
},
{
"data": {
"text/plain": [
"<keras.src.callbacks.history.History at 0x7f6c3ff320c0>"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 4. Train the model\n",
"model.fit(\n",
" X_train, y_train,\n",
" validation_data=(X_val, y_val),\n",
" epochs=100,\n",
" callbacks=[early_stop, lr_scheduler], # your custom early stopping + LR scheduler\n",
" verbose=2\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"evaluation metrics"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[1m13/13\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 4ms/step \n",
"\n",
" Test Set Evaluation:\n",
" precision recall f1-score support\n",
"\n",
" 0 0.95 0.99 0.97 207\n",
" 1 0.99 0.95 0.97 193\n",
"\n",
" accuracy 0.97 400\n",
" macro avg 0.97 0.97 0.97 400\n",
"weighted avg 0.97 0.97 0.97 400\n",
"\n",
"Confusion Matrix:\n",
"[[205 2]\n",
" [ 10 183]]\n"
]
}
],
"source": [
"# 5. Evaluate on test set\n",
"y_pred_probs = model.predict(X_test).flatten()\n",
"y_pred = (y_pred_probs >= 0.5).astype(int)\n",
"\n",
"\n",
"print(\"\\n Test Set Evaluation:\")\n",
"print(classification_report(y_test, y_pred))\n",
"print(\"Confusion Matrix:\")\n",
"print(confusion_matrix(y_test, y_pred))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}