commit e783ffa2cc
init

LICENSE (new executable file)
MIT License

Copyright (c) 2024 Dmitri Ollari Ischimji

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

README.md (new executable file)
# Womb Wise

Research project carried out with the Hospital of Parma for a thesis.
I worked on the machine learning part.
The main task was to determine whether videos of fetuses contained specific actions, such as yawning.
Since a yawn is a complex movement that unfolds over time, I had to dig into time-series methods.

I have removed the input and output data to avoid privacy issues.

Kanopo
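
To make the task concrete, here is a minimal sketch of the framing used throughout this repository: each clip becomes a fixed-length time series of tracked mouth/nose keypoints, labelled with the action it contains. The shapes and class names are copied from the constants in `src/main.py`; the random array is an illustrative stand-in for real tracking data.

```python
import numpy as np

# Assumptions (from src/main.py): 10 keypoint coordinates per frame,
# sequences padded/truncated to 60 frames, three classes.
FEATURE_SIZE = 10
SERIES_LENGTH = 60
CLASSES = ["baseline", "opcl", "yawn"]

# One clip = a (frames, features) time series of tracked coordinates.
clip = np.random.rand(47, FEATURE_SIZE)  # illustrative stand-in for real data

# Pad short clips with zeros so every sample has the same length.
if clip.shape[0] < SERIES_LENGTH:
    pad = np.zeros((SERIES_LENGTH - clip.shape[0], FEATURE_SIZE))
    clip = np.vstack([clip, pad])

sample = {"data": clip.astype(np.float32), "label": CLASSES.index("yawn")}
print(sample["data"].shape, sample["label"])  # (60, 10) 2
```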

fetus-event-detection-classification/.gitignore (vendored, new executable file)
/weights/

fetus-event-detection-classification/poetry.lock (generated; diff suppressed because it is too large)

fetus-event-detection-classification/pyproject.toml (new executable file)
[tool.poetry]
name = "fetus-event-detection-classification"
version = "0.1.0"
description = ""
authors = ["kanopo <dmitri.ollari@protonmail.com>"]
license = "MIT"
readme = "README.md"
package-mode = false

[tool.poetry.dependencies]
python = "^3.12"
pandas = "^2.2.2"
matplotlib = "^3.9.1"
torch = "^2.3.1"
seaborn = "^0.13.2"
imblearn = "^0.0"
imbalanced-learn = "^0.12.3"
torchmetrics = "^1.4.0.post0"
tqdm = "^4.66.4"


[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

fetus-event-detection-classification/src/load_dataset.py (new executable file)
import os
import re
import warnings
from typing import List, Tuple

import numpy as np
import pandas as pd

warnings.simplefilter(action="ignore", category=FutureWarning)

# Columns produced by the tracking CSVs: image_name is a string, every
# coordinate is a float.
KEYPOINT_COLUMNS = [
    "image_name",
    "leftLip_x",
    "leftLip_y",
    "rightLip_x",
    "rightLip_y",
    "topMidInner_x",
    "topMidInner_y",
    "bottomMidInner_x",
    "bottomMidInner_y",
    "nose_x",
    "nose_y",
]


def remove_dot_files(files: list) -> list:
    return [file for file in files if not file.startswith(".")]


def get_dataset(path: str, data_type: str) -> pd.DataFrame:
    """Walk the tracked-frames tree and collect every tracking CSV whose
    action directory matches data_type ("baseline", "yawn" or "opcl")."""
    path_base_dir = os.path.expanduser(path)

    dataset = pd.DataFrame(columns=KEYPOINT_COLUMNS + ["test", "frame", "label"])

    for root, dirs, _ in os.walk(path_base_dir):
        for d in dirs:
            splitted = root.split("/")
            dir_path = os.path.join(root, d)
            fetus_name = splitted[-2]
            fetus_action = splitted[-3]

            # Directory names may be lowercase or capitalised ("yawn"/"Yawn").
            if not re.search(data_type, d, re.IGNORECASE):
                continue

            for f in remove_dot_files(os.listdir(dir_path)):
                # The frame images (.png) are skipped; only the CSVs are parsed.
                if not f.endswith(".csv") or fetus_action.lower() != data_type:
                    continue

                p = os.path.join(dir_path, f)
                data = pd.read_csv(p)
                # The CSVs carry two extra header rows and two index columns.
                data.columns = data.iloc[0]
                data = data.drop([0, 1])
                data = data.iloc[:, 2:]
                data.columns = KEYPOINT_COLUMNS

                data = data.dropna()
                data["test"] = fetus_name.split("_")[1]
                data["frame"] = p.split("/")[-2].split("_")[-1]
                data["label"] = data_type

                # "img12.png" -> "0012", so a lexicographic sort follows frame order.
                image_name = data["image_name"].apply(
                    lambda x: x.split(".")[0].split("img")[1]
                )
                image_name = image_name.apply(lambda x: x.zfill(4))
                data["image_name"] = image_name

                data = calculate_distance(data)

                dataset = pd.concat([dataset, data])

    dataset = dataset.dropna()
    dataset = dataset.reset_index(drop=True)

    return dataset


def euclidean_distance(x1, y1, x2, y2) -> float:
    x1 = float(x1)
    y1 = float(y1)
    x2 = float(x2)
    y2 = float(y2)

    return np.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)


def calculate_distance(dataset: pd.DataFrame) -> pd.DataFrame:
    # Mouth opening: distance between the inner top and bottom lip markers.
    dataset["top_bottom_distance"] = dataset.apply(
        lambda x: euclidean_distance(
            x["topMidInner_x"],
            x["topMidInner_y"],
            x["bottomMidInner_x"],
            x["bottomMidInner_y"],
        ),
        axis=1,
    )
    return dataset


def split_dataset(
    dataset: pd.DataFrame, classes, train_optimal_size, test_optimal_size, series_length
) -> Tuple[List[dict], List[dict]]:
    dataset["label"] = dataset["label"].apply(lambda x: classes.index(x))

    total_dataset: List[dict] = []

    grouped = dataset.groupby(by=["test", "frame", "label"])
    for name, group in grouped:
        df = group.reset_index(drop=True, inplace=False)
        # Strip the metadata columns so only the keypoint features remain.
        label = df.pop("label")
        frame = df.pop("frame")
        test = df.pop("test")
        distance = df.pop("top_bottom_distance")

        df = df.sort_values(by="image_name")
        df.pop("image_name")
        df = df.reset_index(drop=True, inplace=False)
        x = df.values
        y = label.values[0]

        # Pad short series (zeros, or the first value for a 1-D series) and
        # truncate long ones, so every sample has exactly series_length rows.
        if x.shape[0] < series_length:
            missing_rows = series_length - x.shape[0]
            if len(x.shape) == 1:
                new_matrix = np.full((missing_rows), x[0])
            else:
                new_matrix = np.zeros((missing_rows, x.shape[1]))
            x = np.concatenate((x, new_matrix), axis=0)

        if x.shape[0] > series_length:
            if len(x.shape) == 1:
                x = x[:series_length]
            else:
                x = x[:series_length, :]

        total_dataset.append({"x": x, "y": y})

    # Per-class counts drive the stratified train/test split below.
    total_yawn_count = 0
    total_baseline_count = 0
    total_opcl_count = 0
    for i in range(len(total_dataset)):
        if total_dataset[i]["y"] == 0:
            total_baseline_count += 1
        if total_dataset[i]["y"] == 1:
            total_opcl_count += 1
        if total_dataset[i]["y"] == 2:
            total_yawn_count += 1

    train: List[dict] = []
    test: List[dict] = []

    train_yawn_optimal_size = int(total_yawn_count * train_optimal_size)
    test_yawn_optimal_size = int(total_yawn_count * test_optimal_size)

    train_baseline_optimal_size = int(total_baseline_count * train_optimal_size)
    test_baseline_optimal_size = int(total_baseline_count * test_optimal_size)

    train_opcl_optimal_size = int(total_opcl_count * train_optimal_size)
    test_opcl_optimal_size = int(total_opcl_count * test_optimal_size)

    train_yawn: List[dict] = []
    test_yawn: List[dict] = []

    train_baseline: List[dict] = []
    test_baseline: List[dict] = []

    train_opcl: List[dict] = []
    test_opcl: List[dict] = []

    for data in total_dataset:
        if data["y"] == 2:  # yawn
            if len(train_yawn) < train_yawn_optimal_size:
                train_yawn.append({"data": data["x"], "label": data["y"]})
            else:
                test_yawn.append({"data": data["x"], "label": data["y"]})
        elif data["y"] == 0:  # baseline
            if len(train_baseline) < train_baseline_optimal_size:
                train_baseline.append({"data": data["x"], "label": data["y"]})
            else:
                test_baseline.append({"data": data["x"], "label": data["y"]})
        elif data["y"] == 1:  # opcl
            if len(train_opcl) < train_opcl_optimal_size:
                train_opcl.append({"data": data["x"], "label": data["y"]})
            else:
                test_opcl.append({"data": data["x"], "label": data["y"]})
        else:
            print("[ERROR] Invalid class label during split")
            break

    train += train_yawn + train_baseline + train_opcl
    test += test_yawn + test_baseline + test_opcl

    np.random.shuffle(train)
    np.random.shuffle(test)

    return train, test
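
A minimal usage sketch for the loader above. The directory layout (action/subject/clip folders containing tracking CSVs) is an assumption inferred from the path handling in `get_dataset`; the root path is the default used in `src/main.py`.

```python
import pandas as pd

from load_dataset import get_dataset, split_dataset

# Assumed layout: .../tracked_frames/<action>/<subject>/<clip>/ with one CSV per clip.
root = "~/Documents/womb-wise/Data/Ultrasound_Scans/tracked_frames/"

frames = pd.concat([get_dataset(root, a) for a in ("baseline", "opcl", "yawn")])
print(frames[["test", "frame", "label", "top_bottom_distance"]].head())

# 80/20 per-class split into fixed-length (60-frame) series.
train, test = split_dataset(
    frames,
    classes=["baseline", "opcl", "yawn"],
    train_optimal_size=0.8,
    test_optimal_size=0.2,
    series_length=60,
)
print(len(train), len(test))
```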

fetus-event-detection-classification/src/main.py (new executable file)
#!/usr/bin/env python3.12

import argparse
import os
import warnings
from collections import Counter
from typing import List

import numpy as np
import pandas as pd
import seaborn as sns
import torch
import torch.nn as nn
import torch.optim as optim
from imblearn.over_sampling import RandomOverSampler
from imblearn.under_sampling import RandomUnderSampler
from matplotlib import pyplot as plt
from sklearn.model_selection import StratifiedKFold, train_test_split
from torch.utils.data import DataLoader, Dataset

from load_dataset import get_dataset
from model import SimpleLSTM
from training import training_loop
from validation import validation

warnings.simplefilter(action="ignore", category=FutureWarning)


def setup_model_training(
    input_size,
    hidden_size,
    num_layers,
    num_classes,
    sequence_length,
    device,
    lr,
    weight_decay,
    eps,
):
    model = SimpleLSTM(
        input_size=input_size,
        hidden_size=hidden_size,
        num_layers=num_layers,
        num_classes=num_classes,
        sequence_length=sequence_length,
        device=device,
    )

    optimizer = optim.Adam(
        model.parameters(), lr=lr, weight_decay=weight_decay, eps=eps
    )

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode="min", factor=0.1, patience=25
    )

    # Targets are one-hot encoded, so each class is scored as an independent logit.
    criterion = nn.BCEWithLogitsLoss()

    return (model, optimizer, scheduler, criterion)


def create_loaders(
    x, y, data, under=False, over=False, classes=["base", "yawn"], batch_size=2
):
    if under is True:
        x, y = undersample(x, y, data)

    if over is True:
        x, y = oversample(x, y, data)

    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=TEST_OPTIMAL_SIZE, random_state=seed
    )

    train_dataset = FetusDataset(
        [{"data": x, "label": y} for x, y in zip(x_train, y_train)],
        train=True,
        classes=len(classes),
    )

    test_dataset = FetusDataset(
        [{"data": x, "label": y} for x, y in zip(x_test, y_test)],
        train=True,
        classes=len(classes),
    )

    train_loader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        drop_last=True,
    )

    test_loader = DataLoader(
        test_dataset,
        batch_size=batch_size,
        shuffle=True,
        drop_last=True,
    )

    return (train_loader, test_loader)


def get_device() -> torch.device:
    # Prefer Apple's Metal backend, then CUDA, then plain CPU.
    if torch.backends.mps.is_available():
        return torch.device("mps")

    if torch.cuda.is_available():
        return torch.device("cuda")

    return torch.device("cpu")


def undersample(x: np.ndarray, y: np.ndarray, data):
    print("Before undersampling")
    print(Counter([d["label"] for d in data]))

    rus = RandomUnderSampler(random_state=seed, sampling_strategy="majority")
    # x and y are rebuilt from `data` so the resampler sees the full sample list.
    x = np.array([d["data"] for d in data])
    y = np.array([d["label"] for d in data])
    # imbalanced-learn expects 2-D input, so flatten each series first.
    flat_x = np.array([series.flatten() for series in x])
    flat_y = np.array(y)
    x, y = rus.fit_resample(flat_x, flat_y)
    x = x.reshape(-1, SERIES_LENGTH, FEATURE_SIZE)

    print("After undersampling")
    print(Counter(y))
    return x, y


def oversample(x: np.ndarray, y: np.ndarray, data):
    print("Before oversampling")
    print(Counter([d["label"] for d in data]))

    ros = RandomOverSampler(random_state=seed, sampling_strategy="all")
    x = np.array([d["data"] for d in data])
    y = np.array([d["label"] for d in data])
    flat_x = np.array([series.flatten() for series in x])
    flat_y = np.array(y)
    x, y = ros.fit_resample(flat_x, flat_y)
    x = x.reshape(-1, SERIES_LENGTH, FEATURE_SIZE)

    print("After oversampling")
    print(Counter(y))
    return x, y


class FetusDataset(Dataset):
    def __init__(self, data: List[dict], train: bool = False, classes: int = 2):
        self.data = data
        self.train = train
        self.classes = classes

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        x = self.data[idx]["data"]
        y = self.data[idx]["label"]

        # Type conversion: float features, one-hot label.
        x = x.astype(np.float32)
        y = np.eye(self.classes)[y]

        # Conversion to tensors.
        x = torch.tensor(x, dtype=torch.float32)
        y = torch.tensor(y, dtype=torch.int32)

        # Replace NaNs with 0 and infinities with large finite numbers.
        x = torch.nan_to_num(x)

        # Per-sample standardisation, only during training.
        if self.train:
            mean = x.mean()
            std = x.std()

            # Guard against constant series (std == 0).
            if std > 0:
                x = (x - mean) / std

        return x, y


def groups_to_samples(grouped) -> List[dict]:
    """Turn each grouped clip into a fixed-length (SERIES_LENGTH, FEATURE_SIZE)
    array plus an integer class label."""
    samples: List[dict] = []
    for _, group in grouped:
        label = group["label"]

        group = group.drop(columns=["test", "frame", "label", "type"])
        group.set_index("image_name", inplace=True)

        # An index column survives the round-trip through to_csv/read_csv.
        if group.columns[0] == "Unnamed: 0":
            group = group.drop(columns=["Unnamed: 0"])

        group = group.to_numpy()

        # Zero-pad short clips and truncate long ones to SERIES_LENGTH frames.
        if group.shape[0] < SERIES_LENGTH:
            group = np.vstack(
                [group, np.zeros((SERIES_LENGTH - group.shape[0], FEATURE_SIZE))]
            )
        elif group.shape[0] > SERIES_LENGTH:
            group = group[:SERIES_LENGTH]

        samples.append(
            {
                "data": group.astype(np.float32),
                "label": CLASSES.index(label.iat[0]),
            }
        )
    return samples


def save_confusion_matrices(conf_matrix, out_dir, suffix=""):
    """Save the raw and the row-normalised (percentage) confusion matrices."""
    labels = ["Baseline", "Opcl", "Yawn"]

    plt.figure(figsize=(19.20, 10.80))
    plt.title("Confusion Matrix")
    sns.heatmap(
        conf_matrix,
        annot=True,
        fmt=".2f",
        xticklabels=labels,
        yticklabels=labels,
        cmap="viridis",
    )
    plt.xlabel("Predicted")
    plt.ylabel("Actual")
    plt.savefig(out_dir + "/confusion_matrix" + suffix + ".png")

    conf_matrix_percent = (
        conf_matrix.astype("float") / conf_matrix.sum(axis=1)[:, np.newaxis] * 100
    )
    plt.figure(figsize=(19.20, 10.80))
    plt.title("Confusion Matrix Percentage")
    sns.heatmap(
        conf_matrix_percent,
        annot=True,
        fmt=".2f",
        xticklabels=labels,
        yticklabels=labels,
        cmap="viridis",
    )
    plt.xlabel("Predicted")
    plt.ylabel("Actual")
    plt.savefig(out_dir + "/confusion_matrix_percentage" + suffix + ".png")


def createArgParser():
    parser = argparse.ArgumentParser(description="Womb Wise")
    parser.add_argument(
        "-rd",
        "--reload-dataset",
        action="store_true",
        help="Reload the dataset",
    )

    # path to the dataset
    parser.add_argument(
        "-p",
        "--path",
        action="store",
        help="Path to the dataset",
        default="~/Documents/womb-wise/Data/",
    )

    # epochs
    parser.add_argument(
        "-e",
        "--epochs",
        action="store",
        help="Number of epochs",
        default=10,
    )

    parser.add_argument(
        "-k",
        "--kfold",
        action="store",
        help="Number of folds for k-fold cross validation",
        default=1,
    )

    parser.add_argument(
        "-o",
        "--oversampling",
        action="store_true",
        help="Apply oversampling",
    )

    parser.add_argument(
        "-u",
        "--undersampling",
        action="store_true",
        help="Apply undersampling",
    )

    parser.add_argument(
        "-d",
        "--dataset",
        action="store",
        default="all",
        choices=["all", "fetus", "mother", "fetus-mother", "mother-fetus"],
        help="Choose the dataset: all, fetus, mother, or train on mother and test on fetus (or vice versa)",
    )

    args = parser.parse_args()

    print(
        f"""
        ARGS:
        reload-dataset: {args.reload_dataset}
        path: {args.path}
        epochs: {args.epochs}
        kfold: {args.kfold}
        oversampling: {args.oversampling}
        undersampling: {args.undersampling}
        dataset: {args.dataset}
        """
    )
    return args


if __name__ == "__main__":
    CLASSES = ["baseline", "opcl", "yawn"]
    FEATURE_SIZE = 10
    SERIES_LENGTH = 60
    BATCH_SIZE = 4
    WEIGHT_DECAY = 1e-5
    LEARNING_RATE = 1e-3
    TEST_OPTIMAL_SIZE = 0.2
    HIDDEN_SIZE = 256
    DROP_OUT = 0.0
    NUM_LAYERS = 2
    EPS = 1e-7

    # TEST_NAME = "0_k1_all"
    # TEST_NAME = "1_k1_fetus"
    # TEST_NAME = "2_k1_mother"
    # TEST_NAME = "3_k1_mother_fetus"
    # TEST_NAME = "4_k1_fetus_mother"
    # TEST_NAME = "5_k5_all"
    TEST_NAME = "6_k5_fetus"
    # TEST_NAME = "7_k5_mother"

    os.makedirs("output/" + TEST_NAME, exist_ok=True)
    os.makedirs("output/" + TEST_NAME + "/weights", exist_ok=True)
    os.makedirs("output/" + TEST_NAME + "/confusion_matrix", exist_ok=True)
    os.makedirs("output/" + TEST_NAME + "/metrics", exist_ok=True)

    # fix the seed for reproducibility
    seed = 42
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    device = get_device()
    args = createArgParser()

    PATH = args.path
    EPOCHS = int(args.epochs)
    K_FOLD = int(args.kfold)
    OVER_SAMPLING = args.oversampling
    UNDER_SAMPLING = args.undersampling
    EARLY_STOPPING = True
    DATASET_TYPE = args.dataset

    if os.path.exists("dataset.csv") and args.reload_dataset is False:
        dataset = pd.read_csv("dataset.csv")
        mother = pd.read_csv("mother.csv")
        fetus = pd.read_csv("fetus.csv")
    else:
        # Subdirectory names below are the layout this project uses under --path.
        fetus_root = os.path.join(PATH, "Ultrasound_Scans/tracked_frames/")
        mother_root = os.path.join(PATH, "Mothers_videos/Tracked/")

        fetus = pd.concat([get_dataset(fetus_root, action) for action in CLASSES])
        mother = pd.concat([get_dataset(mother_root, action) for action in CLASSES])

        fetus["type"] = "fetus"
        mother["type"] = "mother"

        fetus.to_csv("fetus.csv")
        mother.to_csv("mother.csv")

        dataset = pd.concat([mother, fetus])
        dataset.to_csv("dataset.csv")

    mother = mother.drop(columns=["top_bottom_distance"])
    fetus = fetus.drop(columns=["top_bottom_distance"])
    dataset = dataset.drop(columns=["top_bottom_distance"])

    grouped_dataset = dataset.groupby(["label", "frame", "test", "type"])
    grouped_mother = mother.groupby(["label", "frame", "test"])
    grouped_fetus = fetus.groupby(["label", "frame", "test"])

    data: List[dict] = groups_to_samples(grouped_dataset)
    mother_data: List[dict] = groups_to_samples(grouped_mother)
    fetus_data: List[dict] = groups_to_samples(grouped_fetus)

    if K_FOLD == 1:
        x_all = [d["data"] for d in data]
        y_all = [d["label"] for d in data]

        x_mother = [d["data"] for d in mother_data]
        y_mother = [d["label"] for d in mother_data]

        x_fetus = [d["data"] for d in fetus_data]
        y_fetus = [d["label"] for d in fetus_data]

        (train_loader_all, test_loader_all) = create_loaders(
            x_all,
            y_all,
            data,
            over=OVER_SAMPLING,
            under=UNDER_SAMPLING,
            classes=CLASSES,
            batch_size=BATCH_SIZE,
        )

        (train_loader_mother, test_loader_mother) = create_loaders(
            x_mother,
            y_mother,
            mother_data,
            over=OVER_SAMPLING,
            under=UNDER_SAMPLING,
            classes=CLASSES,
            batch_size=BATCH_SIZE,
        )

        (train_loader_fetus, test_loader_fetus) = create_loaders(
            x_fetus,
            y_fetus,
            fetus_data,
            over=OVER_SAMPLING,
            under=UNDER_SAMPLING,
            classes=CLASSES,
            batch_size=BATCH_SIZE,
        )

        (model, optimizer, scheduler, criterion) = setup_model_training(
            input_size=FEATURE_SIZE,
            hidden_size=HIDDEN_SIZE,
            num_layers=NUM_LAYERS,
            num_classes=len(CLASSES),
            sequence_length=SERIES_LENGTH,
            device=device,
            lr=LEARNING_RATE,
            weight_decay=WEIGHT_DECAY,
            eps=EPS,
        )

        if DATASET_TYPE == "all":
            train_loader = train_loader_all
            test_loader = test_loader_all
        elif DATASET_TYPE == "fetus":
            train_loader = train_loader_fetus
            test_loader = test_loader_fetus
        elif DATASET_TYPE == "mother":
            train_loader = train_loader_mother
            test_loader = test_loader_mother
        elif DATASET_TYPE == "fetus-mother":
            # train on fetus clips, evaluate on mother clips
            train_loader = train_loader_fetus
            test_loader = test_loader_mother
        elif DATASET_TYPE == "mother-fetus":
            # train on mother clips, evaluate on fetus clips
            train_loader = train_loader_mother
            test_loader = test_loader_fetus
        else:
            raise Exception("Invalid dataset type")

        trained_model = training_loop(
            model=model,
            train_loader=train_loader,
            test_loader=test_loader,
            optimizer=optimizer,
            scheduler=scheduler,
            criterion=criterion,
            device=device,
            epochs=EPOCHS,
            early_stopping=EARLY_STOPPING,
            log_dir="output/" + TEST_NAME + "/metrics",
        )

        loss, conf_matrix, classification_rep = validation(
            trained_model,
            test_loader,
            criterion,
            device,
        )

        # save the classification report to a file
        df = pd.DataFrame(classification_rep).transpose()
        df.to_csv("output/" + TEST_NAME + "/metrics/classification_report.csv")

        torch.save(
            trained_model.state_dict(),
            "output/" + TEST_NAME + "/weights/model.pth",
        )

        save_confusion_matrices(
            conf_matrix, "output/" + TEST_NAME + "/confusion_matrix"
        )

    else:
        kf = StratifiedKFold(n_splits=K_FOLD, shuffle=True, random_state=seed)

        # Pick the sample list that matches the requested dataset type.
        # TODO: the mixed fetus-mother / mother-fetus cases are not handled here.
        if DATASET_TYPE == "all":
            source = data
        elif DATASET_TYPE == "fetus":
            source = fetus_data
        elif DATASET_TYPE == "mother":
            source = mother_data
        else:
            raise Exception("Invalid dataset type")

        x = [d["data"] for d in source]
        y = [d["label"] for d in source]

        model_index = 0
        for train_index, test_index in kf.split(X=x, y=y):
            train_data = [source[i] for i in train_index]
            test_data = [source[i] for i in test_index]

            # create_loaders re-splits internally, so pass the training fold only
            # (plus the whole fold for the resampling statistics).
            fold = train_data + test_data
            fold_x = [d["data"] for d in train_data]
            fold_y = [d["label"] for d in train_data]

            (train_loader, test_loader) = create_loaders(
                fold_x,
                fold_y,
                fold,
                over=OVER_SAMPLING,
                under=UNDER_SAMPLING,
                classes=CLASSES,
                batch_size=BATCH_SIZE,
            )

            (model, optimizer, scheduler, criterion) = setup_model_training(
                input_size=FEATURE_SIZE,
                hidden_size=HIDDEN_SIZE,
                num_layers=NUM_LAYERS,
                num_classes=len(CLASSES),
                sequence_length=SERIES_LENGTH,
                device=device,
                lr=LEARNING_RATE,
                weight_decay=WEIGHT_DECAY,
                eps=EPS,
            )

            trained_model = training_loop(
                model=model,
                train_loader=train_loader,
                test_loader=test_loader,
                optimizer=optimizer,
                scheduler=scheduler,
                criterion=criterion,
                device=device,
                epochs=EPOCHS,
                early_stopping=EARLY_STOPPING,
                log_dir="output/" + TEST_NAME + "/metrics/" + f"{model_index}",
            )

            loss, conf_matrix, classification_rep = validation(
                trained_model,
                test_loader,
                criterion,
                device,
            )

            # save the classification report to a file
            df = pd.DataFrame(classification_rep).transpose()
            df.to_csv(
                "output/"
                + TEST_NAME
                + "/metrics/classification_report_"
                + str(model_index)
                + ".csv"
            )

            torch.save(
                trained_model.state_dict(),
                "output/" + TEST_NAME + "/weights/model_" + str(model_index) + ".pth",
            )

            save_confusion_matrices(
                conf_matrix,
                "output/" + TEST_NAME + "/confusion_matrix",
                suffix="_" + str(model_index),
            )
            model_index += 1
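
The `FetusDataset` class above one-hot encodes integer labels with `np.eye`, which is what allows `nn.BCEWithLogitsLoss` to be used for this three-class problem (each class treated as an independent logit). A self-contained sketch of that encoding step, with made-up values:

```python
import numpy as np
import torch
import torch.nn as nn

num_classes = 3
y = 2                          # integer class index, e.g. "yawn"
one_hot = np.eye(num_classes)[y]
print(one_hot)                 # [0. 0. 1.]

# BCEWithLogitsLoss expects float targets with the same shape as the logits.
logits = torch.tensor([[0.1, -0.3, 2.0]])
target = torch.tensor([one_hot], dtype=torch.float32)
loss = nn.BCEWithLogitsLoss()(logits, target)
print(loss.item())
```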

fetus-event-detection-classification/src/model.py (new executable file)
import torch
import torch.nn as nn


class LSTM(nn.Module):
    """LSTM variant that flattens the whole clip into a single timestep."""

    def __init__(
        self, input_size, batch_size, hidden_size, drop_out, num_classes, num_layers
    ):
        super(LSTM, self).__init__()

        self.input_size = input_size
        self.batch_size = batch_size
        self.hidden_size = hidden_size
        self.drop_out = drop_out
        self.num_classes = num_classes
        self.num_layers = num_layers

        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            dropout=drop_out,
            batch_first=True,
        )

        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        # Flatten the clip and feed it as a sequence of length one.
        x = x.view(x.size(0), -1)
        x = x.unsqueeze(1)

        out, hidden = self.lstm(x)
        # Classify from the last timestep's hidden state.
        out = self.fc(out[:, -1, :])
        return out


class GRU(nn.Module):
    """GRU twin of the LSTM variant above."""

    def __init__(
        self, input_size, batch_size, hidden_size, drop_out, num_classes, num_layers
    ):
        super(GRU, self).__init__()
        self.input_size = input_size
        self.batch_size = batch_size
        self.hidden_size = hidden_size
        self.drop_out = drop_out
        self.num_classes = num_classes
        self.gru = nn.GRU(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            dropout=drop_out,
            batch_first=True,
        )
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        x = x.view(x.size(0), -1)
        x = x.unsqueeze(1)  # sequence of length one, mirroring the LSTM variant
        out, _ = self.gru(x)
        out = self.fc(out[:, -1, :])
        return out


class SimpleGRU(nn.Module):
    def __init__(
        self,
        input_size,
        hidden_size,
        num_layers,
        num_classes,
        sequence_length,
        device,
    ):
        super(SimpleGRU, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        # The classifier sees the hidden state of every timestep, concatenated.
        self.fc1 = nn.Linear(hidden_size * sequence_length, num_classes)
        self.device = device

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(self.device)

        out, _ = self.gru(x, h0)
        out = out.reshape(out.shape[0], -1)
        out = self.fc1(out)
        return out


class SimpleLSTM(nn.Module):
    def __init__(
        self,
        input_size,
        hidden_size,
        num_layers,
        sequence_length,
        num_classes,
        device,
    ):
        super(SimpleLSTM, self).__init__()

        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        # The classifier sees the hidden state of every timestep, concatenated.
        self.fc1 = nn.Linear(hidden_size * sequence_length, num_classes)
        self.device = device

    def forward(self, x):
        # A (batch, seq) input is treated as a univariate series.
        if len(x.shape) == 2:
            x = x.unsqueeze(2)

        out, _ = self.lstm(x)
        out = out.reshape(out.size(0), -1)
        out = self.fc1(out)
        return out
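
A quick shape check for `SimpleLSTM` as configured in `src/main.py` (hyperparameter values copied from there; the batch of random tensors is illustrative):

```python
import torch

from model import SimpleLSTM

# Values from src/main.py: 10 features, 60 frames, 3 classes, 2 LSTM layers.
model = SimpleLSTM(
    input_size=10,
    hidden_size=256,
    num_layers=2,
    sequence_length=60,
    num_classes=3,
    device=torch.device("cpu"),
)

x = torch.randn(4, 60, 10)  # (batch, frames, keypoint features)
logits = model(x)
print(logits.shape)  # torch.Size([4, 3]) -- one logit per class
```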

fetus-event-detection-classification/src/predict.py (new executable file, empty)

fetus-event-detection-classification/src/training.py (new executable file)
import os

import pandas as pd
import torch
from torchmetrics import Accuracy, F1Score, Precision, Recall
from tqdm import tqdm


class EarlyStopping:
    """Stop once the generalisation gap (test loss minus train loss) has
    exceeded min_delta for `tolerance` epochs (counted cumulatively)."""

    def __init__(self, tolerance=10, min_delta=0):
        self.tolerance = tolerance
        self.min_delta = min_delta
        self.counter = 0
        self.early_stop = False

    def __call__(self, train_loss, validation_loss):
        if (validation_loss - train_loss) > self.min_delta:
            self.counter += 1
            if self.counter >= self.tolerance:
                self.early_stop = True


def training_loop(
    model,
    train_loader,
    test_loader,
    optimizer,
    scheduler,
    criterion,
    device,
    epochs,
    early_stopping=False,
    log_dir=None,
):
    if log_dir is not None:
        logs: pd.DataFrame = pd.DataFrame(
            columns=[
                "train_loss",
                "test_loss",
                "train_accuracy",
                "test_accuracy",
                "train_precision",
                "test_precision",
                "train_recall",
                "test_recall",
                "train_f1",
                "test_f1",
            ]
        )

    # move to device
    model = model.to(device)

    # metrics
    accuracy = Accuracy(task="multiclass", num_classes=3).to(device)
    precision = Precision(task="multiclass", num_classes=3).to(device)
    recall = Recall(task="multiclass", num_classes=3).to(device)
    f1 = F1Score(task="multiclass", num_classes=3).to(device)

    t = tqdm(range(epochs))

    stopper = EarlyStopping(tolerance=50, min_delta=0)
    for epoch in t:
        model.train()
        train_loss = 0
        train_accuracy = 0
        train_precision = 0
        train_recall = 0
        train_f1 = 0

        for x, y in train_loader:
            x = x.to(device)
            y = y.to(device)

            optimizer.zero_grad()
            y_pred = model(x)
            # BCEWithLogitsLoss wants float one-hot targets with the logits' shape.
            y = y.squeeze(1)
            y = y.float()
            y_pred = y_pred.squeeze(1)
            y_pred = y_pred.float()
            loss = criterion(y_pred, y)

            loss.backward()
            optimizer.step()
            train_loss += loss.item()

            # Back to class indices for the torchmetrics calls.
            y_pred = torch.argmax(y_pred, dim=1)
            y = torch.argmax(y, dim=1)

            train_accuracy += accuracy(y_pred, y)
            train_precision += precision(y_pred, y)
            train_recall += recall(y_pred, y)
            train_f1 += f1(y_pred, y)

        train_loss /= len(train_loader)
        train_accuracy /= len(train_loader)
        train_precision /= len(train_loader)
        train_recall /= len(train_loader)
        train_f1 /= len(train_loader)

        model.eval()
        test_loss = 0
        test_accuracy = 0
        test_precision = 0
        test_recall = 0
        test_f1 = 0

        # No gradients are needed for the evaluation pass.
        with torch.no_grad():
            for x, y in test_loader:
                x = x.to(device)
                y = y.to(device)

                y_pred = model(x)
                y = y.squeeze(1)
                y = y.float()
                y_pred = y_pred.squeeze(1)
                y_pred = y_pred.float()
                loss = criterion(y_pred, y)
                test_loss += loss.item()

                y_pred = torch.argmax(y_pred, dim=1)
                y = torch.argmax(y, dim=1)

                test_accuracy += accuracy(y_pred, y)
                test_precision += precision(y_pred, y)
                test_recall += recall(y_pred, y)
                test_f1 += f1(y_pred, y)

        test_loss /= len(test_loader)
        test_accuracy /= len(test_loader)
        test_precision /= len(test_loader)
        test_recall /= len(test_loader)
        test_f1 /= len(test_loader)

        if log_dir is not None:
            new_line = {
                "train_loss": train_loss,
                "test_loss": test_loss,
                "train_accuracy": train_accuracy.item(),
                "test_accuracy": test_accuracy.item(),
                "train_precision": train_precision.item(),
                "test_precision": test_precision.item(),
                "train_recall": train_recall.item(),
                "test_recall": test_recall.item(),
                "train_f1": train_f1.item(),
                "test_f1": test_f1.item(),
            }

            logs = pd.concat([logs, pd.DataFrame([new_line])])

        scheduler.step(train_loss)
        t.set_description(
            f"Epoch: {epoch + 1}, Train Loss: {train_loss}, Test Loss: {test_loss}"
        )

        if early_stopping:
            stopper(train_loss, test_loss)
            if stopper.early_stop:
                print("Early stopping")
                break

    if log_dir is not None:
        if not os.path.exists(log_dir):
            os.makedirs(log_dir)

        logs.to_csv(log_dir + "/logs.csv", index=False)

    return model
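
A toy run of the `EarlyStopping` helper above, showing how the counter accumulates whenever the test loss exceeds the train loss by more than `min_delta` (the loss values are made up):

```python
from training import EarlyStopping

stopper = EarlyStopping(tolerance=3, min_delta=0.1)

# Simulated (train_loss, test_loss) pairs with a widening gap.
history = [(1.0, 1.05), (0.8, 1.0), (0.6, 0.9), (0.5, 0.9), (0.4, 0.9)]

for epoch, (train_loss, test_loss) in enumerate(history):
    stopper(train_loss, test_loss)
    print(epoch, stopper.counter, stopper.early_stop)
    if stopper.early_stop:
        break  # fires at epoch 3, once the gap has persisted for 3 epochs
```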

fetus-event-detection-classification/src/validation.py (new executable file)
import torch
from sklearn.metrics import classification_report, confusion_matrix


def validation(
    model,
    val_loader,
    criterion,
    device,
):
    # move to device and switch off training-time behaviour
    model = model.to(device)
    model.eval()

    losses = []
    predictions = []
    ground_truth = []

    with torch.no_grad():
        for x, y in val_loader:
            x = x.to(device)
            y = y.to(device)
            y_pred = model(x)
            y = y.squeeze(1)
            y = y.float()
            y_pred = y_pred.squeeze(1)
            y_pred = y_pred.float()
            loss = criterion(y_pred, y)
            losses.append(loss.item())
            # collapse one-hot targets and logits back to class indices
            predictions.append(torch.argmax(y_pred, dim=1))
            ground_truth.append(torch.argmax(y, dim=1))

    loss = sum(losses) / len(losses)

    gt = torch.cat(ground_truth).cpu().numpy()
    pred = torch.cat(predictions).cpu().numpy()

    replace = {0: "base", 1: "opcl", 2: "yawn"}

    conf_matrix = confusion_matrix(gt, pred)

    gt = [replace[i] for i in gt]
    pred = [replace[i] for i in pred]

    # classification_report expects y_true first, then y_pred
    classification_rep = classification_report(
        gt, pred, zero_division=0, output_dict=True
    )

    print(classification_rep)

    return loss, conf_matrix, classification_rep

mean_mouth_distance/combined_fetus_mother_series.png (new binary file, 394 KiB; not shown)

mean_mouth_distance/fetus_series.png (new binary file, 173 KiB; not shown)

mean_mouth_distance/main.ipynb (new file)
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "bb315604-cc62-46b7-8258-61a70f174386",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Load the dataset\n",
    "dataset = pd.read_csv(\"./dataset.csv\")\n",
    "\n",
    "# Group by 'test', 'frame', 'label', and 'type'\n",
    "grouped_data = dataset.groupby(['test', 'frame', 'label', 'type'])\n",
    "\n",
    "# Lists to store data based on type and label\n",
    "fetus_baseline = []\n",
    "fetus_opcl = []\n",
    "fetus_yawn = []\n",
    "mother_baseline = []\n",
    "mother_opcl = []\n",
    "mother_yawn = []\n",
    "\n",
    "# Process grouped data\n",
    "for name, group in grouped_data:\n",
    "    group = group.drop(group.columns[0], axis=1)  # Drop the first column\n",
    "    group = group.drop([\"leftLip_x\", \"leftLip_y\", \"rightLip_x\", \"rightLip_y\",\n",
    "                        \"topMidInner_x\", \"topMidInner_y\", \"bottomMidInner_x\",\n",
    "                        \"bottomMidInner_y\", \"nose_x\", \"nose_y\"], axis=1)\n",
    "\n",
    "    # Get values from 'top_bottom_distance'\n",
    "    top_bottom_distance_values = group['top_bottom_distance'].values\n",
    "\n",
    "    # Assign to appropriate lists based on label and type\n",
    "    label = name[2]  # 'label' from groupby\n",
    "    type_ = name[3]  # 'type' from groupby\n",
    "\n",
    "    if type_ == 'fetus':\n",
    "        if label == 'baseline':\n",
    "            fetus_baseline.append(top_bottom_distance_values)\n",
    "        elif label == 'opcl':\n",
    "            fetus_opcl.append(top_bottom_distance_values)\n",
    "        elif label == 'yawn':\n",
    "            fetus_yawn.append(top_bottom_distance_values)\n",
    "    elif type_ == 'mother':\n",
    "        if label == 'baseline':\n",
    "            mother_baseline.append(top_bottom_distance_values)\n",
    "        elif label == 'opcl':\n",
    "            mother_opcl.append(top_bottom_distance_values)\n",
    "        elif label == 'yawn':\n",
    "            mother_yawn.append(top_bottom_distance_values)\n",
    "\n",
    "# Function to pad, sample, and smooth each array\n",
    "def process_series(series_list, window_size=3):\n",
    "    max_length = max(len(series) for series in series_list)\n",
    "    padded_series = np.array([np.pad(series, (0, max_length - len(series)), mode='constant') for series in series_list])\n",
    "\n",
    "    # Sampled series\n",
    "    sampled_series = []\n",
    "    sample_n = 1\n",
    "    for i in range(0, max_length, sample_n):\n",
    "        segment_values = padded_series[:, i:i + sample_n][padded_series[:, i:i + sample_n] != 0]\n",
    "        if len(segment_values) > 0:\n",
    "            sampled_point = np.random.choice(segment_values.flatten(), size=1)\n",
    "            sampled_series.append(sampled_point[0])\n",
    "\n",
    "    sampled_series = np.array(sampled_series)\n",
    "\n",
    "    # Apply moving average for smoothing\n",
    "    smooth_sampled_series = np.convolve(sampled_series, np.ones(window_size) / window_size, mode='valid')\n",
    "\n",
    "    return smooth_sampled_series\n",
    "\n",
    "# Process all series\n",
    "smooth_fetus_baseline = process_series(fetus_baseline)\n",
    "smooth_fetus_opcl = process_series(fetus_opcl)\n",
    "smooth_fetus_yawn = process_series(fetus_yawn)\n",
    "smooth_mother_baseline = process_series(mother_baseline)\n",
    "smooth_mother_opcl = process_series(mother_opcl)\n",
    "smooth_mother_yawn = process_series(mother_yawn)\n",
    "\n",
    "# Function to plot and save images\n",
    "def plot_and_save(series_dict, title, filename):\n",
    "    plt.figure(figsize=(19.20, 10.80))\n",
    "    for label, series in series_dict.items():\n",
    "        plt.plot(series, marker='x', label=label, linestyle='--')\n",
    "\n",
    "    plt.title(title)\n",
    "    plt.xlabel('Time Points')\n",
    "    plt.ylabel('Values')\n",
    "    plt.legend()\n",
    "    plt.grid()\n",
    "\n",
    "    # Save the plot\n",
    "    plt.savefig(filename)\n",
    "    plt.close()  # Close the figure to save memory\n",
    "\n",
    "# Create plots for fetus and mother\n",
    "fetus_series_dict = {\n",
    "    'Smoothed Fetus Baseline': smooth_fetus_baseline,\n",
    "    'Smoothed Fetus OPCL': smooth_fetus_opcl,\n",
    "    'Smoothed Fetus Yawn': smooth_fetus_yawn,\n",
    "}\n",
    "\n",
    "mother_series_dict = {\n",
    "    'Smoothed Mother Baseline': smooth_mother_baseline,\n",
    "    'Smoothed Mother OPCL': smooth_mother_opcl,\n",
    "    'Smoothed Mother Yawn': smooth_mother_yawn,\n",
    "}\n",
    "\n",
    "# Plot and save images\n",
    "plot_and_save(fetus_series_dict, 'Smoothed Series for Fetus', 'fetus_series.png')\n",
    "plot_and_save(mother_series_dict, 'Smoothed Series for Mother', 'mother_series.png')\n",
    "\n",
    "# Combine the plots into a single figure\n",
    "plt.figure(figsize=(19.20, 10.80))\n",
    "\n",
    "# Plot all series together\n",
    "for label, series in fetus_series_dict.items():\n",
    "    plt.plot(series, marker='x', label=label, linestyle='--')\n",
    "for label, series in mother_series_dict.items():\n",
    "    plt.plot(series, marker='o', label=label, linestyle='--')\n",
    "\n",
    "plt.title('Combined Smoothed Series for Fetus and Mother')\n",
    "plt.xlabel('Time Points')\n",
    "plt.ylabel('Values')\n",
    "plt.legend()\n",
    "plt.grid()\n",
    "\n",
    "# Save the combined plot\n",
    "plt.savefig('combined_fetus_mother_series.png')\n",
    "plt.close()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "abd865fb-17e1-4608-a98a-32f65c5505bf",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
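
The notebook's smoothing step is a plain moving average via `np.convolve`; a tiny standalone example of what `mode="valid"` does to the series length (the input values are made up):

```python
import numpy as np

series = np.array([1.0, 2.0, 4.0, 8.0, 16.0])
window_size = 3

# Each output point is the mean of `window_size` consecutive inputs;
# mode="valid" drops the edges, so the result has len(series) - window_size + 1 points.
smooth = np.convolve(series, np.ones(window_size) / window_size, mode="valid")
print(smooth)  # [2.33333333 4.66666667 9.33333333]
```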

mean_mouth_distance/mother_series.png (new binary file, 298 KiB; not shown)

mean_mouth_distance/poetry.lock (generated; diff suppressed because it is too large)

mean_mouth_distance/pyproject.toml (new file)
[tool.poetry]
name = "mean-mouth-distance"
version = "0.1.0"
description = ""
authors = ["Dmitri <dmitri.ollari@protonmail.com>"]
license = "MIT"
readme = "README.md"
package-mode = false

[tool.poetry.dependencies]
python = "^3.12"
notebook = "^7.2.2"
pandas = "^2.2.3"
# main.ipynb also imports matplotlib; the constraint below is an assumption
matplotlib = "^3.9"


[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"