init
commit e783ffa2cc
21
LICENSE
Executable file
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2024 Dmitri Ollari Ischimji

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
10
README.md
Executable file
@@ -0,0 +1,10 @@
# Womb Wise

Research project carried out with the Hospital of Parma for a thesis.
I worked on the machine learning part.
The main task was to determine whether videos of fetuses contained certain actions, such as yawning.
Since a yawn is a complex movement that unfolds over time, I had to dig into time-series modelling.

I removed the input and output data to avoid privacy issues.

Kanopo
2
fetus-event-detection-classification/.gitignore
vendored
Executable file
@@ -0,0 +1,2 @@

weights/
1314
fetus-event-detection-classification/poetry.lock
generated
Executable file
File diff suppressed because it is too large
24
fetus-event-detection-classification/pyproject.toml
Executable file
@@ -0,0 +1,24 @@
[tool.poetry]
name = "fetus-event-detection-classification"
version = "0.1.0"
description = ""
authors = ["kanopo <dmitri.ollari@protonmail.com>"]
license = "MIT"
readme = "README.md"
package-mode = false

[tool.poetry.dependencies]
python = "^3.12"
pandas = "^2.2.2"
matplotlib = "^3.9.1"
torch = "^2.3.1"
seaborn = "^0.13.2"
imblearn = "^0.0"
imbalanced-learn = "^0.12.3"
torchmetrics = "^1.4.0.post0"
tqdm = "^4.66.4"


[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
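For reference, a minimal way to set this sub-project up and run it with the Poetry CLI; the flags come from the argument parser in src/main.py, so adjust them to the experiment you want:

    cd fetus-event-detection-classification
    poetry install        # resolves the dependencies pinned above
    poetry run python src/main.py -e 100 -k 5 -d fetus -u    # 100 epochs, 5-fold CV, fetus-only data, undersampling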
340
fetus-event-detection-classification/src/load_dataset.py
Executable file
@@ -0,0 +1,340 @@
import os
import re
import warnings
from typing import Dict, List, Tuple

import numpy as np
import pandas as pd

warnings.simplefilter(action="ignore", category=FutureWarning)


def remove_dot_files(files: list) -> list:
    """Filter out hidden files such as .DS_Store."""
    return [file for file in files if not file.startswith(".")]


def get_dataset(path: str, data_type: str) -> pd.DataFrame:
    """Walk the tracked-frames tree and collect every CSV of landmark
    coordinates whose directory matches `data_type` ("baseline", "yawn"
    or "opcl") into a single DataFrame."""
    path_base_dir = os.path.expanduser(path)
    dirs = remove_dot_files(os.listdir(path_base_dir))

    dataset = pd.DataFrame(
        columns=[
            "image_name",        # string value
            "leftLip_x",         # float value
            "leftLip_y",         # float value
            "rightLip_x",        # float value
            "rightLip_y",        # float value
            "topMidInner_x",     # float value
            "topMidInner_y",     # float value
            "bottomMidInner_x",  # float value
            "bottomMidInner_y",  # float value
            "nose_x",            # float value
            "nose_y",            # float value
            "test",
            "frame",
            "label",
        ]
    )

    for root, dirs, _ in os.walk(path_base_dir):
        for d in dirs:
            splitted = root.split("/")
            dir_path = os.path.join(root, d)
            fetus_name = splitted[-2]
            fetus_action = splitted[-3]

            # One case-insensitive branch shared by the "baseline",
            # "yawn" and "opcl" classes.
            if not re.search(data_type, d, re.IGNORECASE):
                continue
            if fetus_action.lower() != data_type:
                continue

            # PNG frames are ignored; only the landmark CSVs are loaded.
            for f in remove_dot_files(os.listdir(dir_path)):
                if ".csv" not in f:
                    continue

                p = os.path.join(dir_path, f)
                data = pd.read_csv(p)
                data.columns = data.iloc[0]
                data = data.drop([0, 1])   # drop the two header rows
                data = data.iloc[:, 2:]    # drop the bookkeeping columns

                data.columns = [
                    "image_name",
                    "leftLip_x",
                    "leftLip_y",
                    "rightLip_x",
                    "rightLip_y",
                    "topMidInner_x",
                    "topMidInner_y",
                    "bottomMidInner_x",
                    "bottomMidInner_y",
                    "nose_x",
                    "nose_y",
                ]

                data = data.dropna()
                data["test"] = fetus_name.split("_")[1]
                data["frame"] = p.split("/")[-2].split("_")[-1]
                data["label"] = data_type

                # "img12.png" -> "0012", so a lexicographic sort on
                # image_name matches the temporal order of the frames.
                image_name = data["image_name"].apply(
                    lambda x: x.split(".")[0].split("img")[1]
                )
                image_name = image_name.apply(lambda x: x.zfill(4))
                data["image_name"] = image_name

                data = calculate_distance(data)
                dataset = pd.concat([dataset, data])

    dataset = dataset.dropna()
    dataset = dataset.reset_index(drop=True)

    return dataset


def euclidean_distance(x1, y1, x2, y2) -> float:
    x1, y1, x2, y2 = float(x1), float(y1), float(x2), float(y2)
    return np.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)


def calculate_distance(dataset: pd.DataFrame) -> pd.DataFrame:
    """Mouth opening: distance between the inner mid points of the lips."""
    dataset["top_bottom_distance"] = dataset.apply(
        lambda x: euclidean_distance(
            x["topMidInner_x"],
            x["topMidInner_y"],
            x["bottomMidInner_x"],
            x["bottomMidInner_y"],
        ),
        axis=1,
    )
    return dataset


def split_dataset(
    dataset: pd.DataFrame, classes, train_optimal_size, test_optimal_size, series_length
) -> Tuple[List[Dict], List[Dict]]:
    """Group the frame-level rows into fixed-length series and split them
    into stratified train/test lists of {"data", "label"} dicts.

    NOTE: this helper groups on a "fetus" column, while get_dataset()
    names that column "test"; callers are expected to rename it first.
    """
    dataset["label"] = dataset["label"].apply(lambda x: classes.index(x))

    total_dataset: List[Dict] = []

    grouped = dataset.groupby(by=["fetus", "frame", "label"])
    for name, group in grouped:
        df = group.reset_index(drop=True, inplace=False)
        label = df.pop("label")
        frame = df.pop("frame")
        fetus = df.pop("fetus")
        distance = df.pop("top_bottom_distance")

        df = df.sort_values(by="image_name")
        df.pop("image_name")
        df = df.reset_index(drop=True, inplace=False)
        x = df.values
        y = label.values[0]

        # Pad short series (repeat the first value for 1-D series, zero
        # rows for 2-D ones) and truncate long ones to series_length.
        if x.shape[0] < series_length:
            missing_rows = series_length - x.shape[0]
            if len(x.shape) == 1:
                new_matrix = np.full((missing_rows), x[0])
            else:
                new_matrix = np.zeros((missing_rows, x.shape[1]))
            x = np.concatenate((x, new_matrix), axis=0)

        if x.shape[0] > series_length:
            x = x[:series_length] if len(x.shape) == 1 else x[:series_length, :]

        total_dataset.append({"x": x, "y": y})

    # Per-class counts (0 = baseline, 1 = opcl, 2 = yawn).
    total_baseline_count = sum(1 for d in total_dataset if d["y"] == 0)
    total_opcl_count = sum(1 for d in total_dataset if d["y"] == 1)
    total_yawn_count = sum(1 for d in total_dataset if d["y"] == 2)

    train_yawn_optimal_size = int(total_yawn_count * train_optimal_size)
    train_baseline_optimal_size = int(total_baseline_count * train_optimal_size)
    train_opcl_optimal_size = int(total_opcl_count * train_optimal_size)

    train_yawn: List[Dict] = []
    test_yawn: List[Dict] = []
    train_baseline: List[Dict] = []
    test_baseline: List[Dict] = []
    train_opcl: List[Dict] = []
    test_opcl: List[Dict] = []

    # Stratified split: fill each class's train bucket first, the
    # remainder of that class goes to test.
    for data in total_dataset:
        if data["y"] == 2:  # yawn
            if len(train_yawn) < train_yawn_optimal_size:
                train_yawn.append({"data": data["x"], "label": data["y"]})
            else:
                test_yawn.append({"data": data["x"], "label": data["y"]})
        elif data["y"] == 0:  # baseline
            if len(train_baseline) < train_baseline_optimal_size:
                train_baseline.append({"data": data["x"], "label": data["y"]})
            else:
                test_baseline.append({"data": data["x"], "label": data["y"]})
        elif data["y"] == 1:  # opcl
            if len(train_opcl) < train_opcl_optimal_size:
                train_opcl.append({"data": data["x"], "label": data["y"]})
            else:
                test_opcl.append({"data": data["x"], "label": data["y"]})
        else:
            print("[ERROR] Invalid class label during split")
            break

    train = train_yawn + train_baseline + train_opcl
    test = test_yawn + test_baseline + test_opcl

    np.random.shuffle(train)
    np.random.shuffle(test)

    return train, test
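As a quick sanity check of the distance feature computed above, here is a minimal, self-contained sketch; the coordinate values are made up for illustration:

    import pandas as pd

    from load_dataset import calculate_distance

    # Two hypothetical frames: mouth closed vs. slightly open.
    frames = pd.DataFrame(
        {
            "topMidInner_x": [0.50, 0.50],
            "topMidInner_y": [0.40, 0.38],
            "bottomMidInner_x": [0.50, 0.50],
            "bottomMidInner_y": [0.40, 0.46],
        }
    )

    frames = calculate_distance(frames)
    print(frames["top_bottom_distance"].tolist())  # [0.0, 0.08] (closed vs. open)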
789
fetus-event-detection-classification/src/main.py
Executable file
@@ -0,0 +1,789 @@
#!/usr/bin/env python3.12

import argparse
import os
import warnings
from collections import Counter
from typing import Dict, List

import numpy as np
import pandas as pd
import seaborn as sns
import torch
import torch.nn as nn
import torch.optim as optim
from imblearn.over_sampling import RandomOverSampler
from imblearn.under_sampling import RandomUnderSampler
from matplotlib import pyplot as plt
from sklearn.model_selection import StratifiedKFold, train_test_split
from torch.utils.data import DataLoader, Dataset

from load_dataset import get_dataset
from model import SimpleLSTM
from training import training_loop
from validation import validation

warnings.simplefilter(action="ignore", category=FutureWarning)


def setup_model_training(
    input_size,
    hidden_size,
    num_layers,
    num_classes,
    sequence_length,
    device,
    lr,
    weight_decay,
    eps,
):
    """Build the LSTM, its Adam optimizer, an LR scheduler and the loss."""
    model = SimpleLSTM(
        input_size=input_size,
        hidden_size=hidden_size,
        num_layers=num_layers,
        num_classes=num_classes,
        sequence_length=sequence_length,
        device=device,
    )

    optimizer = optim.Adam(
        model.parameters(), lr=lr, weight_decay=weight_decay, eps=eps
    )

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode="min", factor=0.1, patience=25
    )

    # BCEWithLogitsLoss on one-hot labels; CrossEntropyLoss and BCELoss
    # were also tried during development.
    criterion = nn.BCEWithLogitsLoss()

    return (model, optimizer, scheduler, criterion)


def create_loaders(
    x, y, data, under=False, over=False, classes=["base", "yawn"], batch_size=2
):
    if under is True:
        x, y = undersample(x, y, data)

    if over is True:
        x, y = oversample(x, y, data)

    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=TEST_OPTIMAL_SIZE, random_state=seed
    )

    train_dataset = FetusDataset(
        [{"data": x, "label": y} for x, y in zip(x_train, y_train)],
        train=True,
        classes=len(classes),
    )

    test_dataset = FetusDataset(
        [{"data": x, "label": y} for x, y in zip(x_test, y_test)],
        train=True,
        classes=len(classes),
    )

    train_loader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        drop_last=True,
    )

    test_loader = DataLoader(
        test_dataset,
        batch_size=batch_size,
        shuffle=True,
        drop_last=True,
    )

    return (train_loader, test_loader)


def get_device() -> torch.device:
    if torch.backends.mps.is_built():
        return torch.device("mps")

    if torch.cuda.is_available():
        return torch.device("cuda")

    return torch.device("cpu")


def undersample(x: np.ndarray, y: np.ndarray, data):
    print("Before undersampling")
    print(Counter([d["label"] for d in data]))

    rus = RandomUnderSampler(random_state=seed, sampling_strategy="majority")
    x = np.array([d["data"] for d in data])
    y = np.array([d["label"] for d in data])
    # imblearn resamplers expect 2-D input, so flatten each series first.
    flat_x = np.array([series.flatten() for series in x])
    flat_y = np.array(y)
    x, y = rus.fit_resample(flat_x, flat_y)
    x = x.reshape(-1, SERIES_LENGTH, FEATURE_SIZE)

    print("After undersampling")
    print(Counter(y))
    return x, y


def oversample(x: np.ndarray, y: np.ndarray, data):
    print("Before oversampling")
    print(Counter([d["label"] for d in data]))

    ros = RandomOverSampler(random_state=seed, sampling_strategy="all")
    x = np.array([d["data"] for d in data])
    y = np.array([d["label"] for d in data])
    flat_x = np.array([series.flatten() for series in x])
    flat_y = np.array(y)
    x, y = ros.fit_resample(flat_x, flat_y)
    x = x.reshape(-1, SERIES_LENGTH, FEATURE_SIZE)

    print("After oversampling")
    print(Counter(y))
    return x, y


class FetusDataset(Dataset):
    def __init__(
        self, data: List[Dict], train: bool = False, classes: int = 2
    ):
        self.data = data
        self.train = train
        self.classes = classes

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        x = self.data[idx]["data"]
        y = self.data[idx]["label"]

        # Dtype conversion and one-hot encoding of the label.
        x = x.astype(np.float32)
        y = np.eye(self.classes)[y]

        # Conversion to tensors.
        x = torch.tensor(x, dtype=torch.float32)
        y = torch.tensor(y, dtype=torch.int32)

        # Replace NaN with 0 and infinities with large finite numbers.
        x = torch.nan_to_num(x)

        # Per-sample standardisation, applied only when std > 0.
        if self.train:
            mean = x.mean()
            std = x.std()
            if std > 0:
                x = (x - mean) / std

        return x, y


def createArgParser():
    parser = argparse.ArgumentParser(description="Womb Wise")
    parser.add_argument(
        "-rd",
        "--reload-dataset",
        action="store_true",
        help="Reload the dataset",
    )

    # Path to the dataset.
    parser.add_argument(
        "-p",
        "--path",
        action="store",
        help="Path to the dataset",
        default="~/Documents/womb-wise/Data/",
    )

    parser.add_argument(
        "-e",
        "--epochs",
        action="store",
        help="Number of epochs",
        default=10,
    )

    parser.add_argument(
        "-k",
        "--kfold",
        action="store",
        help="Number of folds for kfold cross validation",
        default=1,
    )

    parser.add_argument(
        "-o",
        "--oversampling",
        action="store_true",
        help="Apply oversampling",
    )

    parser.add_argument(
        "-u",
        "--undersampling",
        action="store_true",
        help="Apply undersampling",
    )

    parser.add_argument(
        "-d",
        "--dataset",
        action="store",
        default="all",
        choices=["all", "fetus", "mother", "fetus-mother", "mother-fetus"],
        help="Choose the dataset: all, fetus, mother, or train with mother and test with fetus or vice versa",
    )

    args = parser.parse_args()

    print(
        f"""
ARGS:

reload-dataset: {args.reload_dataset}
path: {args.path}
epochs: {args.epochs}
kfold: {args.kfold}
oversampling: {args.oversampling}
undersampling: {args.undersampling}
dataset: {args.dataset}
"""
    )
    return args


def groups_to_samples(grouped) -> List[Dict]:
    """Turn each (label, frame, test[, type]) group of rows into one
    fixed-length (SERIES_LENGTH, FEATURE_SIZE) float32 series plus an
    integer class label. Shared by the mother, fetus and combined
    DataFrames."""
    samples: List[Dict] = []
    for name, group in grouped:
        label = group["label"]

        group = group.drop(columns=["test", "frame", "label", "type"])
        group.set_index("image_name", inplace=True)

        # CSV round-trips add an index column; drop it if present.
        if group.columns[0] == "Unnamed: 0":
            group = group.drop(columns=["Unnamed: 0"])

        group = group.to_numpy()

        # Zero-pad short series, truncate long ones.
        if group.shape[0] < SERIES_LENGTH:
            group = np.vstack(
                [group, np.zeros((SERIES_LENGTH - group.shape[0], FEATURE_SIZE))]
            )
        elif group.shape[0] > SERIES_LENGTH:
            group = group[:SERIES_LENGTH]

        group = group.astype(np.float32)
        samples.append({"data": group, "label": CLASSES.index(label.iat[0])})
    return samples


def save_confusion_matrices(conf_matrix, suffix=""):
    """Save the raw and row-normalised confusion matrices as heatmaps."""
    labels = ["Baseline", "Opcl", "Yawn"]

    plt.figure(figsize=(19.20, 10.80))
    plt.title("Confusion Matrix")
    sns.heatmap(
        conf_matrix,
        annot=True,
        fmt=".2f",
        xticklabels=labels,
        yticklabels=labels,
        cmap="viridis",
    )
    plt.xlabel("Predicted")
    plt.ylabel("Actual")
    plt.savefig(
        "output/" + TEST_NAME + "/confusion_matrix/confusion_matrix" + suffix + ".png"
    )

    # Row-normalise so each true class sums to 100%.
    conf_matrix_percent = (
        conf_matrix.astype("float") / conf_matrix.sum(axis=1)[:, np.newaxis] * 100
    )
    plt.figure(figsize=(19.20, 10.80))
    plt.title("Confusion Matrix Percentage")
    sns.heatmap(
        conf_matrix_percent,
        annot=True,
        fmt=".2f",
        xticklabels=labels,
        yticklabels=labels,
        cmap="viridis",
    )
    plt.xlabel("Predicted")
    plt.ylabel("Actual")
    plt.savefig(
        "output/"
        + TEST_NAME
        + "/confusion_matrix/confusion_matrix_percentage"
        + suffix
        + ".png"
    )


if __name__ == "__main__":
    CLASSES = ["baseline", "opcl", "yawn"]
    FEATURE_SIZE = 10
    SERIES_LENGTH = 60
    BATCH_SIZE = 4
    WEIGHT_DECAY = 1e-5
    LEARNING_RATE = 1e-3
    TEST_OPTIMAL_SIZE = 0.2
    HIDDEN_SIZE = 256
    DROP_OUT = 0.0
    NUM_LAYERS = 2
    EPS = 1e-7

    # TEST_NAME options used for the experiments:
    #   0_k1_all, 1_k1_fetus, 2_k1_mother, 3_k1_mother_fetus,
    #   4_k1_fetus_mother, 5_k5_all, 6_k5_fetus, 7_k5_mother
    TEST_NAME = "6_k5_fetus"

    for sub_dir in ("", "/weights", "/confusion_matrix", "/metrics"):
        if not os.path.exists("output/" + TEST_NAME + sub_dir):
            os.makedirs("output/" + TEST_NAME + sub_dir)

    # Fix the seed for reproducibility.
    seed = 42
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    device = get_device()
    args = createArgParser()

    # NOTE: the get_dataset calls below use hard-coded paths; --path is
    # not yet wired into them.
    PATH = args.path
    EPOCHS = int(args.epochs)
    K_FOLD = int(args.kfold)
    OVER_SAMPLING = args.oversampling
    UNDER_SAMPLING = args.undersampling
    EARLY_STOPPING = True
    DATASET_TYPE = args.dataset

    if os.path.exists("dataset.csv") and args.reload_dataset is False:
        dataset = pd.read_csv("dataset.csv")
        mother = pd.read_csv("mother.csv")
        fetus = pd.read_csv("fetus.csv")
    else:
        fetus_path = "~/Documents/kanopo/womb-wise/Data/Ultrasound_Scans/tracked_frames/"
        fetus = pd.concat(
            [
                get_dataset(fetus_path, "baseline"),
                get_dataset(fetus_path, "yawn"),
                get_dataset(fetus_path, "opcl"),
            ]
        )

        mother_path = "~/Documents/kanopo/womb-wise/Data/Mothers_videos/Tracked/"
        mother = pd.concat(
            [
                get_dataset(mother_path, "baseline"),
                get_dataset(mother_path, "yawn"),
                get_dataset(mother_path, "opcl"),
            ]
        )

        fetus["type"] = "fetus"
        mother["type"] = "mother"

        fetus.to_csv("fetus.csv")
        mother.to_csv("mother.csv")

        dataset = pd.concat([mother, fetus])
        dataset.to_csv("dataset.csv")

    # The raw distance feature is not fed to the model.
    mother = mother.drop(columns=["top_bottom_distance"])
    fetus = fetus.drop(columns=["top_bottom_distance"])
    dataset = dataset.drop(columns=["top_bottom_distance"])

    grouped_dataset = dataset.groupby(["label", "frame", "test", "type"])
    grouped_mother = mother.groupby(["label", "frame", "test"])
    grouped_fetus = fetus.groupby(["label", "frame", "test"])

    data = groups_to_samples(grouped_dataset)
    mother_data = groups_to_samples(grouped_mother)
    fetus_data = groups_to_samples(grouped_fetus)

    if K_FOLD == 1:
        x_all = [d["data"] for d in data]
        y_all = [d["label"] for d in data]

        x_mother = [d["data"] for d in mother_data]
        y_mother = [d["label"] for d in mother_data]

        x_fetus = [d["data"] for d in fetus_data]
        y_fetus = [d["label"] for d in fetus_data]

        (train_loader_all, test_loader_all) = create_loaders(
            x_all,
            y_all,
            data,
            over=OVER_SAMPLING,
            under=UNDER_SAMPLING,
            classes=CLASSES,
            batch_size=BATCH_SIZE,
        )

        (train_loader_mother, test_loader_mother) = create_loaders(
            x_mother,
            y_mother,
            mother_data,
            over=OVER_SAMPLING,
            under=UNDER_SAMPLING,
            classes=CLASSES,
            batch_size=BATCH_SIZE,
        )

        (train_loader_fetus, test_loader_fetus) = create_loaders(
            x_fetus,
            y_fetus,
            fetus_data,
            over=OVER_SAMPLING,
            under=UNDER_SAMPLING,
            classes=CLASSES,
            batch_size=BATCH_SIZE,
        )

        (model, optimizer, scheduler, criterion) = setup_model_training(
            input_size=FEATURE_SIZE,
            hidden_size=HIDDEN_SIZE,
            num_layers=NUM_LAYERS,
            num_classes=len(CLASSES),
            sequence_length=SERIES_LENGTH,
            device=device,
            lr=LEARNING_RATE,
            weight_decay=WEIGHT_DECAY,
            eps=EPS,
        )

        # "fetus-mother" trains on fetus data and tests on mother data;
        # "mother-fetus" is the reverse.
        if DATASET_TYPE == "all":
            train_loader = train_loader_all
            test_loader = test_loader_all
        elif DATASET_TYPE == "fetus":
            train_loader = train_loader_fetus
            test_loader = test_loader_fetus
        elif DATASET_TYPE == "mother":
            train_loader = train_loader_mother
            test_loader = test_loader_mother
        elif DATASET_TYPE == "fetus-mother":
            train_loader = train_loader_fetus
            test_loader = test_loader_mother
        elif DATASET_TYPE == "mother-fetus":
            train_loader = train_loader_mother
            test_loader = test_loader_fetus
        else:
            raise Exception("Invalid dataset type")

        trained_model = training_loop(
            model=model,
            train_loader=train_loader,
            test_loader=test_loader,
            optimizer=optimizer,
            scheduler=scheduler,
            criterion=criterion,
            device=device,
            epochs=EPOCHS,
            early_stopping=EARLY_STOPPING,
            log_dir="output/" + TEST_NAME + "/metrics",
        )

        loss, conf_matrix, classification_rep = validation(
            trained_model,
            test_loader,
            criterion,
            device,
        )

        # Save the classification report to a file.
        df = pd.DataFrame(classification_rep).transpose()
        df.to_csv("output/" + TEST_NAME + "/metrics/classification_report.csv")

        torch.save(
            trained_model.state_dict(),
            "output/" + TEST_NAME + "/weights/model.pth",
        )

        save_confusion_matrices(conf_matrix)

    else:
        kf = StratifiedKFold(n_splits=K_FOLD, shuffle=True, random_state=seed)
        model_index = 0

        # TODO: how to handle the mixed train/validation cases
        # ("fetus-mother", "mother-fetus") under k-fold?
        if DATASET_TYPE == "all":
            selected = data
        elif DATASET_TYPE == "fetus":
            selected = fetus_data
        elif DATASET_TYPE == "mother":
            selected = mother_data
        else:
            raise Exception("Invalid dataset type")

        x = [d["data"] for d in selected]
        y = [d["label"] for d in selected]

        for train_index, test_index in kf.split(X=x, y=y):
            # Index the same list the folds were computed on, so fold
            # indices and samples stay consistent.
            train_data = [selected[i] for i in train_index]
            test_data = [selected[i] for i in test_index]

            fold_data = train_data + test_data

            fold_x = [d["data"] for d in train_data]
            fold_y = [d["label"] for d in train_data]

            (train_loader, test_loader) = create_loaders(
                fold_x,
                fold_y,
                fold_data,
                over=OVER_SAMPLING,
                under=UNDER_SAMPLING,
                classes=CLASSES,
                batch_size=BATCH_SIZE,
            )

            # Fresh model and optimizer for every fold.
            (model, optimizer, scheduler, criterion) = setup_model_training(
                input_size=FEATURE_SIZE,
                hidden_size=HIDDEN_SIZE,
                num_layers=NUM_LAYERS,
                num_classes=len(CLASSES),
                sequence_length=SERIES_LENGTH,
                device=device,
                lr=LEARNING_RATE,
                weight_decay=WEIGHT_DECAY,
                eps=EPS,
            )

            trained_model = training_loop(
                model=model,
                train_loader=train_loader,
                test_loader=test_loader,
                optimizer=optimizer,
                scheduler=scheduler,
                criterion=criterion,
                device=device,
                epochs=EPOCHS,
                early_stopping=EARLY_STOPPING,
                log_dir="output/" + TEST_NAME + "/metrics/" + f"{model_index}",
            )

            loss, conf_matrix, classification_rep = validation(
                trained_model,
                test_loader,
                criterion,
                device,
            )

            # Save the classification report to a file.
            df = pd.DataFrame(classification_rep).transpose()
            df.to_csv(
                "output/"
                + TEST_NAME
                + "/metrics/classification_report_"
                + str(model_index)
                + ".csv"
            )

            torch.save(
                trained_model.state_dict(),
                "output/" + TEST_NAME + "/weights/model_" + str(model_index) + ".pth",
            )

            save_confusion_matrices(conf_matrix, suffix="_" + str(model_index))

            model_index += 1
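To make the data flow concrete, a small sketch of what FetusDataset does to one sample; the shapes follow the constants in src/main.py, and the random array stands in for a real landmark series (assumes src/ is on the import path):

    import numpy as np

    from main import FetusDataset  # the __main__ guard keeps the pipeline from running

    SERIES_LENGTH, FEATURE_SIZE = 60, 10

    # One fake 60-frame series of 10 landmark coordinates, labelled opcl (1).
    sample = {"data": np.random.rand(SERIES_LENGTH, FEATURE_SIZE), "label": 1}

    ds = FetusDataset([sample], train=True, classes=3)
    x, y = ds[0]
    print(x.shape)  # torch.Size([60, 10]), standardised per sample
    print(y)        # tensor([0, 1, 0], dtype=torch.int32), one-hot label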
124
fetus-event-detection-classification/src/model.py
Executable file
@@ -0,0 +1,124 @@
import torch
import torch.nn as nn


class LSTM(nn.Module):
    """LSTM over the flattened series treated as a single time step.
    Unused by the final pipeline, which relies on SimpleLSTM below."""

    def __init__(
        self, input_size, batch_size, hidden_size, drop_out, num_classes, num_layers
    ):
        super(LSTM, self).__init__()

        self.input_size = input_size
        self.batch_size = batch_size
        self.hidden_size = hidden_size
        self.drop_out = drop_out
        self.num_classes = num_classes
        self.num_layers = num_layers

        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            dropout=drop_out,
            batch_first=True,
        )

        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        # Flatten the series and treat it as a sequence of length 1;
        # input_size must therefore equal SERIES_LENGTH * FEATURE_SIZE.
        x = x.view(x.size(0), -1)
        x = x.unsqueeze(1)

        out, hidden = self.lstm(x)
        out = self.fc(out[:, -1, :])
        return out


class GRU(nn.Module):
    """GRU counterpart of the LSTM above; also unused by the pipeline."""

    def __init__(
        self, input_size, batch_size, hidden_size, drop_out, num_classes, num_layers
    ):
        super(GRU, self).__init__()
        self.input_size = input_size
        self.batch_size = batch_size
        self.hidden_size = hidden_size
        self.drop_out = drop_out
        self.num_classes = num_classes
        self.gru = nn.GRU(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            dropout=drop_out,
            batch_first=True,
        )
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        # As in LSTM.forward: flatten, keep a sequence dimension of 1,
        # classify from the last hidden state.
        x = x.view(x.size(0), -1)
        x = x.unsqueeze(1)
        out, _ = self.gru(x)
        out = self.fc(out[:, -1, :])
        return out


class SimpleGRU(nn.Module):
    def __init__(
        self,
        input_size,
        hidden_size,
        num_layers,
        num_classes,
        sequence_length,
        device,
    ):
        super(SimpleGRU, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        # Classify from the hidden states of all time steps, concatenated.
        self.fc1 = nn.Linear(hidden_size * sequence_length, num_classes)
        self.device = device

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(self.device)

        out, _ = self.gru(x, h0)
        out = out.reshape(out.shape[0], -1)
        out = self.fc1(out)
        return out


class SimpleLSTM(nn.Module):
    """The model actually used: an LSTM whose per-step hidden states are
    concatenated and classified by a single linear layer."""

    def __init__(
        self,
        input_size,
        hidden_size,
        num_layers,
        sequence_length,
        num_classes,
        device,
    ):
        super(SimpleLSTM, self).__init__()

        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc1 = nn.Linear(hidden_size * sequence_length, num_classes)
        self.device = device

    def forward(self, x):
        # Accept (batch, seq) inputs by adding a feature dimension.
        if len(x.shape) == 2:
            x = x.unsqueeze(2)

        out, _ = self.lstm(x)
        out = out.reshape(out.size(0), -1)
        out = self.fc1(out)
        return out
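A quick shape check for SimpleLSTM with the hyperparameters used in src/main.py (random input, CPU only):

    import torch

    from model import SimpleLSTM

    device = torch.device("cpu")
    model = SimpleLSTM(
        input_size=10,       # FEATURE_SIZE
        hidden_size=256,     # HIDDEN_SIZE
        num_layers=2,        # NUM_LAYERS
        sequence_length=60,  # SERIES_LENGTH
        num_classes=3,       # baseline / opcl / yawn
        device=device,
    )

    x = torch.randn(4, 60, 10)  # (batch, sequence, features)
    logits = model(x)
    print(logits.shape)  # torch.Size([4, 3]), one logit per class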
0
fetus-event-detection-classification/src/predict.py
Executable file
171
fetus-event-detection-classification/src/training.py
Executable file
@@ -0,0 +1,171 @@
import os

import pandas as pd
import torch
from torchmetrics import Accuracy, F1Score, Precision, Recall
from tqdm import tqdm


class EarlyStopping:
    """Stop when the generalisation gap (test loss minus train loss)
    exceeds min_delta for `tolerance` consecutive epochs."""

    def __init__(self, tolerance=10, min_delta=0):
        self.tolerance = tolerance
        self.min_delta = min_delta
        self.counter = 0
        self.early_stop = False

    def __call__(self, train_loss, validation_loss):
        if (validation_loss - train_loss) > self.min_delta:
            self.counter += 1
            if self.counter >= self.tolerance:
                self.early_stop = True


def training_loop(
    model,
    train_loader,
    test_loader,
    optimizer,
    scheduler,
    criterion,
    device,
    epochs,
    early_stopping=False,
    log_dir=None,
):
    if log_dir is not None:
        logs: pd.DataFrame = pd.DataFrame(
            columns=[
                "train_loss",
                "test_loss",
                "train_accuracy",
                "test_accuracy",
                "train_precision",
                "test_precision",
                "train_recall",
                "test_recall",
                "train_f1",
                "test_f1",
            ]
        )

    # Move to device.
    model = model.to(device)

    # Metrics.
    accuracy = Accuracy(task="multiclass", num_classes=3).to(device)
    precision = Precision(task="multiclass", num_classes=3).to(device)
    recall = Recall(task="multiclass", num_classes=3).to(device)
    f1 = F1Score(task="multiclass", num_classes=3).to(device)

    t = tqdm(range(epochs))

    stopper = EarlyStopping(tolerance=50, min_delta=0)
    for epoch in t:
        model.train()
        train_loss = 0
        train_accuracy = 0
        train_precision = 0
        train_recall = 0
        train_f1 = 0

        for x, y in train_loader:
            x = x.to(device)
            y = y.to(device)

            optimizer.zero_grad()
            y_pred = model(x)
            # BCEWithLogitsLoss wants float targets of the same shape.
            y = y.squeeze(1)
            y = y.float()
            y_pred = y_pred.squeeze(1)
            y_pred = y_pred.float()
            loss = criterion(y_pred, y)

            loss.backward()
            optimizer.step()
            train_loss += loss.item()

            # One-hot -> class indices for the torchmetrics calls.
            y_pred = torch.argmax(y_pred, dim=1)
            y = torch.argmax(y, dim=1)

            train_accuracy += accuracy(y_pred, y)
            train_precision += precision(y_pred, y)
            train_recall += recall(y_pred, y)
            train_f1 += f1(y_pred, y)

        train_loss /= len(train_loader)
        train_accuracy /= len(train_loader)
        train_precision /= len(train_loader)
        train_recall /= len(train_loader)
        train_f1 /= len(train_loader)

        model.eval()
        test_loss = 0
        test_accuracy = 0
        test_precision = 0
        test_recall = 0
        test_f1 = 0

        # No gradients are needed for the evaluation pass.
        with torch.no_grad():
            for x, y in test_loader:
                x = x.to(device)
                y = y.to(device)

                y_pred = model(x)
                y = y.squeeze(1)
                y = y.float()
                y_pred = y_pred.squeeze(1)
                y_pred = y_pred.float()
                loss = criterion(y_pred, y)
                test_loss += loss.item()

                y_pred = torch.argmax(y_pred, dim=1)
                y = torch.argmax(y, dim=1)

                test_accuracy += accuracy(y_pred, y)
                test_precision += precision(y_pred, y)
                test_recall += recall(y_pred, y)
                test_f1 += f1(y_pred, y)

        test_loss /= len(test_loader)
        test_accuracy /= len(test_loader)
        test_precision /= len(test_loader)
        test_recall /= len(test_loader)
        test_f1 /= len(test_loader)

        if log_dir is not None:
            new_line = {
                "train_loss": train_loss,
                "test_loss": test_loss,
                "train_accuracy": train_accuracy.item(),
                "test_accuracy": test_accuracy.item(),
                "train_precision": train_precision.item(),
                "test_precision": test_precision.item(),
                "train_recall": train_recall.item(),
                "test_recall": test_recall.item(),
                "train_f1": train_f1.item(),
                "test_f1": test_f1.item(),
            }

            logs = pd.concat([logs, pd.DataFrame([new_line])])

        scheduler.step(train_loss)
        t.set_description(
            f"Epoch: {epoch + 1}, Train Loss: {train_loss}, Test Loss: {test_loss}"
        )

        if early_stopping:
            stopper(train_loss, test_loss)
            if stopper.early_stop:
                print("Early stopping")
                break

    if log_dir is not None:
        if not os.path.exists(log_dir):
            os.makedirs(log_dir)

        logs.to_csv(log_dir + "/logs.csv", index=False)

    return model
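Note that EarlyStopping watches the train/test gap rather than the more common best-validation-loss criterion; a tiny sketch of its behaviour with made-up losses:

    from training import EarlyStopping

    stopper = EarlyStopping(tolerance=3, min_delta=0.1)

    # The test loss drifts above the train loss, so the counter grows each call.
    for train_loss, test_loss in [(0.5, 0.7), (0.4, 0.65), (0.35, 0.6)]:
        stopper(train_loss, test_loss)

    print(stopper.early_stop)  # True after three consecutive gap violations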
58
fetus-event-detection-classification/src/validation.py
Executable file
@@ -0,0 +1,58 @@
import torch
from sklearn.metrics import classification_report, confusion_matrix


def validation(
    model,
    val_loader,
    criterion,
    device,
):
    # Move to device and switch off training-only behaviour.
    model = model.to(device)
    model.eval()

    losses = []
    predictions = []
    ground_truth = []

    with torch.no_grad():
        for x, y in val_loader:
            x = x.to(device)
            y = y.to(device)
            y_pred = model(x)
            y = y.squeeze(1)
            y = y.float()
            y_pred = y_pred.squeeze(1)
            y_pred = y_pred.float()
            loss = criterion(y_pred, y)
            losses.append(loss.item())
            predictions.append(torch.argmax(y_pred, dim=1))
            ground_truth.append(torch.argmax(y, dim=1))

    loss = sum(losses) / len(losses)

    gt = torch.cat(ground_truth).cpu().numpy()
    pred = torch.cat(predictions).cpu().numpy()

    replace = {0: "base", 1: "opcl", 2: "yawn"}

    conf_matrix = confusion_matrix(gt, pred)

    gt = [replace[i] for i in gt]
    pred = [replace[i] for i in pred]

    # classification_report expects the ground truth first, then the
    # predictions.
    classification_rep = classification_report(
        gt, pred, zero_division=0, output_dict=True
    )

    print(classification_rep)

    return loss, conf_matrix, classification_rep
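For context, the k=1 path in src/main.py consumes this function roughly as sketched below (trained_model, test_loader, criterion and device are the names defined there, so this is not a standalone script):

    loss, conf_matrix, report = validation(trained_model, test_loader, criterion, device)

    print(f"mean BCE loss: {loss:.4f}")
    print(conf_matrix)  # 3x3 counts over baseline / opcl / yawn
    print(report["weighted avg"]["f1-score"])  # sklearn's aggregate F1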
BIN
mean_mouth_distance/combined_fetus_mother_series.png
Normal file
Binary file not shown.
After: 394 KiB
BIN
mean_mouth_distance/fetus_series.png
Normal file
Binary file not shown.
After: 173 KiB
169
mean_mouth_distance/main.ipynb
Normal file
@@ -0,0 +1,169 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "bb315604-cc62-46b7-8258-61a70f174386",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Load the dataset\n",
    "dataset = pd.read_csv(\"./dataset.csv\")\n",
    "\n",
    "# Group by 'test', 'frame', 'label', and 'type'\n",
    "grouped_data = dataset.groupby(['test', 'frame', 'label', 'type'])\n",
    "\n",
    "# Lists to store data based on type and label\n",
    "fetus_baseline = []\n",
    "fetus_opcl = []\n",
    "fetus_yawn = []\n",
    "mother_baseline = []\n",
    "mother_opcl = []\n",
    "mother_yawn = []\n",
    "\n",
    "# Process grouped data\n",
    "for name, group in grouped_data:\n",
    "    group = group.drop(group.columns[0], axis=1)  # Drop the first column\n",
    "    group = group.drop([\"leftLip_x\", \"leftLip_y\", \"rightLip_x\", \"rightLip_y\",\n",
    "                        \"topMidInner_x\", \"topMidInner_y\", \"bottomMidInner_x\",\n",
    "                        \"bottomMidInner_y\", \"nose_x\", \"nose_y\"], axis=1)\n",
    "\n",
    "    # Get values from 'top_bottom_distance'\n",
    "    top_bottom_distance_values = group['top_bottom_distance'].values\n",
    "\n",
    "    # Assign to appropriate lists based on label and type\n",
    "    label = name[2]  # 'label' from groupby\n",
    "    type_ = name[3]  # 'type' from groupby\n",
    "\n",
    "    if type_ == 'fetus':\n",
    "        if label == 'baseline':\n",
    "            fetus_baseline.append(top_bottom_distance_values)\n",
    "        elif label == 'opcl':\n",
    "            fetus_opcl.append(top_bottom_distance_values)\n",
    "        elif label == 'yawn':\n",
    "            fetus_yawn.append(top_bottom_distance_values)\n",
    "    elif type_ == 'mother':\n",
    "        if label == 'baseline':\n",
    "            mother_baseline.append(top_bottom_distance_values)\n",
    "        elif label == 'opcl':\n",
    "            mother_opcl.append(top_bottom_distance_values)\n",
    "        elif label == 'yawn':\n",
    "            mother_yawn.append(top_bottom_distance_values)\n",
    "\n",
    "# Function to pad, sample, and smooth each array\n",
    "def process_series(series_list, window_size=3):\n",
    "    max_length = max(len(series) for series in series_list)\n",
    "    padded_series = np.array([np.pad(series, (0, max_length - len(series)), mode='constant') for series in series_list])\n",
    "\n",
    "    # Sampled series\n",
    "    sampled_series = []\n",
    "    sample_n = 1\n",
    "    for i in range(0, max_length, sample_n):\n",
    "        segment_values = padded_series[:, i:i + sample_n][padded_series[:, i:i + sample_n] != 0]\n",
    "        if len(segment_values) > 0:\n",
    "            sampled_point = np.random.choice(segment_values.flatten(), size=1)\n",
    "            sampled_series.append(sampled_point[0])\n",
    "\n",
    "    sampled_series = np.array(sampled_series)\n",
    "\n",
    "    # Apply moving average for smoothing\n",
    "    smooth_sampled_series = np.convolve(sampled_series, np.ones(window_size) / window_size, mode='valid')\n",
    "\n",
    "    return smooth_sampled_series\n",
    "\n",
    "# Process all series\n",
    "smooth_fetus_baseline = process_series(fetus_baseline)\n",
    "smooth_fetus_opcl = process_series(fetus_opcl)\n",
    "smooth_fetus_yawn = process_series(fetus_yawn)\n",
    "smooth_mother_baseline = process_series(mother_baseline)\n",
    "smooth_mother_opcl = process_series(mother_opcl)\n",
    "smooth_mother_yawn = process_series(mother_yawn)\n",
    "\n",
    "# Function to plot and save images\n",
    "def plot_and_save(series_dict, title, filename):\n",
    "    plt.figure(figsize=(19.20, 10.80))\n",
    "    for label, series in series_dict.items():\n",
    "        plt.plot(series, marker='x', label=label, linestyle='--')\n",
    "\n",
    "    plt.title(title)\n",
    "    plt.xlabel('Time Points')\n",
    "    plt.ylabel('Values')\n",
    "    plt.legend()\n",
    "    plt.grid()\n",
    "\n",
    "    # Save the plot\n",
    "    plt.savefig(filename)\n",
    "    plt.close()  # Close the figure to save memory\n",
    "\n",
    "# Create plots for fetus and mother\n",
    "fetus_series_dict = {\n",
    "    'Smoothed Fetus Baseline': smooth_fetus_baseline,\n",
    "    'Smoothed Fetus OPCL': smooth_fetus_opcl,\n",
    "    'Smoothed Fetus Yawn': smooth_fetus_yawn,\n",
    "}\n",
    "\n",
    "mother_series_dict = {\n",
    "    'Smoothed Mother Baseline': smooth_mother_baseline,\n",
    "    'Smoothed Mother OPCL': smooth_mother_opcl,\n",
    "    'Smoothed Mother Yawn': smooth_mother_yawn,\n",
    "}\n",
    "\n",
    "# Plot and save images\n",
    "plot_and_save(fetus_series_dict, 'Smoothed Series for Fetus', 'fetus_series.png')\n",
    "plot_and_save(mother_series_dict, 'Smoothed Series for Mother', 'mother_series.png')\n",
    "\n",
    "# Combine the plots into a single figure\n",
    "plt.figure(figsize=(19.20, 10.80))\n",
    "\n",
    "# Plot all series together\n",
    "for label, series in fetus_series_dict.items():\n",
    "    plt.plot(series, marker='x', label=label, linestyle='--')\n",
    "for label, series in mother_series_dict.items():\n",
    "    plt.plot(series, marker='o', label=label, linestyle='--')\n",
    "\n",
    "plt.title('Combined Smoothed Series for Fetus and Mother')\n",
    "plt.xlabel('Time Points')\n",
    "plt.ylabel('Values')\n",
    "plt.legend()\n",
    "plt.grid()\n",
    "\n",
    "# Save the combined plot\n",
    "plt.savefig('combined_fetus_mother_series.png')\n",
    "plt.close()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "abd865fb-17e1-4608-a98a-32f65c5505bf",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
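The smoothing step in the notebook is a plain moving average via np.convolve; a minimal sketch of just that step on a made-up series:

    import numpy as np

    window_size = 3
    series = np.array([0.0, 0.1, 0.5, 0.9, 0.4, 0.1])

    # mode='valid' keeps only positions where the window fully overlaps the
    # data, so the output has len(series) - window_size + 1 points.
    smooth = np.convolve(series, np.ones(window_size) / window_size, mode="valid")
    print(smooth)  # [0.2 0.5 0.6 0.46666667]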
BIN
mean_mouth_distance/mother_series.png
Normal file
Binary file not shown.
After: 298 KiB
2235
mean_mouth_distance/poetry.lock
generated
Normal file
File diff suppressed because it is too large
18
mean_mouth_distance/pyproject.toml
Normal file
@@ -0,0 +1,18 @@
[tool.poetry]
name = "mean-mouth-distance"
version = "0.1.0"
description = ""
authors = ["Dmitri <dmitri.ollari@protonmail.com>"]
license = "MIT"
readme = "README.md"
package-mode = false

[tool.poetry.dependencies]
python = "^3.12"
notebook = "^7.2.2"
pandas = "^2.2.3"


[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"