Source code for mactrack.analyse.recap

import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.signal import find_peaks



[docs]
def load_data(file_path):
    """
    Reads an Excel file into a DataFrame, using its first column as the index.

    Parameters:
        file_path (str): Path to the Excel file.

    Returns:
        pd.DataFrame: Table returned by ``pd.read_excel`` with ``index_col=0``.
    """
    return pd.read_excel(file_path, index_col=0)




[docs]
def calculate_intensity_features(intensity_data, prominence=0.3):
    """
    Calculates peak-based features for each row of the intensity data: the
    number of peaks, the mean prominence of those peaks, and the mean spacing
    between consecutive peaks.

    Peaks are detected with ``scipy.signal.find_peaks``. Row values are passed
    to it as-is (converted to float); missing values are not removed first.
    NOTE(review): confirm how ``find_peaks`` treats NaN samples for this data.

    Parameters
    ----------
    intensity_data : pd.DataFrame
        DataFrame containing intensity data, where each row represents a signal.
    prominence : float, optional
        Minimum prominence a peak must have to be kept; forwarded to
        ``find_peaks``. Defaults to 0.3.

    Returns
    -------
        num_peaks : pd.Series
            Number of peaks for each row (indexed like ``intensity_data``).
        mean_prominence : pd.Series
            Mean prominence of peaks for each row; 0 when a row has no peak.
        mean_distance : pd.Series
            Mean distance (in sample positions) between consecutive peaks for
            each row; NaN when a row has fewer than two peaks.
    """
    peak_counts = []
    prominence_means = []
    spacing_means = []

    for signal in intensity_data.to_numpy(dtype=float):
        peaks, properties = find_peaks(signal, prominence=prominence)
        prominences = properties["prominences"]

        peak_counts.append(len(peaks))
        # Averaging an empty array would yield NaN with a warning; report 0.
        prominence_means.append(prominences.mean() if len(prominences) > 0 else 0.0)
        # Two peaks already define one spacing, so only 0 or 1 peak gives NaN.
        spacing_means.append(np.diff(peaks).mean() if len(peaks) > 1 else np.nan)

    row_index = intensity_data.index
    num_peaks = pd.Series(peak_counts, index=row_index, dtype="int64")
    mean_prominence = pd.Series(prominence_means, index=row_index, dtype=float)
    mean_distance = pd.Series(spacing_means, index=row_index, dtype=float)

    return num_peaks, mean_prominence, mean_distance




[docs]
def calculate_mean(data):
    """
    Computes the row-wise mean of a DataFrame.

    Parameters:
        data (pd.DataFrame): The input DataFrame to analyze.

    Returns:
        pd.Series: One mean value per row, indexed like ``data``.
    """
    row_means = data.mean(axis="columns")
    return row_means




[docs]
def count_valid_entries(data):
    """
    Counts, for every row of the DataFrame, how many cells are not NaN.

    Parameters:
        data (pd.DataFrame): The input DataFrame to analyze.

    Returns:
        pd.Series: Number of non-missing entries per row, indexed like ``data``.
    """
    is_present = ~data.isna()
    return is_present.sum(axis="columns")




[docs]
def plot_intensity_curves(intensity_data, valid_entry_counts, threshold=10):
    """
    Plots one intensity curve per selected row and saves each as a PNG file
    in the ``output/plot`` folder (created if it does not exist).

    Parameters:
        intensity_data (pd.DataFrame): The DataFrame containing intensity data.
        valid_entry_counts (pd.Series): A series containing the count of valid entries for each row.
        threshold (int): Rows are plotted only when their count of valid
            entries is strictly greater than this value.
    """
    output_folder = "output/plot"
    # savefig does not create missing directories, so make sure it exists.
    os.makedirs(output_folder, exist_ok=True)

    filtered_data = intensity_data[valid_entry_counts > threshold]
    for index, row in filtered_data.iterrows():
        # Use a dedicated figure per curve so nothing already drawn on the
        # current pyplot figure leaks into the saved image.
        fig, ax = plt.subplots()
        try:
            ax.plot(row)
            ax.set_xlabel("Temps")
            ax.set_ylabel("Intensité")
            ax.set_title(f"Courbe d'intensité pour l'entrée {index}")
            fig.savefig(f"{output_folder}/intensity_curve_{index}.png", format="png")
        finally:
            # Always release the figure, even if saving fails.
            plt.close(fig)




[docs]
def aggregate(distance_file, intensity_file, size_file, perimeter_file):
    """
    Aggregates per-row summary statistics from multiple files, saves them to
    ``output/data/data.xlsx`` and plots the intensity curves.

    The resulting table has one row per index entry and the columns:
    ``peaks`` (number of intensity peaks), ``amplitude`` (mean peak
    prominence), ``frequence`` (mean spacing between consecutive peaks, as
    returned by ``calculate_intensity_features`` -- a spacing, not a rate),
    ``distance``, ``size`` and ``perimeter`` (row means of the respective
    files) and ``validity`` (number of non-NaN intensity values).

    Parameters:
        distance_file (str): Path to the distance data file.
        intensity_file (str): Path to the intensity data file.
        size_file (str): Path to the size data file.
        perimeter_file (str): Path to the perimeter data file.
    """
    output_file = "output/data/data.xlsx"
    distance_data = load_data(distance_file)
    intensity_data = load_data(intensity_file)
    size_data = load_data(size_file)
    perimeter_data = load_data(perimeter_file)

    num_peaks, mean_prominence, mean_peak_spacing = calculate_intensity_features(
        intensity_data
    )
    mean_distance = calculate_mean(distance_data)
    mean_size = calculate_mean(size_data)
    mean_perimeter = calculate_mean(perimeter_data)
    valid_entry_counts = count_valid_entries(intensity_data)

    aggregated_data = pd.DataFrame(
        {
            "peaks": num_peaks,
            "amplitude": mean_prominence,
            "frequence": mean_peak_spacing,
            "distance": mean_distance,
            "size": mean_size,
            "perimeter": mean_perimeter,
            "validity": valid_entry_counts,
        }
    )

    # to_excel does not create missing directories, so make sure it exists.
    os.makedirs(os.path.dirname(output_file), exist_ok=True)
    aggregated_data.to_excel(output_file, engine="openpyxl")
    print(f"Les données agrégées ont été enregistrées dans {output_file}")
    plot_intensity_curves(intensity_data, valid_entry_counts)
    print("Les courbes d'intensité ont été enregistrées pour les entrées valides")