Documentation
Data Preprocessing
Tabular Data
EDA

Module: eda.py

Functions

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
 
def show_summary_statistics(df):
    """
    Display summary statistics for numerical columns in the dataframe.

    Parameters:
        df (pd.DataFrame): The dataframe containing the data.

    Note:
        Only the signature and this docstring are visible here; the body
        contains no statements, so as written a call does nothing and
        returns None. How the statistics are computed and displayed is
        not shown -- TODO(review): confirm against the implementation.
    """
 
 
def plot_distributions(df, numerical_columns):
    """
    Plot the distribution of each of the given numerical columns.

    Parameters:
        df (pd.DataFrame): The dataframe containing the data.
        numerical_columns (list): Names of the numerical columns of `df`
            to plot.

    Note:
        Only the signature and this docstring are visible here; the body
        contains no statements, so as written a call does nothing and
        returns None. The kind of plot and the figure layout are not
        shown -- TODO(review): confirm against the implementation.
    """
 
 
def plot_correlation_matrix(df):
    """
    Plot a heatmap of the correlation matrix of the dataframe.

    Parameters:
        df (pd.DataFrame): The dataframe containing the data.

    Note:
        Only the signature and this docstring are visible here; the body
        contains no statements, so as written a call does nothing and
        returns None. The correlation method and the handling of
        non-numerical columns are not shown -- TODO(review): confirm
        against the implementation.
    """
 
 
def plot_missing_values(df):
    """
    Plot the count of missing values for each column of the dataframe.

    Parameters:
        df (pd.DataFrame): The dataframe containing the data.

    Note:
        Only the signature and this docstring are visible here; the body
        contains no statements, so as written a call does nothing and
        returns None. The kind of plot, and whether columns without
        missing values are included, are not shown -- TODO(review):
        confirm against the implementation.
    """
 
 
def plot_pairwise_relationships(df, hue=None):
    """
    Plot pairwise relationships between the columns of the dataframe.

    Parameters:
        df (pd.DataFrame): The dataframe containing the data.
        hue (str, optional): Name of a variable (column) in `df` to map
            plot aspects to different colors. Defaults to None.

    Note:
        Only the signature and this docstring are visible here; the body
        contains no statements, so as written a call does nothing and
        returns None. Presumably `hue` is meant to be forwarded to a
        seaborn pair plot -- TODO(review): confirm against the
        implementation.
    """
 
 
def plot_categorical_distributions(df, categorical_columns):
    """
    Plot the distribution of each given categorical column as a count plot.

    Parameters:
        df (pd.DataFrame): The dataframe containing the data.
        categorical_columns (list): Names of the categorical columns of
            `df` to plot.

    Note:
        Only the signature and this docstring are visible here; the body
        contains no statements, so as written a call does nothing and
        returns None. The figure layout and category ordering are not
        shown -- TODO(review): confirm against the implementation.
    """
 
 
def plot_boxplots(df, numerical_columns):
    """
    Plot a boxplot for each given numerical column to help identify outliers.

    Parameters:
        df (pd.DataFrame): The dataframe containing the data.
        numerical_columns (list): Names of the numerical columns of `df`
            to plot.

    Note:
        Only the signature and this docstring are visible here; the body
        contains no statements, so as written a call does nothing and
        returns None. The figure layout is not shown -- TODO(review):
        confirm against the implementation.
    """