Documentation
Data Preprocessing
Tabular Data
Clean

Module: clean.py

Functions

import pandas as pd
 
def fill_missing(dataframe, strategy='mean', columns=None):
    """
    Fill missing values in specified columns or all columns.

    The input is never modified; a filled copy is returned.

    Parameters:
        dataframe (pd.DataFrame): The dataframe containing the data.
        strategy (str or value): Strategy to fill missing values. The strings
            'mean', 'median' and 'mode' select a per-column statistic; any
            other value (number, other string, ...) is used as a literal
            fill value. The literal strings 'mean', 'median' and 'mode'
            therefore cannot be used as fill values.
        columns (list or str, optional): Column name(s) to apply the filling
            strategy to. If None, applies to all columns.

    Returns:
        pd.DataFrame: New DataFrame with missing values filled.

    Raises:
        KeyError: If a name in ``columns`` is not a column of ``dataframe``.

    Notes:
        - 'mean' and 'median' are only applied to numeric columns; other
          columns are left unchanged, so the default call works on frames
          that mix numeric and text columns.
        - 'mode' uses the first value returned by ``Series.mode`` when
          several values are equally frequent.
        - A column with no non-missing values has no statistic to fill with
          and is left unchanged.
    """
    result = dataframe.copy()

    if columns is None:
        targets = list(result.columns)
    elif isinstance(columns, str):
        # A bare string would otherwise be iterated character by character.
        targets = [columns]
    else:
        targets = list(columns)

    uses_statistic = isinstance(strategy, str) and strategy in ('mean', 'median', 'mode')

    for name in targets:
        series = result[name]
        if not series.isna().any():
            continue

        if not uses_statistic:
            fill_value = strategy
        elif strategy == 'mode':
            modes = series.mode(dropna=True)
            if modes.empty:
                # Every value is missing: there is no mode to fill with.
                continue
            fill_value = modes.iloc[0]
        else:
            if not pd.api.types.is_numeric_dtype(series):
                # Mean/median are undefined for non-numeric data.
                continue
            fill_value = series.mean() if strategy == 'mean' else series.median()
            if pd.isna(fill_value):
                # All-missing numeric column: nothing meaningful to fill with.
                continue

        result[name] = series.fillna(fill_value)

    return result
 
 
def remove_duplicates(dataframe):
    """
    Remove duplicate rows from a DataFrame.

    Rows are compared across all columns. The first occurrence of each
    distinct row is kept and retains its original index label. The input
    is not modified.

    Parameters:
        dataframe (pd.DataFrame): The dataframe containing the data.

    Returns:
        pd.DataFrame: New DataFrame without duplicates.
    """
    return dataframe.drop_duplicates()
 
 
def correct_dtypes(dataframe, dtype_mappings):
    """
    Correct the data types of DataFrame columns based on a mapping.

    Columns not named in the mapping keep their current dtype. The input
    is not modified.

    Parameters:
        dataframe (pd.DataFrame): The dataframe containing the data.
        dtype_mappings (dict): Dictionary mapping column names to new data types
            (anything accepted by ``DataFrame.astype``, e.g. 'int64', float,
            'category'). An empty mapping returns an unchanged copy.

    Returns:
        pd.DataFrame: New DataFrame with corrected data types.

    Raises:
        KeyError: If a key in ``dtype_mappings`` is not a column of ``dataframe``.
        ValueError / TypeError: Propagated from pandas when a column's values
            cannot be converted to the requested type.
    """
    if not dtype_mappings:
        # Nothing to convert; still return a new object so callers can
        # rely on the result being independent of the input.
        return dataframe.copy()
    return dataframe.astype(dtype_mappings)