Module: clean.py
Functions
import pandas as pd
def fill_missing(dataframe, strategy='mean', columns=None):
"""
Fill missing values in specified columns or all columns.
Parameters:
dataframe (pd.DataFrame): The dataframe containing the data.
strategy (str or value): Strategy to fill missing values ('mean', 'median', 'mode', or specific value).
columns (list, optional): List of columns to apply filling strategy. If None, applies to all columns.
Returns:
pd.DataFrame: DataFrame with missing values filled.
"""
def remove_duplicates(dataframe):
"""
Remove duplicate rows from a DataFrame.
Parameters:
dataframe (pd.DataFrame): The dataframe containing the data.
Returns:
pd.DataFrame: DataFrame without duplicates.
"""
def correct_dtypes(dataframe, dtype_mappings):
"""
Correct the data types of DataFrame columns based on a mapping.
Parameters:
dataframe (pd.DataFrame): The dataframe containing the data.
dtype_mappings (dict): Dictionary mapping column names to new data types.
Returns:
pd.DataFrame: DataFrame with corrected data types.
"""