Documentation
Modeling
NLP
Data Preparation

Module: data_preparation.py

Functions

import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from config import MAX_WORDS, MAX_LEN, TEXT_COLUMN, LABEL_COLUMN, DATA_FILE
 
def load_and_prepare_data():
    """
    Load and prepare text data for NLP models.
 
    Returns:
        tuple: Padded sequences and labels for training and validation data.
        Tokenizer: Fitted tokenizer instance.
    """