Documentation › Data Preprocessing › Text › Tokenize

Module: tokenize.py

Provides word- and sentence-level tokenization helpers built on NLTK.

Functions
import nltk
from nltk.tokenize import word_tokenize, sent_tokenize
 
# Ensure you have the necessary NLTK data downloaded
nltk.download('punkt')
 
def tokenize_words(text):
    """
    Tokenizes a given text into words.

    Parameters:
        text (str): String containing the text to be tokenized.

    Returns:
        list: A list of words.
    """
    # Delegate to NLTK's recommended word tokenizer (Treebank-style,
    # backed by the 'punkt' sentence model downloaded at import time).
    # The original stub had no body and silently returned None despite
    # the documented return value.
    return word_tokenize(text)
 
def tokenize_sentences(text):
    """
    Tokenizes a given text into sentences.

    Parameters:
        text (str): String containing the text to be tokenized.

    Returns:
        list: A list of sentences.
    """
    # Delegate to NLTK's Punkt sentence tokenizer (uses the 'punkt'
    # resource downloaded at import time). The original stub had no
    # body and silently returned None despite the documented return
    # value.
    return sent_tokenize(text)