diff --git a/Readme.md b/Readme.md index 79bfdee..8516c5a 100644 --- a/Readme.md +++ b/Readme.md @@ -1,42 +1,44 @@ # 📰 QuickFactChecker – Fake News Detection ## 📌 Project Overview -QuickFactChecker is a machine learning–based web app that helps detect whether a news article is **real** or **fake**. -It uses different models (e.g., Naive Bayes, LSTM) trained on the **LIAR dataset** to evaluate credibility and assist users in identifying potentially misleading information. +QuickFactChecker is a **machine learning–based web app** that helps detect whether a news article is **real** or **fake**. +It uses different models (e.g., Naive Bayes, LSTM) trained on the **LIAR dataset** to evaluate credibility and assist users in identifying potentially misleading information. --- ## ✨ Features -- ✅ Fake news classification using ML models (Naive Bayes, LSTM). -- ✅ Interactive web app built with **Flask** and **HTML templates**. -- ✅ Preprocessed dataset included (`train.tsv`, `test.tsv`, `valid.tsv`). -- ✅ Notebooks for **data analysis & experimentation** (`liar-data-analysis.ipynb`, `dataset.ipynb`). -- ✅ Easy setup with `requirements.txt`. +- ✅ Fake news classification using ML models (**Naive Bayes**, **Logistic Regression**, **Random Forest**, and **LSTM**). +- ✅ Interactive web app built with **Flask** and **HTML templates**. +- ✅ **Automated NLTK Setup** to prevent missing resource errors. +- ✅ Preprocessed dataset included (`train.tsv`, `test.tsv`, `valid.tsv`). +- ✅ Notebooks for **data analysis & experimentation** (`liar-data-analysis.ipynb`, `dataset.ipynb`). +- ✅ Easy setup with `requirements.txt`.
"""Download the NLTK resources this project needs and report any that fail."""

import contextlib
import ssl

# Maps the package id given to ``nltk.download`` to the path probed with
# ``nltk.data.find`` to decide whether that package is already present.
_DEFAULT_RESOURCE_PATHS = {
    "punkt": "tokenizers/punkt",
    "stopwords": "corpora/stopwords",
    "wordnet": "corpora/wordnet",
    "omw-1.4": "corpora/omw-1.4",
}


@contextlib.contextmanager
def _unverified_https_context():
    """Temporarily make the default HTTPS context skip certificate checks.

    NOTE(security): this disables TLS certificate verification for every
    HTTPS request made while the context is active. It is kept because the
    script relies on it to get past SSL errors during the NLTK download, but
    the previous factory is always restored on exit so the override does not
    leak into the rest of the process.
    """
    unverified_factory = getattr(ssl, "_create_unverified_context", None)
    if unverified_factory is None:
        # This Python build has no unverified factory; leave SSL untouched.
        yield
        return

    previous_factory = ssl._create_default_https_context
    ssl._create_default_https_context = unverified_factory
    try:
        yield
    finally:
        ssl._create_default_https_context = previous_factory


def _ensure_resource(nltk_module, package, path):
    """Return True if *package* is available locally, downloading it if needed."""
    try:
        nltk_module.data.find(path)
    except LookupError:
        # Not installed yet: fall through to the download step below.
        pass
    except Exception as e:
        print(f"⚠️ Unexpected error while checking {package}: {e}")
        return False
    else:
        print(f" ✅ {package} is already installed.")
        return True

    print(f" ⬇️ Downloading {package}...")
    try:
        downloaded = nltk_module.download(package, quiet=False)
    except Exception as e:
        print(f"--- 🛑 ERROR downloading {package} ---")
        print(" -> Check your network connection or run with administrator permissions.")
        print(f" -> Details: {type(e).__name__}: {e}")
        return False

    # ``nltk.download`` signals most failures (network errors, unknown ids)
    # by returning False instead of raising, so the result must be checked.
    if not downloaded:
        print(f"--- 🛑 ERROR downloading {package} ---")
        print(" -> Check your network connection or run with administrator permissions.")
        return False

    print(f" ✅ Successfully downloaded: {package}")
    return True


def setup_nltk_resources(resource_paths=None):
    """Make sure the required NLTK resources are installed.

    Each resource is looked up locally first and downloaded only when it is
    missing. Progress and errors are printed to standard output.

    Args:
        resource_paths: Optional mapping of NLTK package id to the path used
            to check for it with ``nltk.data.find``. When ``None``, the
            project defaults (punkt, stopwords, wordnet, omw-1.4) are used.

    Returns:
        A list with the ids of the packages that could not be verified or
        downloaded. The list is empty when everything is available.
    """
    if resource_paths is None:
        resource_paths = _DEFAULT_RESOURCE_PATHS

    # Imported here so a missing NLTK install yields a readable message
    # instead of a traceback before the script prints anything.
    try:
        import nltk
    except ImportError as e:
        print(f"🛑 NLTK is not installed: {e}")
        print(" -> Install it (for example with 'pip install nltk') and run this script again.")
        return list(resource_paths)

    print("🚀 Starting NLTK resource setup...\n")

    failed = []
    with _unverified_https_context():
        for package, path in resource_paths.items():
            print(f"Processing: {package}")
            if not _ensure_resource(nltk, package, path):
                failed.append(package)
            print()

    if failed:
        print(f"⚠️ NLTK resource setup finished with errors. Missing: {', '.join(failed)}")
    else:
        print("🎉 NLTK resource setup complete.")
    print("If you still encounter 'LookupError', ensure NLTK is installed correctly and your Python environment is active.")
    return failed


if __name__ == "__main__":
    # A non-zero exit status lets shells and CI notice an incomplete setup.
    raise SystemExit(1 if setup_nltk_resources() else 0)