import streamlit as st
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as pe
from wordcloud import WordCloud, STOPWORDS
from nltk import FreqDist
# Note: the old 'deprecation.showPyplotGlobalUse' config option no longer exists
# in current Streamlit, so figures are passed to st.pyplot explicitly below.
st.title("Welcome To The Explore Page")
st.markdown("On this page you will be able to explore some EDA visuals.")
# Loading my datasets
data = pd.read_csv("datasets/Train.csv")
clean_data = pd.read_csv("datasets/clean_copy.csv")
clean_data = clean_data.dropna()
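# A minimal optional sketch: Streamlit re-executes this whole script on every
# rerun, so for larger CSVs a cached loader (Streamlit's st.cache_data) avoids
# re-reading from disk each time. The load_csv name is just illustrative.
#
#   @st.cache_data
#   def load_csv(path):
#       return pd.read_csv(path)
#
#   data = load_csv("datasets/Train.csv")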
# Plotting a word cloud for the unclean dataset
unclean_words = " ".join(data["safe_text"].astype(str))  # cast to str so missing values don't break the join
wc = WordCloud(stopwords=STOPWORDS).generate(unclean_words)
fig_wc = plt.figure(figsize=(5, 10))
plt.title("Most Common Words in the Unclean Dataset")
plt.imshow(wc)
plt.axis("off")  # hide the axes around the word-cloud image
st.pyplot(fig_wc)  # pass the figure explicitly instead of relying on the removed global-use behaviour
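# A hedged aside: WordCloud also takes sizing/appearance keywords if the default
# render looks cramped; the keyword names below are from the wordcloud library,
# the values are only illustrative.
#
#   wc = WordCloud(stopwords=STOPWORDS, width=800, height=400,
#                  background_color="white", max_words=100).generate(unclean_words)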
# Plotting the most common words in the cleaned tweets as a treemap
clean_words = " ".join(clean_data["clean_tweet"]).split()  # converting the dataframe column to a corpus of words
freq_words = pd.DataFrame(FreqDist(clean_words).most_common(20), columns=["word", "count"])
fig = pe.treemap(data_frame=freq_words, path=["word"], values="count", title="Top 20 Most Frequent Words After Cleaning")
st.plotly_chart(fig)
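# A minimal alternative sketch: the same frequency table also reads well as a
# plain bar chart if the treemap feels busy (pe.bar is plotly.express.bar).
#
#   fig_bar = pe.bar(data_frame=freq_words, x="word", y="count",
#                    title="Top 20 Most Frequent Words After Cleaning")
#   st.plotly_chart(fig_bar)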
# Getting the tweet lengths
data["tweet_length"] = [len(i.split(" ")) for i in data["safe_text"].astype(str)]
words = data["tweet_length"].value_counts().rename_axis("tweet_length").reset_index(name="count")  # one row per length, counts in a "count" column (robust across pandas versions)
fig_2 = pe.scatter(data_frame=words, x="tweet_length", y="count", size="count", color="tweet_length", title="Tweet Lengths")
st.plotly_chart(fig_2)
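# A hedged alternative sketch: the length distribution can also be shown directly
# as a histogram, skipping the value_counts step (pe.histogram is
# plotly.express.histogram; nbins=50 is just an illustrative choice).
#
#   fig_hist = pe.histogram(data_frame=data, x="tweet_length", nbins=50,
#                           title="Distribution of Tweet Lengths")
#   st.plotly_chart(fig_hist)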