{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Twitter Sentiment Analysis"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Imports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Standard library\n",
    "import os\n",
    "import re\n",
    "import time\n",
    "\n",
    "# Twitter client and data handling\n",
    "import tweepy\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "# Plotting and visualization\n",
    "from IPython.display import display\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "from wordcloud import WordCloud, ImageColorGenerator, STOPWORDS\n",
    "%matplotlib inline\n",
    "\n",
    "# TextBlob\n",
    "from textblob import TextBlob\n",
    "from textblob.classifiers import NaiveBayesClassifier\n",
    "\n",
    "# NLTK and scikit-learn\n",
    "import nltk\n",
    "from sklearn.feature_extraction.text import CountVectorizer\n",
    "from sklearn.naive_bayes import MultinomialNB\n",
    "\n",
    "# Geocoding (geopy) and maps (folium)\n",
    "from geopy.geocoders import Nominatim\n",
    "import folium\n",
    "from folium import plugins"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Twitter Authentication Keys"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keys for authentication.\n",
    "# They are read from environment variables so that real credentials are\n",
    "# never saved inside the notebook. An unset variable falls back to the\n",
    "# empty string, which is the placeholder value these names held before.\n",
    "consumer_key = os.environ.get('TWITTER_CONSUMER_KEY', '')\n",
    "consumer_secret = os.environ.get('TWITTER_CONSUMER_SECRET', '')\n",
    "\n",
    "access_token = os.environ.get('TWITTER_ACCESS_TOKEN', '')\n",
    "access_token_secret = os.environ.get('TWITTER_ACCESS_TOKEN_SECRET', '')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Twitter Authentication"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Authentication: build the OAuth handler from the keys above and create\n",
    "# the API client that the search below goes through.\n",
    "auth = tweepy.OAuthHandler(consumer_key, consumer_secret)\n",
    "auth.set_access_token(access_token, access_token_secret)\n",
    "api = tweepy.API(auth)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Searching for Tweets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Searching Twitter timelines\n",
    "tweets 
= []\n",
    "info = []\n",
    "\n",
    "# Open the text dump once, in append mode, instead of reopening it for\n",
    "# every tweet; the with-block closes it even if the search raises.\n",
    "with open(\"lula-bolsonaro.txt\", \"a\", -1, \"utf-8\") as file:\n",
    "    # `count` is the per-page size of the v1.1 search endpoint (max 100);\n",
    "    # it replaces the former `rpp` parameter of the old Search API.\n",
    "    for tweet in tweepy.Cursor(api.search,\n",
    "                               q=\"lula bolsonaro\",\n",
    "                               tweet_mode='extended',\n",
    "                               count=100,\n",
    "                               result_type=\"popular\",\n",
    "                               include_entities=True,\n",
    "                               lang=\"pt\").items(1500):\n",
    "        # For a retweet, take the text of the original status.\n",
    "        if hasattr(tweet, 'retweeted_status'):\n",
    "            aux = tweet.retweeted_status.full_text\n",
    "        else:\n",
    "            aux = tweet.full_text\n",
    "\n",
    "        # Flatten to a single line so each tweet is one line of the dump.\n",
    "        newtweet = aux.replace(\"\\n\", \" \")\n",
    "\n",
    "        tweets.append(newtweet)  # cleaned text\n",
    "        info.append(tweet)       # full status object, used for metadata later\n",
    "        file.write(newtweet + '\\n')\n",
    "\n",
    "        # Pause half a second between tweets.\n",
    "        time.sleep(0.5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Creating the dataframe"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Construction of the dataframe: one row per collected tweet. `tweets`\n",
    "# holds the cleaned text and `info` the matching status objects, in the\n",
    "# same order, so the columns below line up row by row.\n",
    "tweets_df = pd.DataFrame(tweets, columns=['Tweets'])\n",
    "\n",
    "tweets_df['len'] = [len(text) for text in tweets]\n",
    "tweets_df['ID'] = [status.id for status in info]\n",
    "tweets_df['Date'] = [status.created_at for status in info]\n",
    "tweets_df['Source'] = [status.source for status in info]\n",
    "tweets_df['Likes'] = [status.favorite_count for status in info]\n",
    "tweets_df['RTs'] = [status.retweet_count for status in info]\n",
    "tweets_df['User Location'] = [status.user.location for status in info]\n",
    "tweets_df['Geo'] = [status.geo for status in info]\n",
    "tweets_df['Coordinates'] = [status.coordinates for status in info]\n",
    "\n",
    "# Persist the table (the index is written as the first CSV column).\n",
    "tweets_df.to_csv(\"lula-bolsonaro.csv\")"
   ]
  },
  { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | Tweets | \n", "len | \n", "ID | \n", "Date | \n", "Source | \n", "Likes | \n", "RTs | \n", "User Location | \n", "Geo | \n", "Coordinates | \n", "
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "A mudança do país que culminou com eleição de ... | \n", "274 | \n", "1076667402346221568 | \n", "2018-12-23 02:34:46 | \n", "Twitter for iPhone | \n", "3855 | \n", "702 | \n", "Paraná/Brasil | \n", "None | \n", "None | \n", "
| 1 | \n", "VÍDEO: “Quem votou no Bolsonaro que cobre ele.... | \n", "106 | \n", "1076766217220882433 | \n", "2018-12-23 09:07:26 | \n", "Twitter Web Client | \n", "575 | \n", "181 | \n", "Sao Paulo, Brazil | \n", "None | \n", "None | \n", "
| 2 | \n", "Com Lula, o Brasil reassumiu sua soberania e p... | \n", "181 | \n", "1076891824684453888 | \n", "2018-12-23 17:26:33 | \n", "Twitter Web Client | \n", "866 | \n", "229 | \n", "\n", " | None | \n", "None | \n", "
| 3 | \n", "De 2003 até o golpe de 2016, o Brasil foi prot... | \n", "185 | \n", "1076877593738133504 | \n", "2018-12-23 16:30:00 | \n", "TweetDeck | \n", "474 | \n", "181 | \n", "Brasília / Brasil | \n", "None | \n", "None | \n", "
| 4 | \n", "Quem mandou Adélio esfaquear Bolsonaro? Ele ag... | \n", "116 | \n", "1076477470453522432 | \n", "2018-12-22 14:00:03 | \n", "Twitter for Android | \n", "363 | \n", "99 | \n", "Brasília - Brasil | \n", "None | \n", "None | \n", "