#Import Libraries
import pandas as pd
import numpy as np
from pandas.tseries.offsets import BDay
#Allows us to filter out non-business days
# Install a pip package in the current Jupyter kernel
import sys
!{sys.executable} -m pip install plotly
import plotly.express as px
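For reference, a minimal sketch of the kind of business-day check BDay enables (an illustration only, not part of the pipeline; note that BDay excludes weekends but not exchange holidays):

#Assumed example: flag which dates fall on a business day (Mon-Fri)
exampleDates = pd.Series(pd.to_datetime(["2021-01-02", "2021-01-03", "2021-01-04"]))
bday = BDay()
exampleDates[exampleDates.map(bday.is_on_offset)]
#Keeps only 2021-01-04 (a Monday); the Saturday and Sunday are dropped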
#S&P 500 Tweets:
spyDF = pd.read_csv("twitterData/tweetsSPY.csv")
del spyDF["Unnamed: 0"]
#Import file, remove unnecessary index column
spyDF["Datetime"] = pd.to_datetime(spyDF["Datetime"], format = "%m/%d/%Y %H:%M")
#Convert existing datetime column from text to datetime object type
#Tesla Tweets:
tslaDF = pd.read_csv("twitterData/tweetsTSLA.csv")
del tslaDF["Unnamed: 0"]
tslaDF["Datetime"] = pd.to_datetime(tslaDF["Datetime"], format = "%m/%d/%Y %H:%M")
#Gamestop Tweets:
gmeDF = pd.read_csv("twitterData/tweetsGME.csv")
del gmeDF["Unnamed: 0"]
gmeDF["Datetime"] = pd.to_datetime(gmeDF["Datetime"], format = "%m/%d/%Y %H:%M")
#Exxon Mobil Tweets:
xomDF = pd.read_csv("twitterData/tweetsXOM.csv")
del xomDF["Unnamed: 0"]
xomDF["Datetime"] = pd.to_datetime(xomDF["Datetime"], format = "%m/%d/%Y %H:%M")
#SPY Price Data
stonkSPY = pd.read_excel("priceData/SPY.xlsx")
stonkSPY["Datetime"] = pd.to_datetime(stonkSPY["Date"], format = "%Y-%m-%d")
#The Yahoo Finance price data was reformatted before this import: forward-return columns (1, 2, 3, 4, 5, and 10 trading days) were added alongside the standard OHLCV fields so pandas can read the file directly.
del stonkSPY["Date"]
stonkSPY.set_index('Datetime')
#Note: set_index returns a new frame and is not assigned here (the same pattern repeats in the TSLA/GME/XOM cells below), so each price frame keeps Datetime as a regular column for the reshaping later; the call simply previews the indexed data.
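The preprocessing script itself is not shown, so the following is only an assumed sketch of how n-day forward returns could be derived from Adj Close (the column choice and demo values are assumptions):

#Assumed sketch (not the original preprocessing): n-day forward returns from Adj Close
demoPrices = pd.DataFrame({"Adj Close": [100.0, 102.0, 101.0, 105.0, 104.0, 108.0]})
for n in [1, 2, 5]:
    demoPrices[n] = demoPrices["Adj Close"].shift(-n) / demoPrices["Adj Close"] - 1
#Rows near the end become NaN, matching the NaNs at the bottom of the price table shown further below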
#TSLA Price Data
stonkTSLA = pd.read_excel("priceData/TSLA.xlsx")
stonkTSLA["Datetime"] = pd.to_datetime(stonkTSLA["Date"], format = "%Y-%m-%d")
del stonkTSLA["Date"]
stonkTSLA.set_index('Datetime')
#GME Price Data
stonkGME = pd.read_excel("priceData/GME.xlsx")
stonkGME["Datetime"] = pd.to_datetime(stonkGME["Date"], format = "%Y-%m-%d")
del stonkGME["Date"]
stonkGME.set_index('Datetime')
#XOM Price Data
stonkXOM = pd.read_excel("priceData/XOM.xlsx")
stonkXOM["Datetime"] = pd.to_datetime(stonkXOM["Date"], format = "%Y-%m-%d")
del stonkXOM["Date"]
stonkXOM.set_index('Datetime')
Preview of the XOM price data after indexing by Datetime. Columns 1, 2, 3, 4, 5, and 10 hold the forward return over that many trading days, which is why the final rows contain NaN:

| Datetime | Open | High | Low | Close | Adj Close | Volume | 1 | 2 | 3 | 4 | 5 | 10 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2020-04-16 | 40.480000 | 40.480000 | 38.930000 | 39.150002 | 36.160999 | 30934000 | 0.103959 | 0.051852 | 0.046232 | 0.076118 | 0.109834 | 0.186973 |
| 2020-04-17 | 39.880001 | 43.320000 | 39.869999 | 43.220001 | 39.920265 | 40829100 | -0.047200 | -0.052291 | -0.025220 | 0.005322 | 0.011800 | -0.001851 |
| 2020-04-20 | 40.750000 | 42.740002 | 40.540001 | 41.180000 | 38.036011 | 39260800 | -0.005342 | 0.023070 | 0.055124 | 0.061923 | 0.067023 | 0.089850 |
| 2020-04-21 | 40.400002 | 42.130001 | 39.630001 | 40.959999 | 37.832809 | 44381600 | 0.028565 | 0.060791 | 0.067627 | 0.072754 | 0.097900 | 0.094483 |
| 2020-04-22 | 42.599998 | 42.930000 | 41.860001 | 42.130001 | 38.913486 | 27084700 | 0.031332 | 0.037978 | 0.042962 | 0.067410 | 0.126513 | 0.044149 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2021-04-09 | 56.000000 | 56.439999 | 55.529999 | 55.869999 | 55.869999 | 20010100 | -0.006622 | -0.002864 | 0.025953 | 0.019868 | NaN | NaN |
| 2021-04-12 | 56.759998 | 56.790001 | 55.310001 | 55.500000 | 55.500000 | 20238800 | 0.003784 | 0.032793 | 0.026667 | NaN | NaN | NaN |
| 2021-04-13 | 55.299999 | 55.980000 | 55.090000 | 55.709999 | 55.709999 | 19370600 | 0.028900 | 0.022797 | NaN | NaN | NaN | NaN |
| 2021-04-14 | 56.410000 | 58.009998 | 56.250000 | 57.320000 | 57.320000 | 28051100 | -0.005932 | NaN | NaN | NaN | NaN | NaN |
| 2021-04-15 | 57.419998 | 57.419998 | 56.779999 | 56.980000 | 56.980000 | 23246300 | NaN | NaN | NaN | NaN | NaN | NaN |
252 rows × 12 columns
#SPY Sentiment
spyDF = spyDF.drop(columns='TweetID').resample('d', on="Datetime").mean()
spyDF = spyDF.dropna()
spyDF2 = spyDF.reset_index()
validSPY = spyDF2["Datetime"].tolist()
spySentiment = pd.Series(spyDF2["Predictions"].values, index = spyDF2["Datetime"]).to_dict()
#Creates a dictionary with the date time object as key and the day's sentiment average as the value.
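As a quick illustration, a single day's average sentiment can be read straight from the dictionary; the key must be one of the midnight timestamps produced by the daily resample:

#Example lookup: average sentiment on the first day that has tweets
spySentiment[validSPY[0]]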
#TSLA Sentiment
tslaDF = tslaDF.drop(columns='TweetID').resample('d', on="Datetime").mean()
tslaDF = tslaDF.dropna()
tslaDF2 = tslaDF.reset_index()
validTSLA = tslaDF2["Datetime"].tolist()
tslaSentiment = pd.Series(tslaDF2["Predictions"].values, index = tslaDF2["Datetime"]).to_dict()
#GME Sentiment
gmeDF = gmeDF.drop(columns='TweetID').resample('d', on="Datetime").mean()
gmeDF = gmeDF.dropna()
gmeDF2 = gmeDF.reset_index()
validGME = gmeDF2["Datetime"].tolist()
gmeSentiment = pd.Series(gmeDF2["Predictions"].values, index = gmeDF2["Datetime"]).to_dict()
#XOM Sentiment
xomDF = xomDF.drop(columns='TweetID').resample('d', on="Datetime").mean()
xomDF = xomDF.dropna()
xomDF2 = xomDF.reset_index()
validXOM = xomDF2["Datetime"].tolist()
xomSentiment = pd.Series(xomDF2["Predictions"].values, index = xomDF2["Datetime"]).to_dict()
#SPY
rotDF = stonkSPY.drop(["Open","High","Low","Close","Adj Close","Volume"],axis = 1)
rotDF = rotDF.set_index("Datetime")
meltDF = rotDF.reset_index()
meltDF2 = meltDF.melt(id_vars = "Datetime")
meltDF3 = meltDF2.set_index("Datetime").sort_values(by=['Datetime','variable'])
meltDF4 = meltDF3.rename(columns={"variable":"holdingPeriod",'value':'return'})
meltDF4 = meltDF4.reset_index()
meltDF5 = meltDF4[meltDF4["Datetime"].isin(validSPY)]
newTable = []
for row in meltDF5.itertuples(index = False):
    comp = list(row)
    comp.append(spyDF.loc[row[0], "Predictions"])
    newTable.append(tuple(comp))
df6 = pd.DataFrame(newTable, columns = ["date", "holdP","return","sentiment"])
finalSPY = df6.dropna()
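For reference, the same sentiment join can be written without the row loop; a sketch that should be equivalent given the frames defined above (equivalence assumed rather than verified against the loop output):

#Loop-free version of the sentiment join (sketch)
altSPY = (meltDF5
          .merge(spyDF[["Predictions"]], left_on="Datetime", right_index=True)
          .rename(columns={"Datetime": "date", "holdingPeriod": "holdP", "Predictions": "sentiment"})
          .dropna())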
#TSLA
rotDF = stonkTSLA.drop(["Open","High","Low","Close","Adj Close","Volume"],axis = 1)
rotDF = rotDF.set_index("Datetime")
meltDF = rotDF.reset_index()
meltDF2 = meltDF.melt(id_vars = "Datetime")
meltDF3 = meltDF2.set_index("Datetime").sort_values(by=['Datetime','variable'])
meltDF4 = meltDF3.rename(columns={"variable":"holdingPeriod",'value':'return'})
meltDF4 = meltDF4.reset_index()
meltDF5 = meltDF4[meltDF4["Datetime"].isin(validTSLA)]
newTable = []
for row in meltDF5.itertuples(index = False):
    comp = list(row)
    comp.append(tslaDF.loc[row[0], "Predictions"])
    newTable.append(tuple(comp))
df6 = pd.DataFrame(newTable, columns = ["date", "holdP","return","sentiment"])
finalTSLA = df6.dropna()
#GME
rotDF = stonkGME.drop(["Open","High","Low","Close","Adj Close","Volume"],axis = 1)
rotDF = rotDF.set_index("Datetime")
meltDF = rotDF.reset_index()
meltDF2 = meltDF.melt(id_vars = "Datetime")
meltDF3 = meltDF2.set_index("Datetime").sort_values(by=['Datetime','variable'])
meltDF4 = meltDF3.rename(columns={"variable":"holdingPeriod",'value':'return'})
meltDF4 = meltDF4.reset_index()
meltDF5 = meltDF4[meltDF4["Datetime"].isin(validGME)]
newTable = []
for row in meltDF5.itertuples(index = False):
    comp = list(row)
    comp.append(gmeDF.loc[row[0], "Predictions"])
    newTable.append(tuple(comp))
df6 = pd.DataFrame(newTable, columns = ["date", "holdP","return","sentiment"])
finalGME = df6.dropna()
#XOM
rotDF = stonkXOM.drop(["Open","High","Low","Close","Adj Close","Volume"],axis = 1)
rotDF = rotDF.set_index("Datetime")
meltDF = rotDF.reset_index()
meltDF2 = meltDF.melt(id_vars = "Datetime")
meltDF3 = meltDF2.set_index("Datetime").sort_values(by=['Datetime','variable'])
meltDF4 = meltDF3.rename(columns={"variable":"holdingPeriod",'value':'return'})
meltDF4 = meltDF4.reset_index()
meltDF5 = meltDF4[meltDF4["Datetime"].isin(validXOM)]
newTable = []
for row in meltDF5.itertuples(index = False):
    comp = list(row)
    comp.append(xomDF.loc[row[0], "Predictions"])
    newTable.append(tuple(comp))
df6 = pd.DataFrame(newTable, columns = ["date", "holdP","return","sentiment"])
finalXOM = df6.dropna()
Note: Color encodes the holding period (the time-horizon axis) purely to aid depth perception along that axis; it has no other significance.
#Create Visuals
lbls = {"sentiment": "Sentiment Score",
"holdP" : "Time Horizon",
"return": "Investment Return"
}
#Plot each ticker's returns against holding period and daily average sentiment as an interactive 3D scatter
ttl = "SPY Returns by Holding Period and Simple Average Sentiment\n"
spyfig = px.scatter_3d(finalSPY, x="sentiment", y="holdP", z="return",
hover_name="date", color="holdP",range_color = [0,12],
range_x=[0.43,0.29], title = ttl,labels = lbls)
spyfig.show()
ttl = "TSLA Returns by Holding Period and Simple Average Sentiment\n"
tslafig = px.scatter_3d(finalTSLA, x="sentiment", y="holdP", z="return",
hover_name="date", color="holdP",range_color = [0,12],
range_x=[0.8,0.67], title = ttl,labels = lbls)
tslafig.show()
ttl = "GME Returns by Holding Period and Simple Average Sentiment\n"
gmefig = px.scatter_3d(finalGME, x="sentiment", y="holdP", z="return",
hover_name="date", color="holdP",range_color = [0,12],
range_x=[0.7,0.4], title = ttl,labels = lbls)
gmefig.show()
ttl = "XOM Returns by Holding Period and Simple Average Sentiment\n"
xomfig = px.scatter_3d(finalXOM, x="sentiment", y="holdP", z="return",
hover_name="date", color="holdP",range_color = [0,12],
range_x=[0.85,0.6], title = ttl,labels = lbls)
xomfig.show()
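The interactive figures can also be exported as standalone HTML files for sharing (the output file names below are only examples):

#Optional: save each interactive figure as a standalone HTML file
spyfig.write_html("spy_sentiment_returns.html")
tslafig.write_html("tsla_sentiment_returns.html")
gmefig.write_html("gme_sentiment_returns.html")
xomfig.write_html("xom_sentiment_returns.html")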