Pandas and Statsmodels Exercises
Part 1
import pandas as pd
import numpy as np
import statsmodels.api as sm
# Load the data from anscombe.csv; the code below relies on its "dataset",
# "x" and "y" columns.
df = pd.read_csv("anscombe.csv")

# Select the columns with a list (double brackets). Indexing a GroupBy with a
# bare tuple -- groupby(...)["x", "y"] -- was deprecated in pandas 1.0 and
# raises an error from pandas 2.0 onwards.
grouped_xy = df.groupby("dataset")[["x", "y"]]

print("The mean")
print(grouped_xy.mean())
print("\nThe variance")
print(grouped_xy.var())
print("\nThe correlation coefficient between x and y")
print(grouped_xy.corr())

print("\nThe linear regression line: y = β0 + β1x + ϵ")
# NOTE(review): unlike the per-dataset statistics above, this fit uses every
# row of the file pooled together -- confirm whether one fit per dataset was
# intended.
y = df.y
x = df.x
# sm.OLS does not add an intercept on its own; add_constant supplies the
# column of ones that lets the model estimate β0.
x = sm.add_constant(x)
est = sm.OLS(y, x).fit()
print(est.summary())
Result of Part 1
Part 2
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Read anscombe.csv; the plot below uses its "dataset", "x" and "y" columns.
df = pd.read_csv("anscombe.csv")

# The "dataset" column drives both the panel layout (col) and the colour (hue).
facet_options = {"col": "dataset", "hue": "dataset"}
grid = sns.FacetGrid(df, **facet_options)

# Draw a y-versus-x scatter plot on each facet of the grid.
grid.map(plt.scatter, "x", "y")

# Write the figure to disk first, then display it.
plt.savefig("seaborn.png")
plt.show()
Result of Part 2