import varImp
import numpy as np
from sklearn import linear_model
from sklearn import preprocessing
from scipy.interpolate import interp1d
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
# Simulated sparse regression problem: n samples and p features, of which
# only the first s have a non-zero true coefficient.
n, p, s = 100, 1000, 10

# Standard-normal design matrix (drawn first, so the RNG call order is kept).
X = np.random.normal(size=(n, p))

# True coefficients: five 1s, four 2s, a single 5, then p - s zeros.
beta = np.array([1, 1, 1, 1, 1, 2, 2, 2, 2, 5] + (p - s) * [0])

# Response = linear signal plus standard-normal noise.
noise = np.random.normal(size=n)
Y = X.dot(beta) + noise
# Compare the lasso solution paths computed with and without column j of X.
j = 9

# LARS-lasso path on the full design, then on the design with column j dropped.
alphas, _, coefs = linear_model.lars_path(
    X, Y, Gram=None, method='lasso', return_path=True
)
X_minus_j = np.delete(X, j, axis=1)
alphas_j, _, coefs_j = linear_model.lars_path(
    X_minus_j, Y, Gram=None, method='lasso', return_path=True
)

# Left panel: each row of coefs is drawn as its own curve against alphas.
plt.subplot(121)
for path in coefs:
    plt.plot(alphas, path)
plt.title("Before removal ")
plt.ylim(-1, 5)

# Right panel: the same plot for the reduced design, on the same y-range.
plt.subplot(122)
for path in coefs_j:
    plt.plot(alphas_j, path)
plt.title("After removal")
plt.ylim(-1, 5)
# Repeat the before/after comparison, this time dropping column j = 7 of X.
j = 7

# The full-design path is recomputed here so this cell does not rely on the
# results of an earlier one.
alphas, _, coefs = linear_model.lars_path(
    X, Y, Gram=None, method='lasso', return_path=True
)
X_minus_j = np.delete(X, j, axis=1)
alphas_j, _, coefs_j = linear_model.lars_path(
    X_minus_j, Y, Gram=None, method='lasso', return_path=True
)

# Left panel: each row of coefs is drawn as its own curve against alphas.
plt.subplot(121)
for path in coefs:
    plt.plot(alphas, path)
plt.title("Before removal ")
plt.ylim(-1, 5)

# Right panel: the same plot for the reduced design, on the same y-range.
plt.subplot(122)
for path in coefs_j:
    plt.plot(alphas_j, path)
plt.title("After removal")
plt.ylim(-1, 5)
# Same comparison after dropping column j = 20, whose true coefficient in
# beta is zero.
j = 20

# Only the reduced-design path is computed; the "before" panel reuses the
# alphas / coefs already in scope from the full-design fit.
X_minus_j = np.delete(X, j, axis=1)
alphas_j, _, coefs_j = linear_model.lars_path(
    X_minus_j, Y, Gram=None, method='lasso', return_path=True
)

# Left panel: full-design paths (y-limits are left to matplotlib here).
plt.subplot(121)
for path in coefs:
    plt.plot(alphas, path)
plt.title("Before removal ")

# Right panel: paths with column j removed.
plt.subplot(122)
for path in coefs_j:
    plt.plot(alphas_j, path)
plt.title("After removal")
where the $L_p$ norm inside the $l_q$ norm is the $L_p$ norm of a function, defined via: $$ \| f(\lambda) \|_p = \left( \int |f(\lambda)|^p \, d\lambda \right)^{\frac{1}{p}} $$
For convenience, we use $p = q = 2$ in this demo.
# Print the varImp.LOCO_TS value for features 0-19, one line per feature.
# The message template is kept verbatim; note that each line is printed only
# after that feature's statistic has been computed.
# NOTE(review): the meaning of the trailing 0 passed to ExtractPath_LARS is
# not visible here - confirm against varImp.
message = "Now compting {}th TS: {}"
for i in range(20):
    stat = varImp.LOCO_TS(varImp.ExtractPath_LARS(X, Y, i, 0))
    print(message.format(i, stat))
# varImp.LOCO_TS value for each of the first 30 features.
# NOTE(review): presumably a leave-one-covariate-out importance statistic;
# confirm against varImp.
imp_list = [
    varImp.LOCO_TS(varImp.ExtractPath_LARS(X, Y, i, 0)) for i in range(0, 30)
]

# Min-max rescale the scores to [0, 1].  The list is converted to an ndarray
# first: "list - scalar" only works when the scalar happens to be a NumPy
# type and raises TypeError for built-in floats.
imp_arr = np.asarray(imp_list)
lowest, highest = imp_arr.min(), imp_arr.max()
# NOTE: if every score is identical, highest - lowest is 0 and the result is
# NaN, as in the original expression.
parse_score = (imp_arr - lowest) / (highest - lowest)
parse_score
# Draw the rescaled scores for features 0-29 as thin bars, with a scatter
# marker at the top of each bar.
feature_idx = list(range(30))
plt.bar(feature_idx, parse_score, width=0.2)
plt.scatter(feature_idx, parse_score)
# Raw (unscaled) varImp.LOCO_TS value for features 0-99.
imp_list = [
    varImp.LOCO_TS(varImp.ExtractPath_LARS(X, Y, k, 0))
    for k in range(100)
]

# Bars plus a marker per feature; the y-axis is clipped to [0, 6].
feature_idx = list(range(100))
plt.bar(feature_idx, imp_list, width=0.4)
plt.scatter(feature_idx, imp_list)
plt.ylim(0, 6)

# Trailing bare expression so a notebook cell displays the raw values.
imp_list