Linear Regression

    In statistics, Linear Regression is a linear approach to modeling the relationship between a dependent (response) variable and one or more explanatory (independent) variables. The case of one explanatory variable is called Simple Linear Regression; for more than one it is called Multiple Linear Regression. In Linear Regression the relationships are modeled using linear predictor functions whose unknown parameters are estimated from the data.

Simple linear regression

    Simple Linear Regression has a single explanatory variable. It concerns two-dimensional sample points with one independent and one dependent variable (conventionally the x and y coordinates in a Cartesian coordinate system). The adjective simple refers to the fact that the outcome variable is related to a single predictor.

The graph shows $n$ data pairs denoted by $(x_i,y_i),\ i = 1,2,\dots,n$. We can describe the underlying relationship between $y_i$ and $x_i$, involving the error term $\varepsilon_i$, with the linear equation $y_i = \alpha + \beta x_i + \varepsilon_i$. The term $\varepsilon_i$ can be interpreted as noise in the data, i.e. some kind of randomness originating from various sources; it is also known as the 'error term' or 'residual'. Our goal is to find estimates of $\alpha$ and $\beta$ which provide, in some sense, the best fit for the data points. To find the 'best' fit we will use the least-squares approach. From a mathematical point of view, the problem amounts to minimizing a cost (error) function with respect to $\alpha,\beta$: $$1) \: J(\alpha,\beta,x_i,y_i) = \sum_i(y_i - \alpha - \beta x_i)^2$$ $J$ is called the cost function, and the expression $(y_i - \alpha - \beta x_i)^2$ is called the loss function. This function is appropriate because it is differentiable everywhere and gives us a measure of the difference between the observed value $y_i$ and the predicted value $y'(\alpha,\beta)=\alpha + \beta x_i$.
In order to find $\min_{\alpha,\beta}J(\alpha,\beta,x_i,y_i)$ we will use the Gradient Descent method.
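As a small illustration, cost function 1) can be evaluated directly in NumPy. This is a minimal sketch; the data points and the trial values of $\alpha$ and $\beta$ below are made up for the example:

```python
import numpy as np

def cost(alpha, beta, x, y):
    """Cost function 1): sum of squared residuals for given alpha, beta."""
    residuals = y - alpha - beta * x
    return float(np.sum(residuals ** 2))

# toy data, just to show the call
x = np.array([1.0, 2.0, 3.0, 4.0])
y = np.array([3.1, 5.0, 6.8, 9.2])
print(cost(0.0, 2.0, x, y))
```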

Gradient Descent over simple linear regression

    Methods for finding maxima and minima (extrema) were developed alongside mathematical analysis (calculus). Gradient Descent is a numerical approach to finding a local minimum of a first-order differentiable function. The idea is to take repeated steps in the opposite direction of the gradient of the function at the current point.

The Gradient Descent algorithm is defined by:
$$\alpha = \alpha - h\,\nabla_{\alpha}J(\alpha,\beta)$$ $$\beta = \beta - h\,\nabla_{\beta}J(\alpha,\beta)$$

The above updates are repeated enough times to reach the minimum of the cost function. $h$ is called the learning rate.
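A minimal NumPy sketch of these update rules follows. The gradients of cost function 1) with respect to $\alpha$ and $\beta$ are written out explicitly; the data, the zero initialization, the learning rate, and the number of iterations are placeholder choices for illustration:

```python
import numpy as np

def gradient_descent(x, y, h=0.01, epochs=1000):
    """Gradient descent for y = alpha + beta * x using cost function 1)."""
    alpha, beta = 0.0, 0.0
    for _ in range(epochs):
        residuals = y - alpha - beta * x
        # partial derivatives of J with respect to alpha and beta
        grad_alpha = -2.0 * np.sum(residuals)
        grad_beta = -2.0 * np.sum(residuals * x)
        alpha -= h * grad_alpha
        beta -= h * grad_beta
    return alpha, beta

x = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
y = np.array([3.0, 5.1, 7.2, 8.9, 11.1])
alpha, beta = gradient_descent(x, y)
print(alpha, beta)
```

Each iteration moves $(\alpha,\beta)$ a small step against the gradient of $J$.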

During the learning process we can see that the curves tend towards the result $(\alpha \approx 6.9, \beta \approx 9.92)$.

We've found that the best fit which describes our data is the line $y'=6.9x + 4.48$

Effect of different values for learning rate

The learning rate $h$ is the tuning parameter in an optimization algorithm that determines the step size at each iteration while moving toward a minimum of the cost function. While the gradient determines the direction of the step, the learning rate determines how big a step is taken in that direction. Too high a learning rate will make the learning jump over the minimum, while too low a learning rate will either take too long to converge or get stuck in an undesirable local minimum. In order to achieve faster convergence and to prevent oscillations and getting stuck in undesirable local minima, the learning rate is often varied during training, either according to a learning rate schedule or by using an adaptive learning rate.

Let's take different values for the learning rate $h$ and try to understand its impact on the algorithm.
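One way to experiment with this is sketched below, reusing the `gradient_descent` function from the earlier snippet. The candidate rates are arbitrary, and the largest one is deliberately chosen to overshoot on this toy data:

```python
import numpy as np

x = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
y = np.array([3.0, 5.1, 7.2, 8.9, 11.1])

# gradient_descent(x, y, h, epochs) is the function from the sketch above;
# the largest rate overshoots and diverges (values blow up to nan)
for h in (0.000001, 0.001, 0.01, 0.02):
    alpha, beta = gradient_descent(x, y, h=h, epochs=1000)
    print(f"h={h:g}  alpha={alpha:.3f}  beta={beta:.3f}")
```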

$h = 0.000001$

We can see that the curves tend towards the result $(\alpha \approx -5.3, \beta \approx 2.4)$.

By contrast, when the learning rate is too big the algorithm makes too large a learning jump, which leads to an extremely bad result.

In the case above ($h = 0.000001$) the learning rate is too small and the algorithm learns too slowly. We can make up for that either by increasing the learning rate or by increasing the number of iterations, the so-called epochs; finding a balance between the two improves the result. In the example below we will increase the number of epochs to see whether there is an improvement.

The answer is 'yes'. But increasing the number of epochs also increases the computation time, which makes the learning process more time-consuming.

2. Multiple Linear Regression in matrix form.

Multiple Linear Regression (MLR), also known as multiple regression, is a statistical technique that uses several explanatory variables to predict the outcome of a response variable. In essence, MLR is an extension of ordinary least-squares regression. The equation that we are searching for is defined by:

$$y^i = h(x^i) + \varepsilon_i = \vartheta_0 + \vartheta_1 x_1^i + \vartheta_2 x_2^i + \dots + \vartheta_p x_p^i + \varepsilon_i$$

where $i$ is the index of the observation, $y^i$ is the dependent (target) value, $x_1^i,\dots,x_p^i$ are the feature values, $\vartheta_0$ is the intercept, $\vartheta_1,\dots,\vartheta_p$ are the slope coefficients for the explanatory variables, and $\varepsilon_i$ is the error term.

The above hypothesis can also be represented by:
$$\hat{Y} = X\Theta^T$$

where $\Theta = \begin{bmatrix} \vartheta_0 & \vartheta_1 & \dots & \vartheta_p \end{bmatrix}$ is the row vector of parameters, $X = \begin{bmatrix} 1 & x_1^1 & \dots & x_p^1 \\ \vdots & \vdots & \ddots & \vdots \\ 1 & x_1^m & \dots & x_p^m \\ \end{bmatrix}$ is the $m \times (p+1)$ matrix whose rows are the observations, and $\hat{Y} = \begin{bmatrix} \hat{y}^1\\ \vdots \\ \hat{y}^m \\ \end{bmatrix}$ is the column vector of predicted values.

We've appended a column of ones $\begin{bmatrix} 1\\ \vdots \\ 1\\ \end{bmatrix}$ to $X$ so that the intercept $\vartheta_0$ is handled directly by the matrix multiplication.

To define and measure the error of our model we define the cost function in terms of the sum of the squared residuals. The cost function is denoted by: $$2)\hspace{1cm} J(\varTheta) = \frac{1}{2m}\sum_{i=1}^m(h(x^i) - y^i)^2$$
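A minimal NumPy sketch of cost function 2), assuming `X` already contains the column of ones and `Theta` is stored as a row vector of shape `(1, p + 1)`; the toy numbers are made up:

```python
import numpy as np

def compute_cost(X, Theta, Y):
    """Cost function 2): J = 1/(2m) * sum((X Theta^T - Y)^2)."""
    m = len(Y)
    errors = X @ Theta.T - Y          # shape (m, 1)
    return float(np.sum(errors ** 2) / (2 * m))

# toy example: 3 observations, 2 features plus the appended column of ones
X = np.array([[1.0, 2.0, 3.0],
              [1.0, 1.0, 5.0],
              [1.0, 4.0, 2.0]])
Y = np.array([[10.0], [12.0], [11.0]])
Theta = np.zeros((1, X.shape[1]))     # zero initialization for the example
print(compute_cost(X, Theta, Y))
```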

We have to initialize the model parameters with some random values (random initialization). To use Gradient Descent we need to measure how the cost function changes when its parameters change. Therefore we compute the partial derivatives of the cost function $J(\vartheta_0,\vartheta_1,\dots,\vartheta_p)$:

$$3)\hspace{1cm} \frac{\partial J(\varTheta)}{\partial \vartheta_j} = \frac{1}{m}\sum_{i=1}^m(h(x^i) - y^i)\,x_j^i$$ We can rewrite this in a more compact matrix form so that it can be implemented directly in NumPy.

Using Einstein summation notation we can rewrite eq. 3):


$$\frac{\partial J(\varTheta)}{\partial \vartheta_j} = \frac{1}{m}\big(\vartheta_k x_k^i - y^i\big)\,x_j^i$$ and in matrix form:


$$4) \hspace{1cm} \vec{\nabla} J(\varTheta) = \frac{1}{m}\big((X\Theta^T - Y)^T X\big)^T$$

Applying the rule $(AB)^T = B^T A^T$, the equation can be rewritten as $$5) \hspace{1cm} \vec{\nabla} J(\varTheta) = \frac{1}{m}\,X^T(X\Theta^T - Y)$$

$$\text{where}\hspace{1cm} \vec{\nabla} = \frac{\partial}{\partial \vartheta_j}\,\vec{e}_j$$

Eq. 5) is extremely convenient because it can be implemented very simply in NumPy, which is many times faster than plain Python.

The Gradient Descent algorithm then looks like this:
$$\Theta = \Theta - h\,\vec{\nabla} J(\varTheta)$$ where $h$ is the learning rate.
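A compact sketch of eq. 5) and this update in NumPy. The shapes follow the matrix form above; the final transpose simply brings the gradient back to the row shape of $\Theta$, and the toy numbers are illustrative:

```python
import numpy as np

def gradient(X, Theta, Y):
    """Eq. 5): grad J = (1/m) X^T (X Theta^T - Y), returned with Theta's row shape."""
    m = len(Y)
    return (X.T @ (X @ Theta.T - Y)).T / m

def gradient_step(X, Theta, Y, h):
    """One Gradient Descent update: Theta <- Theta - h * grad J."""
    return Theta - h * gradient(X, Theta, Y)

# toy usage: 3 observations, one feature plus the column of ones
X = np.array([[1.0, 2.0], [1.0, 3.0], [1.0, 4.0]])
Y = np.array([[5.0], [7.0], [9.0]])
Theta = np.zeros((1, 2))
Theta = gradient_step(X, Theta, Y, h=0.1)
print(Theta)
```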

We will be using the root mean squared error (RMSE) and the coefficient of determination ($R^2$ score) to evaluate our model.
RMSE is the square root of the average of the squared residuals. RMSE is defined by:
$$RMSE =\sqrt{\frac{1}{m}\sum_{i=1}^m\big(h(x^i)- y^i\big)^2}$$ The $R^2$ score, or coefficient of determination, explains how much of the total variance of the dependent variable can be reduced by using the least-squares regression.
$R^2$ is determined by $$R^2 = 1 - \frac{SS_r}{SS_t}$$

$SS_t$ is the total sum of squared errors if we take the mean of the observed values as the predicted value: $$SS_t =\sum_{i=1}^m( y^i -\bar{y})^2$$
while $SS_r$ is the sum of the squared residuals of the model: $$SS_r =\sum_{i=1}^m\big(h(x^i)- y^i\big)^2$$
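Both metrics are straightforward to compute with NumPy; a sketch (here `y_pred` plays the role of $h(x^i)$):

```python
import numpy as np

def rmse(y_true, y_pred):
    """Root mean squared error."""
    return float(np.sqrt(np.mean((y_pred - y_true) ** 2)))

def r2_score(y_true, y_pred):
    """Coefficient of determination: R^2 = 1 - SS_r / SS_t."""
    ss_r = np.sum((y_pred - y_true) ** 2)
    ss_t = np.sum((y_true - np.mean(y_true)) ** 2)
    return float(1.0 - ss_r / ss_t)
```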

Implementation of Gradient Descent for Multiple Linear Regression using NumPy
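Below is one possible NumPy sketch of such an implementation. The function names, the default hyperparameters, and the zero initialization are my choices, not necessarily those used for the results quoted later; in practice feature scaling helps gradient descent converge on raw data:

```python
import numpy as np

def fit_linear_regression(X_raw, y, h=0.01, epochs=1000):
    """Gradient descent for multiple linear regression using eq. 5).

    X_raw : (m, p) feature matrix WITHOUT the column of ones.
    y     : (m,) target vector.
    Returns Theta with shape (1, p + 1), intercept first.
    """
    m = X_raw.shape[0]
    X = np.hstack([np.ones((m, 1)), X_raw])      # append the column of ones
    Y = y.reshape(-1, 1)
    Theta = np.zeros((1, X.shape[1]))            # parameter initialization
    for _ in range(epochs):
        grad = (X.T @ (X @ Theta.T - Y)).T / m   # eq. 5), reshaped to match Theta
        Theta = Theta - h * grad
    return Theta

def predict(X_raw, Theta):
    """Prediction Y_hat = X Theta^T, returned as a flat array."""
    m = X_raw.shape[0]
    X = np.hstack([np.ones((m, 1)), X_raw])
    return (X @ Theta.T).ravel()
```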

Testing our implementation on the 'insurance.csv' dataset

Converting Categories to Numbers. Linear regression can be performed only on numbers, so we must convert the categorical features into numbers. To do that, we can make use of the pandas function get_dummies. So let’s convert the “sex”, “smoker”, and “region” columns into numerically represented features.
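A sketch of that conversion with pandas. The column names follow the standard insurance.csv layout, and `drop_first=True` (which avoids redundant dummy columns) is a choice for this example, not a requirement:

```python
import pandas as pd

# load the data and one-hot encode the categorical columns
df = pd.read_csv("insurance.csv")
df = pd.get_dummies(df, columns=["sex", "smoker", "region"],
                    drop_first=True, dtype=int)
print(df.head())
```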

Now, let’s only select the features that are the most relevant. Feature selection is one of the important tasks in any machine learning project. You must know which features are most correlated with the targets (the “charges” column in our case) and must only use those features that have a high correlation with your target. This can be done through experimentation. For example, in this problem, I tried using the “sex” and “region” features to predict “charges” but didn’t find much of an improvement in the prediction performance of the model. So I decided to omit these features from the model. Through small experimentation like that, I found the “age,” “bmi,” and “smoker” columns to be most relevant when predicting insurance costs (the “charges” column in our data frame).
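One simple way to run such an experiment is to look at the correlation of each (now numeric) column with the target; a sketch, assuming the dummy-encoded data frame from the previous snippet:

```python
# correlation of every column with the target "charges", strongest first
corr_with_target = df.corr()["charges"].sort_values(ascending=False)
print(corr_with_target)
```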

Let's train the model on this data using our implementation.
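For example, reusing the `fit_linear_regression`, `predict`, and `r2_score` sketches from above (the column name `smoker_yes` comes from the dummy encoding above, standardization is added because plain gradient descent is sensitive to feature magnitudes, and the hyperparameters are placeholders):

```python
import numpy as np

# features chosen above; "smoker_yes" is the column produced by the dummy encoding
features = ["age", "bmi", "smoker_yes"]
X_raw = df[features].to_numpy(dtype=float)
y = df["charges"].to_numpy(dtype=float)

# standardize the features so a single learning rate works for all of them
X_scaled = (X_raw - X_raw.mean(axis=0)) / X_raw.std(axis=0)

Theta = fit_linear_regression(X_scaled, y, h=0.1, epochs=5000)
y_pred = predict(X_scaled, Theta)
print("R^2:", r2_score(y, y_pred))
```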

According to the $R^2$ score, we've achieved about 75% accuracy.

The score using the implementation in sklearn is almost the same as with our implementation. Let's compare the predicted values.
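A sketch of that comparison with scikit-learn, reusing `X_scaled`, `y`, and `y_pred` from the previous snippet:

```python
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score as sk_r2_score

model = LinearRegression().fit(X_scaled, y)
y_pred_sklearn = model.predict(X_scaled)

print("sklearn R^2 :", sk_r2_score(y, y_pred_sklearn))
print("our R^2     :", sk_r2_score(y, y_pred))
# R^2 computed between the two prediction vectors themselves
print("agreement   :", sk_r2_score(y_pred_sklearn, y_pred))
```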

The differences are very small, but we can also compute the $R^2$ score between the sklearn predictions and our predictions.

The resulting value of 0.9999999976969305 shows that the difference is negligible.

The probabilistic approach to linear regression. Maximum likelihood estimation.

Up to this point, for the linear regression $y_i = \alpha + \beta x_i + \varepsilon_i$ we've assumed that the error term $\varepsilon_i$ is some kind of noise coming from unknown sources. Now we are going to assume that the error term for each point is Gaussian distributed with mean zero and some unknown variance $\sigma^2$: $$\varepsilon_i \sim N(0,\sigma^2)$$

Because the error term $\varepsilon_i$ is Gaussian distributed, the target variable $y_i$ is also Gaussian distributed, with the same variance $\sigma^2$ as $\varepsilon_i$ and mean $\mu_i = \alpha + \beta x_i$ (from here on we write $\theta_0 = \alpha$ and $\theta_1 = \beta$):

$$ y_i \sim N( \theta_0 + \theta_1 x_i,\sigma^2) $$

The probability density function for $y_i$ is $$ p(y_i | x_i,\Theta,\sigma^2) = \frac{1}{\sqrt{2\pi \sigma^2}} e^{ -\frac{ ((\theta_0 + \theta_1 x_i)- y_i)^2 }{2\sigma^2}}$$

The basic idea is that if the data were to have been generated by the model, what parameters were most likely to have been used? That is, what is the probability of seeing the data $D$ , given a specific set of parameters $\Theta$? Once again, this is a conditional probability density problem. We are seeking the values of $\Theta$ that maximise $p(D|\Theta)$.

$$\Theta = \underset{\Theta}{\operatorname{argmax}}\; p(D|\Theta)$$

In linear regression problems we need to make the assumption that the feature vectors are all independent and identically distributed, so that we can write the joint probability as a product over every pair $(x_i,y_i)$:

$$l(\Theta) = p(Y | X ,\Theta,\sigma^2)=\prod_i p(y_i | x_i,\Theta ,\sigma^2) $$

We can take the $\log$ of $l(\Theta)$; this function is called the log-likelihood function. $$L = \log\prod_i p(y_i | x_i,\Theta ,\sigma^2)$$ The likelihood and the log-likelihood reach their maximum at the same $\Theta$, which is why they do the same job. We also use the log-likelihood because:

The natural log cancels out with the exponential, turns products into sums of logs, and division into subtraction of logs; so our log-likelihood looks much simpler:

Now the log-likelihood can be written as $$L(\theta_0,\theta_1,\sigma^2) = -n\log\big(\sqrt{2\pi\sigma^2}\big) - \frac{1}{2\sigma^2}\sum_i\big(y_i - \theta_0 - \theta_1 x_i\big)^2$$ where $n$ is the number of data points.

To remove the negative signs, let’s recall that maximizing a function is the same thing as minimizing its negative. So instead of maximizing the likelihood, let’s minimize the negative log-likelihood: $$\underset{\theta}{\operatorname{argmax}}\, L = \underset{\theta}{\operatorname{argmin}}\,(-L)$$

To minimize the negative log-likelihood with respect to the linear parameters (the $\theta$s), we can treat the variance term as a fixed constant. Therefore, we can throw out any constant terms and elegantly write what we’re trying to minimize as:
$$\sum_i\big(y_i - \theta_0 - \theta_1 x_i\big)^2$$ which leads to the ordinary linear regression that we investigated before.
The maximum likelihood estimate for our linear model is the line which minimizes the sum of squared errors!
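As a quick numerical sanity check of that statement, here is a sketch that is not part of the original analysis: synthetic data, SciPy's general-purpose optimizer applied to the negative log-likelihood with $\sigma$ held fixed, and NumPy's polyfit for the least-squares line. The two parameter estimates should agree:

```python
import numpy as np
from scipy.optimize import minimize

# synthetic data: a made-up line plus Gaussian noise
rng = np.random.default_rng(0)
x = np.linspace(0.0, 10.0, 50)
y = 4.0 + 7.0 * x + rng.normal(0.0, 2.0, size=x.size)

def neg_log_likelihood(theta, sigma=1.0):
    """Negative log-likelihood; sigma is held fixed, it does not change the minimizer."""
    resid = y - theta[0] - theta[1] * x
    return len(y) * np.log(np.sqrt(2.0 * np.pi) * sigma) + np.sum(resid ** 2) / (2.0 * sigma ** 2)

theta_mle = minimize(neg_log_likelihood, x0=np.zeros(2)).x
slope, intercept = np.polyfit(x, y, deg=1)   # least-squares fit

print("MLE          :", theta_mle)           # [intercept, slope]
print("Least squares:", intercept, slope)
```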

References