'''Multiple linear regression

Input:
    x, y... column vectors

Output:
    x = [x1, x2, ..., xk]... array of columns (resulting in a 2d matrix)
    beta... estimated regression coefficients
    yhat... predicted values
    ybar... mean value
    Syy, SSE, SSR... total variability
    R2... coefficient of determination

Typical call of program:
    from python18_2 import fct
    from python18_examples import vending_machine
    import numpy as np
    x1, x2, y = vending_machine()
    x = np.concatenate((x1, x2), axis=1)
    res = fct(x,y)
    
Check python18_examples for more examples
'''

import numpy as np

def fct(x,y):
    assert y.shape[1] == 1  # y has to be a column vector
    
    X = np.ones((x.shape[0],x.shape[1] + 1))
    X[:,1:] = x
    
    beta = np.linalg.solve(X.T.dot(X), X.T.dot(y))
    yhat = X.dot(beta)
    SSE = np.sum((y -yhat)**2)
    
    ybar = np.mean(y)
    Syy = np.sum((y - ybar)**2)
    
    SSR = Syy - SSE
    R2 = SSR/Syy

    print('X = ', X)
    print('beta = ', beta)
    print('yhat = ', yhat)
    print('ybar = ', ybar)
    print('Syy = ', Syy)
    print('SSE = ', SSE)
    print('SSR = ', SSR)
    print('R2 = ', R2)
    
    return X, beta, yhat, ybar, Syy, SSE, SSR, R2