import random
import numpy
import csv
from matplotlib import pyplot as plt
from collections import defaultdict

def readRows(header, r):
    prevID = None
    seqY, predVec, timeVec = [], [], []
    for row in r:
        d = dict(zip(header, row))
        if len(seqY) and d['ID'] != prevID:
            yield seqY, predVec, timeVec, prevID
            seqY, predVec, timeVec = [], [], []
        prevID = d['ID']
        y = float(d['CONC'])
        pred = float(d['PRED'])
        seqY.append(y)
        predVec.append(pred)
        timeVec.append(float(d['TIME']))

nObs = 47

def readCSV(fname):
    f = open(fname, 'r')
    r = csv.reader(f)
    header = next(r)
    yy = []
    predVecs = []
    timeVecs = []
    ids = []
    for y, predVec, timeVec, pid in readRows(header, r):
        yy.append(y)
        predVecs.append(predVec)
        timeVecs.append(timeVec)
        ids.append(int(pid))
    ids = [ids[i] for i in range(len(ids)) if len(yy[i]) == nObs] # Make sure all instances have the same length
    yy = [y for y in yy if len(y) == nObs]
    predVecs = [p for p in predVecs if len(p) == nObs]
    timeVecs = [t for t in timeVecs if len(t) == nObs]
    return {"ids": numpy.array(ids),
            "yy": numpy.array(yy),
            "predVecs": numpy.array(predVecs),
            "timeVecs": numpy.array(timeVecs)}

# First model from Jerry
MEM1 = readCSV("merged.csv")

# Better model from Jerry
MEM2 = readCSV("merged3.csv")

def readResults(fname):
    f = open(fname, 'r')
    r = csv.reader(f)
    ids = []
    predVecs = []
    predVec = []
    for row in r:
        i,val = int(row[0]), float(row[1])
        if ids == [] or i != ids[-1]:
            ids.append(i)
        predVec.append(val)
        if len(predVec) == nObs:
            predVecs.append(predVec)
            predVec = []
    return {"ids": numpy.array(ids),
            "predVecs": numpy.array(predVecs)}

LSTM_MSE = readResults("outMSE.csv")
LSTM_ABS = readResults("outABS.csv")

### Specify the prediction vectors from each model

ids = MEM1['ids']
timeVecs = MEM1['timeVecs'] # Should be the same for all models
yy = MEM1['yy'] # Should be the same for all models

yMEM1 = MEM1['predVecs']
yMEM2 = MEM2['predVecs']
yLSTM_MSE = LSTM_MSE['predVecs']
yLSTM_ABS = LSTM_ABS['predVecs']

### Error metrics

def MSE(preds, labels):
    ps = numpy.reshape(preds, [-1])
    ls = numpy.reshape(labels, [-1])
    return sum((ls - ps)**2) / len(ls)

def PE(preds, labels):
    ps = numpy.reshape(preds, [-1])
    ls = numpy.reshape(labels, [-1])
    error = numpy.median(numpy.abs(numpy.nan_to_num((ls - ps)/ps)))
    bias = numpy.median(numpy.nan_to_num((ls - ps)/ps))
    return error, bias

Basic error metrics¶

for name, y in [("Four-compartment MEM             ", yMEM1),
                ("MEM2                             ", yMEM2),
                ("GRU, optimized for MSE           ", yLSTM_MSE),
                ("GRU, optimized for Absolute Error", yLSTM_ABS)]:
    print("Overall Mean Squared Error, " + name + ": " + str(MSE(y, yy)))

Overall Mean Squared Error, Four-compartment MEM             : 22.90962634727098
Overall Mean Squared Error, MEM2                             : 22.413902022703645
Overall Mean Squared Error, GRU, optimized for MSE           : 15.99085036932094
Overall Mean Squared Error, GRU, optimized for Absolute Error: 20.831857451768215

for name, y in [("Four-compartment MEM             ", yMEM1),
                ("MEM2                             ", yMEM2),
                ("GRU, optimized for MSE           ", yLSTM_MSE),
                ("GRU, optimized for Absolute Error", yLSTM_ABS)]:
    e,b = PE(y, yy)
    print("Overall scaled error, " + name + ": " + str(e))
    print("Overall bias,         " + name + ": " + str(b))

Overall scaled error, Four-compartment MEM             : 0.58892330817696
Overall bias,         Four-compartment MEM             : -0.22123357862841592
Overall scaled error, MEM2                             : 0.5700939796710535
Overall bias,         MEM2                             : -0.30852311742058114
Overall scaled error, GRU, optimized for MSE           : 0.9207569461174572
Overall bias,         GRU, optimized for MSE           : -0.04660113282609636
Overall scaled error, GRU, optimized for Absolute Error: 0.5227238701042003
Overall bias,         GRU, optimized for Absolute Error: -0.3380311978126914

/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py:4: RuntimeWarning: invalid value encountered in true_divide
  after removing the cwd from sys.path.
/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py:5: RuntimeWarning: invalid value encountered in true_divide
  """

def plotOne(y, yMEM, yLSTM, X, i, error = "Absolute"):
    if error == "Absolute":
        MyMEM,_ = PE(y, yMEM)
        MyLSTM,_ = PE(y, yLSTM)
    else:
        MyMEM = MSE(y, yMEM)
        MyLSTM = MSE(y, yLSTM)
    plt.figure(figsize=(16,6))
    plt.scatter(X, y, label='Concentration')
    plt.plot(X, yMEM, label='Mixed Effects Model (' + error + ' Error = ' + str(MyMEM)[:5] + ')')
    plt.plot(X, yLSTM, label='Gated Recurrent Unit (' + error + ' Error = ' + str(MyLSTM)[:5] + ')')
    plt.xlabel("Time")
    plt.ylabel("Concentration (predicted and observed)")
    plt.title("Predicted and actual concentration versus time, instance ID " + str(i))
    plt.legend(loc="best")
    plt.xlim(1,20)
    yMax = int(max(max(y), max(yMEM), max(yLSTM)) + 1)
    plt.ylim(0,yMax)
    plt.show()
    plt.figure(figsize=(16,6))
    plt.plot(X, [0]*len(X), 'k--')
    plt.plot(X, list(y - yMEM), label="Residuals, MEM")
    #plt.plot(X, list(y - numpy.reshape(preds, [-1])), label="Residuals, LSTM (patient features only)")
    plt.plot(X, list(y - numpy.reshape(yLSTM, [-1])), label="Residuals, Gated Recurrent Unit")
    plt.xlabel("Time")
    plt.ylabel("Residual")
    plt.title("Residuals versus time, instance ID " + str(i))
    plt.legend(loc="best")
    plt.xlim(1,47)
    plt.show()

Absolute error plots, per instance¶

for y, yMEM, yLSTM, X, i in zip(yy, yMEM2, yLSTM_ABS, timeVecs, ids):
    plotOne(y, yMEM, yLSTM, X, i, "Absolute")

/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py:4: RuntimeWarning: divide by zero encountered in true_divide
  after removing the cwd from sys.path.
/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py:4: RuntimeWarning: invalid value encountered in true_divide
  after removing the cwd from sys.path.
/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py:5: RuntimeWarning: divide by zero encountered in true_divide
  """
/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py:5: RuntimeWarning: invalid value encountered in true_divide
  """

Mean squared error plots, per instance¶

for y, yMEM, yLSTM, X, i in zip(yy, yMEM2, yLSTM_MSE, timeVecs, ids):
    plotOne(y, yMEM, yLSTM, X, i, "Mean Squared")

Error versus bias¶

Xmem = []
Ymem = []
Xlstm = []
Ylstm = []
for i in range(-2000, 2001):
    z = 1 + i*0.005
    PEmem = numpy.nan_to_num((yy - yMEM2*z)/(yMEM2*z))
    PElstm = numpy.nan_to_num((yy - yLSTM_ABS*z)/(yLSTM_ABS*z))
    Xmem.append(numpy.median(PEmem))
    Xlstm.append(numpy.median(PElstm))
    Ymem.append(numpy.median(numpy.abs(PEmem)))
    Ylstm.append(numpy.median(numpy.abs(PElstm)))
Xmem = numpy.array(Xmem)
Ymem = numpy.array(Ymem)
Xlstm = numpy.array(Xlstm)
Ylstm = numpy.array(Ylstm)

plt.figure(figsize=(16,6))
plt.plot(Xmem, Ymem, label='Mixed Effects Model')
plt.plot(Xlstm, Ylstm, label='Gated Recurrent Unit')
plt.xlabel("Bias")
plt.ylabel("Error")
plt.legend(loc="best")
plt.xlim(-0.5,0.1)
plt.ylim(0.5,0.85)
plt.title("Error vs Bias")
plt.show()

/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py:7: RuntimeWarning: invalid value encountered in true_divide
  import sys
/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py:7: RuntimeWarning: divide by zero encountered in true_divide
  import sys
/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py:8: RuntimeWarning: divide by zero encountered in true_divide
  
/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py:8: RuntimeWarning: invalid value encountered in true_divide
  
/usr/local/lib/python3.5/dist-packages/numpy/core/_methods.py:70: RuntimeWarning: overflow encountered in reduce
  ret = umr_sum(arr, axis, dtype, out, keepdims)

Residuals versus time¶

yy_ = numpy.reshape(yy, [-1])
yLSTM_ = numpy.reshape(yLSTM_ABS, [-1])
yMEM_ = numpy.reshape(yMEM2, [-1])
timeVecs_ = numpy.reshape(timeVecs, [-1])
vals = list(zip(timeVecs_,yy_,yLSTM_,yMEM_))
vals.sort()
K = 100
x = []
y = []
pLSTM = []
pMEM = []
resLSTM = []
resMEM = []
resLSTMabs = []
resMEMabs = []
resLSTMscaled = []
resMEMscaled = []
for i in range(len(vals)-10):
    sliding = vals[i:i+K]
    x_ = numpy.array([x[0] for x in sliding])
    y_ = numpy.array([x[1] for x in sliding])
    pLSTM_ = numpy.array([x[2] for x in sliding])
    pMEM_ = numpy.array([x[3] for x in sliding])
    resLSTM_ = pLSTM_ - y_
    resMEM_ = pMEM_ - y_
    x.append(numpy.mean(x_))
    y.append(numpy.mean(y_))
    pLSTM.append(numpy.mean(pLSTM_))
    pMEM.append(numpy.mean(pMEM_))
    resLSTM.append(numpy.mean(resLSTM_))
    resMEM.append(numpy.mean(resMEM_))
    resLSTMabs.append(numpy.mean(numpy.abs(resLSTM_)))
    resMEMabs.append(numpy.mean(numpy.abs(resMEM_)))
    resLSTMscaled.append(numpy.median(numpy.abs(numpy.nan_to_num((y_ - pLSTM_)/pLSTM_))))
    resMEMscaled.append(numpy.median(numpy.abs(numpy.nan_to_num((y_ - pMEM_)/pMEM_))))

plt.figure(figsize=(16,6))
plt.plot(x, [0]*len(x), 'k--')
plt.plot(x, resMEM, label='Mixed Effects Model')
plt.plot(x, resLSTM, label='Gated Recurrent Unit')
plt.xlabel("Time")
plt.ylabel("Residual")
plt.legend(loc="best")
plt.xlim(0,20)
#plt.ylim(0.5,0.85)
plt.title("Residuals over time (" + str(K) + " observation sliding window)")
plt.show()

plt.figure(figsize=(16,6))
plt.plot(x, resMEMabs, label='Mixed Effects Model')
plt.plot(x, resLSTMabs, label='Gated Recurrent Unit')
plt.xlabel("Time")
plt.ylabel("Absolute residual")
plt.legend(loc="best")
plt.xlim(0,20)
#plt.ylim(0.5,0.85)
plt.title("Absolute residuals over time (" + str(K) + " observation sliding window)")
plt.show()

plt.figure(figsize=(16,6))
plt.plot(x, resMEMscaled, label='Mixed Effects Model')
plt.plot(x, resLSTMscaled, label='Gated Recurrent Unit')
plt.xlabel("Time")
plt.ylabel("Scaled absolute residual")
plt.legend(loc="best")
plt.xlim(0,20)
#plt.ylim(0.5,0.85)
plt.title("Scaled absolute residuals over time (" + str(K) + " observation sliding window)")
plt.show()

/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py:35: RuntimeWarning: invalid value encountered in true_divide

yy_ = numpy.reshape(yy, [-1])
yLSTM_ = numpy.reshape(yLSTM_ABS, [-1])
yMEM_ = numpy.reshape(yMEM2, [-1])
timeVecs_ = numpy.reshape(timeVecs, [-1])
valsLSTM = list(zip(yLSTM_,yy_))
valsMEM = list(zip(yMEM_,yy_))
valsLSTMabs = list(zip(yLSTM_, numpy.abs(numpy.nan_to_num((yy_ - yLSTM_)/yLSTM_))))
valsMEMabs = list(zip(yMEM_, numpy.abs(numpy.nan_to_num((yy_ - yMEM_)/yMEM_))))
valsLSTM.sort()
valsMEM.sort()
valsLSTMabs.sort()
valsMEMabs.sort()
K = 50
for vals,ylab,title in \
    [(valsLSTM, "Concentration", "Prediction vs. Concentration, GRU"),
     (valsMEM, "Concentration", "Prediction vs. Concentration, MEM"),
     (valsLSTMabs, "Scaled absolute residual", "Prediction vs. residual, GRU"),
     (valsMEMabs, "Scaled absolute residual", "Prediction vs. residual, MEM")]:
    x = []
    y = []
    p = []
    res = []
    resabs = []
    resscaled = []
    scatX = [x[0] for x in vals]
    scatY = [x[1] for x in vals]
    for i in range(len(vals)-10):
        sliding = vals[i:i+K]
        p_ = numpy.array([x[0] for x in sliding])
        y_ = numpy.array([x[1] for x in sliding])
        x.append(numpy.mean(p_))
        y.append(numpy.mean(y_))

    plt.figure(figsize=(6,6))
    if max(scatY) > 10:
        plt.ylim(0,25)
        plt.plot([0,100],[0,100], 'k-')
    else:
        plt.ylim(0,1)
    plt.scatter(scatX, scatY, lw = 0)
    plt.plot(x,y,'r', lw=2)
    plt.title(title)
    plt.xlabel("Prediction")
    plt.ylabel(ylab)
    #plt.legend(loc="best")
    plt.xlim(0,25)
    plt.show()

/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py:8: RuntimeWarning: invalid value encountered in true_divide