import random
import numpy
import csv
from matplotlib import pyplot as plt
from collections import defaultdict
def readRows(header, r):
prevID = None
seqY, predVec, timeVec = [], [], []
for row in r:
d = dict(zip(header, row))
if len(seqY) and d['ID'] != prevID:
yield seqY, predVec, timeVec, prevID
seqY, predVec, timeVec = [], [], []
prevID = d['ID']
y = float(d['CONC'])
pred = float(d['PRED'])
seqY.append(y)
predVec.append(pred)
timeVec.append(float(d['TIME']))
nObs = 47
def readCSV(fname):
f = open(fname, 'r')
r = csv.reader(f)
header = next(r)
yy = []
predVecs = []
timeVecs = []
ids = []
for y, predVec, timeVec, pid in readRows(header, r):
yy.append(y)
predVecs.append(predVec)
timeVecs.append(timeVec)
ids.append(int(pid))
ids = [ids[i] for i in range(len(ids)) if len(yy[i]) == nObs] # Make sure all instances have the same length
yy = [y for y in yy if len(y) == nObs]
predVecs = [p for p in predVecs if len(p) == nObs]
timeVecs = [t for t in timeVecs if len(t) == nObs]
return {"ids": numpy.array(ids),
"yy": numpy.array(yy),
"predVecs": numpy.array(predVecs),
"timeVecs": numpy.array(timeVecs)}
# First model from Jerry
MEM1 = readCSV("merged.csv")
# Better model from Jerry
MEM2 = readCSV("merged3.csv")
def readResults(fname):
f = open(fname, 'r')
r = csv.reader(f)
ids = []
predVecs = []
predVec = []
for row in r:
i,val = int(row[0]), float(row[1])
if ids == [] or i != ids[-1]:
ids.append(i)
predVec.append(val)
if len(predVec) == nObs:
predVecs.append(predVec)
predVec = []
return {"ids": numpy.array(ids),
"predVecs": numpy.array(predVecs)}
LSTM_MSE = readResults("outMSE.csv")
LSTM_ABS = readResults("outABS.csv")
### Specify the prediction vectors from each model
ids = MEM1['ids']
timeVecs = MEM1['timeVecs'] # Should be the same for all models
yy = MEM1['yy'] # Should be the same for all models
yMEM1 = MEM1['predVecs']
yMEM2 = MEM2['predVecs']
yLSTM_MSE = LSTM_MSE['predVecs']
yLSTM_ABS = LSTM_ABS['predVecs']
### Error metrics
def MSE(preds, labels):
ps = numpy.reshape(preds, [-1])
ls = numpy.reshape(labels, [-1])
return sum((ls - ps)**2) / len(ls)
def PE(preds, labels):
ps = numpy.reshape(preds, [-1])
ls = numpy.reshape(labels, [-1])
error = numpy.median(numpy.abs(numpy.nan_to_num((ls - ps)/ps)))
bias = numpy.median(numpy.nan_to_num((ls - ps)/ps))
return error, bias
for name, y in [("Four-compartment MEM ", yMEM1),
("MEM2 ", yMEM2),
("GRU, optimized for MSE ", yLSTM_MSE),
("GRU, optimized for Absolute Error", yLSTM_ABS)]:
print("Overall Mean Squared Error, " + name + ": " + str(MSE(y, yy)))
for name, y in [("Four-compartment MEM ", yMEM1),
("MEM2 ", yMEM2),
("GRU, optimized for MSE ", yLSTM_MSE),
("GRU, optimized for Absolute Error", yLSTM_ABS)]:
e,b = PE(y, yy)
print("Overall scaled error, " + name + ": " + str(e))
print("Overall bias, " + name + ": " + str(b))
def plotOne(y, yMEM, yLSTM, X, i, error = "Absolute"):
if error == "Absolute":
MyMEM,_ = PE(y, yMEM)
MyLSTM,_ = PE(y, yLSTM)
else:
MyMEM = MSE(y, yMEM)
MyLSTM = MSE(y, yLSTM)
plt.figure(figsize=(16,6))
plt.scatter(X, y, label='Concentration')
plt.plot(X, yMEM, label='Mixed Effects Model (' + error + ' Error = ' + str(MyMEM)[:5] + ')')
plt.plot(X, yLSTM, label='Gated Recurrent Unit (' + error + ' Error = ' + str(MyLSTM)[:5] + ')')
plt.xlabel("Time")
plt.ylabel("Concentration (predicted and observed)")
plt.title("Predicted and actual concentration versus time, instance ID " + str(i))
plt.legend(loc="best")
plt.xlim(1,20)
yMax = int(max(max(y), max(yMEM), max(yLSTM)) + 1)
plt.ylim(0,yMax)
plt.show()
plt.figure(figsize=(16,6))
plt.plot(X, [0]*len(X), 'k--')
plt.plot(X, list(y - yMEM), label="Residuals, MEM")
#plt.plot(X, list(y - numpy.reshape(preds, [-1])), label="Residuals, LSTM (patient features only)")
plt.plot(X, list(y - numpy.reshape(yLSTM, [-1])), label="Residuals, Gated Recurrent Unit")
plt.xlabel("Time")
plt.ylabel("Residual")
plt.title("Residuals versus time, instance ID " + str(i))
plt.legend(loc="best")
plt.xlim(1,47)
plt.show()
for y, yMEM, yLSTM, X, i in zip(yy, yMEM2, yLSTM_ABS, timeVecs, ids):
plotOne(y, yMEM, yLSTM, X, i, "Absolute")
for y, yMEM, yLSTM, X, i in zip(yy, yMEM2, yLSTM_MSE, timeVecs, ids):
plotOne(y, yMEM, yLSTM, X, i, "Mean Squared")
Xmem = []
Ymem = []
Xlstm = []
Ylstm = []
for i in range(-2000, 2001):
z = 1 + i*0.005
PEmem = numpy.nan_to_num((yy - yMEM2*z)/(yMEM2*z))
PElstm = numpy.nan_to_num((yy - yLSTM_ABS*z)/(yLSTM_ABS*z))
Xmem.append(numpy.median(PEmem))
Xlstm.append(numpy.median(PElstm))
Ymem.append(numpy.median(numpy.abs(PEmem)))
Ylstm.append(numpy.median(numpy.abs(PElstm)))
Xmem = numpy.array(Xmem)
Ymem = numpy.array(Ymem)
Xlstm = numpy.array(Xlstm)
Ylstm = numpy.array(Ylstm)
plt.figure(figsize=(16,6))
plt.plot(Xmem, Ymem, label='Mixed Effects Model')
plt.plot(Xlstm, Ylstm, label='Gated Recurrent Unit')
plt.xlabel("Bias")
plt.ylabel("Error")
plt.legend(loc="best")
plt.xlim(-0.5,0.1)
plt.ylim(0.5,0.85)
plt.title("Error vs Bias")
plt.show()
yy_ = numpy.reshape(yy, [-1])
yLSTM_ = numpy.reshape(yLSTM_ABS, [-1])
yMEM_ = numpy.reshape(yMEM2, [-1])
timeVecs_ = numpy.reshape(timeVecs, [-1])
vals = list(zip(timeVecs_,yy_,yLSTM_,yMEM_))
vals.sort()
K = 100
x = []
y = []
pLSTM = []
pMEM = []
resLSTM = []
resMEM = []
resLSTMabs = []
resMEMabs = []
resLSTMscaled = []
resMEMscaled = []
for i in range(len(vals)-10):
sliding = vals[i:i+K]
x_ = numpy.array([x[0] for x in sliding])
y_ = numpy.array([x[1] for x in sliding])
pLSTM_ = numpy.array([x[2] for x in sliding])
pMEM_ = numpy.array([x[3] for x in sliding])
resLSTM_ = pLSTM_ - y_
resMEM_ = pMEM_ - y_
x.append(numpy.mean(x_))
y.append(numpy.mean(y_))
pLSTM.append(numpy.mean(pLSTM_))
pMEM.append(numpy.mean(pMEM_))
resLSTM.append(numpy.mean(resLSTM_))
resMEM.append(numpy.mean(resMEM_))
resLSTMabs.append(numpy.mean(numpy.abs(resLSTM_)))
resMEMabs.append(numpy.mean(numpy.abs(resMEM_)))
resLSTMscaled.append(numpy.median(numpy.abs(numpy.nan_to_num((y_ - pLSTM_)/pLSTM_))))
resMEMscaled.append(numpy.median(numpy.abs(numpy.nan_to_num((y_ - pMEM_)/pMEM_))))
plt.figure(figsize=(16,6))
plt.plot(x, [0]*len(x), 'k--')
plt.plot(x, resMEM, label='Mixed Effects Model')
plt.plot(x, resLSTM, label='Gated Recurrent Unit')
plt.xlabel("Time")
plt.ylabel("Residual")
plt.legend(loc="best")
plt.xlim(0,20)
#plt.ylim(0.5,0.85)
plt.title("Residuals over time (" + str(K) + " observation sliding window)")
plt.show()
plt.figure(figsize=(16,6))
plt.plot(x, resMEMabs, label='Mixed Effects Model')
plt.plot(x, resLSTMabs, label='Gated Recurrent Unit')
plt.xlabel("Time")
plt.ylabel("Absolute residual")
plt.legend(loc="best")
plt.xlim(0,20)
#plt.ylim(0.5,0.85)
plt.title("Absolute residuals over time (" + str(K) + " observation sliding window)")
plt.show()
plt.figure(figsize=(16,6))
plt.plot(x, resMEMscaled, label='Mixed Effects Model')
plt.plot(x, resLSTMscaled, label='Gated Recurrent Unit')
plt.xlabel("Time")
plt.ylabel("Scaled absolute residual")
plt.legend(loc="best")
plt.xlim(0,20)
#plt.ylim(0.5,0.85)
plt.title("Scaled absolute residuals over time (" + str(K) + " observation sliding window)")
plt.show()
yy_ = numpy.reshape(yy, [-1])
yLSTM_ = numpy.reshape(yLSTM_ABS, [-1])
yMEM_ = numpy.reshape(yMEM2, [-1])
timeVecs_ = numpy.reshape(timeVecs, [-1])
valsLSTM = list(zip(yLSTM_,yy_))
valsMEM = list(zip(yMEM_,yy_))
valsLSTMabs = list(zip(yLSTM_, numpy.abs(numpy.nan_to_num((yy_ - yLSTM_)/yLSTM_))))
valsMEMabs = list(zip(yMEM_, numpy.abs(numpy.nan_to_num((yy_ - yMEM_)/yMEM_))))
valsLSTM.sort()
valsMEM.sort()
valsLSTMabs.sort()
valsMEMabs.sort()
K = 50
for vals,ylab,title in \
[(valsLSTM, "Concentration", "Prediction vs. Concentration, GRU"),
(valsMEM, "Concentration", "Prediction vs. Concentration, MEM"),
(valsLSTMabs, "Scaled absolute residual", "Prediction vs. residual, GRU"),
(valsMEMabs, "Scaled absolute residual", "Prediction vs. residual, MEM")]:
x = []
y = []
p = []
res = []
resabs = []
resscaled = []
scatX = [x[0] for x in vals]
scatY = [x[1] for x in vals]
for i in range(len(vals)-10):
sliding = vals[i:i+K]
p_ = numpy.array([x[0] for x in sliding])
y_ = numpy.array([x[1] for x in sliding])
x.append(numpy.mean(p_))
y.append(numpy.mean(y_))
plt.figure(figsize=(6,6))
if max(scatY) > 10:
plt.ylim(0,25)
plt.plot([0,100],[0,100], 'k-')
else:
plt.ylim(0,1)
plt.scatter(scatX, scatY, lw = 0)
plt.plot(x,y,'r', lw=2)
plt.title(title)
plt.xlabel("Prediction")
plt.ylabel(ylab)
#plt.legend(loc="best")
plt.xlim(0,25)
plt.show()