/* Copyright (c) 2009 Yahoo! Inc. All rights reserved. The copyrights embodied in the content of this file are licensed under the BSD (revised) open source license */ #include #include #include using namespace std; #include "loss_functions.h" #include "global_data.h" class squaredloss : public loss_function { public: squaredloss() { } float getLoss(float prediction, float label) { if (prediction <= global.sd->max_label && prediction >= global.sd->min_label) { float example_loss = (prediction - label) * (prediction - label); return example_loss; } else if (prediction < global.sd->min_label) if (label == global.sd->min_label) return 0.; else return (label - global.sd->min_label) * (label - global.sd->min_label) + 2. * (label-global.sd->min_label) * (global.sd->min_label - prediction); else if (label == global.sd->max_label) return 0.; else return (global.sd->max_label - label) * (global.sd->max_label - label) + 2. * (global.sd->max_label - label) * (prediction - global.sd->max_label); } float getUpdate(float prediction, float label,float eta_t, float norm) { if (eta_t < 1e-6){ /* When exp(-eta_t)~= 1 we replace 1-exp(-eta_t) * with its first order Taylor expansion around 0 * to avoid catastrophic cancellation. */ return (label - prediction)*eta_t/norm; } return (label - prediction)*(1-exp(-eta_t))/norm; } float getRevertingWeight(float prediction, float eta_t){ float t = 0.5*(global.sd->min_label+global.sd->max_label); float alternative = (prediction > t) ? global.sd->min_label : global.sd->max_label; return log((alternative-prediction)/(alternative-t))/eta_t; } float getSquareGrad(float prediction, float label) { return (prediction - label) * (prediction - label); } float first_derivative(float prediction, float label) { if (prediction < global.sd->min_label) prediction = global.sd->min_label; else if (prediction > global.sd->max_label) prediction = global.sd->max_label; return 2. * (prediction-label); } float second_derivative(float prediction, float label) { if (prediction <= global.sd->max_label && prediction >= global.sd->min_label) return 2.; else return 0.; } }; class classic_squaredloss : public loss_function { public: classic_squaredloss() { } float getLoss(float prediction, float label) { float example_loss = (prediction - label) * (prediction - label); return example_loss; } float getUpdate(float prediction, float label,float eta_t, float norm) { return eta_t*(label - prediction)/norm; } float getRevertingWeight(float prediction, float eta_t){ float t = 0.5*(global.sd->min_label+global.sd->max_label); float alternative = (prediction > t) ? global.sd->min_label : global.sd->max_label; return (t-prediction)/((alternative-prediction)*eta_t); } float getSquareGrad(float prediction, float label) { return (prediction - label) * (prediction - label); } float first_derivative(float prediction, float label) { return 2. * (prediction-label); } float second_derivative(float prediction, float label) { return 2.; } }; class hingeloss : public loss_function { public: hingeloss() { } float getLoss(float prediction, float label) { float e = 1 - label*prediction; return (e > 0) ? e : 0; } float getUpdate(float prediction, float label,float eta_t, float norm) { if(label*prediction >= label*label) return 0; float err=(label*label-label*prediction)/(label*label); float normal= eta_t; return label * (normal < err ? normal : err)/norm; } float getRevertingWeight(float prediction, float eta_t){ return fabs(prediction)/eta_t; } float getSquareGrad(float prediction, float label) { return first_derivative(prediction,label); } float first_derivative(float prediction, float label) { return (label*prediction >= label*label) ? 0 : -label; } float second_derivative(float prediction, float label) { return 0.; } }; class logloss : public loss_function { public: logloss() { } float getLoss(float prediction, float label) { return log(1 + exp(-label * prediction)); } float getUpdate(float prediction, float label, float eta_t, float norm) { float w,x; float d = exp(label * prediction); if(eta_t < 1e-6){ /* As with squared loss, for small eta_t we replace the update * with its first order Taylor expansion to avoid numerical problems */ return label*eta_t/((1+d)*norm); } x = eta_t + label*prediction + d; w = wexpmx(x); return -(label*w+prediction)/norm; } inline float wexpmx(float x){ /* This piece of code is approximating W(exp(x))-x. * W is the Lambert W function: W(z)*exp(W(z))=z. * The absolute error of this approximation is less than 9e-5. * Faster/better approximations can be substituted here. */ double w = x>=1. ? 0.86*x+0.01 : exp(0.8*x-0.65); //initial guess double r = x>=1. ? x-log(w)-w : 0.2*x+0.65-w; //residual double t = 1.+w; double u = 2.*t*(t+2.*r/3.); //magic return w*(1.+r/t*(u-r)/(u-2.*r))-x; //more magic } float getRevertingWeight(float prediction, float eta_t){ float z = -fabs(prediction); return (1-z-exp(z))/eta_t; } float first_derivative(float prediction, float label) { float v = - label/(1+exp(label * prediction)); return v; } float getSquareGrad(float prediction, float label) { float d = first_derivative(prediction,label); return d*d; } float second_derivative(float prediction, float label) { float p = 1 / (1+exp(label*prediction)); return p*(1-p); } }; class quantileloss : public loss_function { public: quantileloss(double &tau_) : tau(tau_) { } float getLoss(float prediction, float label) { float e = label - prediction; if(e > 0) { return tau * e; } else { return -(1 - tau) * e; } } float getUpdate(float prediction, float label, float eta_t, float norm) { float err = label - prediction; if(err == 0) return 0; float normal = eta_t;//base update size if(err > 0) { normal = tau*normal; return tau*(normal < err ? normal : err) / norm; } else { normal = -(1-tau) * normal; return ( normal < - err ? normal : err) / norm; } } float getRevertingWeight(float prediction, float eta_t){ float v,t; t = 0.5*(global.sd->min_label+global.sd->max_label); if(prediction > t) v = -(1-tau); else v = tau; return (t - prediction)/(eta_t*v); } float first_derivative(float prediction, float label) { float e = label - prediction; if(e == 0) return 0; return e > 0 ? -tau : (1-tau); } float getSquareGrad(float prediction, float label) { float fd = first_derivative(prediction,label); return fd*fd; } float second_derivative(float prediction, float label) { return 0.; } double tau; }; loss_function* getLossFunction(string funcName, double function_parameter) { if(funcName.compare("squared") == 0 || funcName.compare("Huber") == 0) { return new squaredloss(); } else if(funcName.compare("classic") == 0){ return new classic_squaredloss(); } else if(funcName.compare("hinge") == 0) { global.binary_label = true; return new hingeloss(); } else if(funcName.compare("logistic") == 0) { if (set_minmax != noop_mm) { global.sd->min_label = -100; global.sd->max_label = 100; global.binary_label = true; } return new logloss(); } else if(funcName.compare("quantile") == 0 || funcName.compare("pinball") == 0 || funcName.compare("absolute") == 0) { return new quantileloss(function_parameter); } else { cout << "Invalid loss function name: \'" << funcName << "\' Bailing!" << endl; exit(1); } cout << "end getLossFunction" << endl; }