00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #include "TMVA/OptimizeConfigParameters.h"
00024
00025 #include <limits>
00026 #include <cstdlib>
00027 #include "TMath.h"
00028 #include "TGraph.h"
00029 #include "TH1.h"
00030 #include "TH2.h"
00031 #include "TDirectory.h"
00032
00033 #include "TMVA/IMethod.h"
00034 #include "TMVA/MethodBase.h"
00035 #include "TMVA/GeneticFitter.h"
00036 #include "TMVA/MinuitFitter.h"
00037 #include "TMVA/Interval.h"
00038 #include "TMVA/PDF.h"
00039 #include "TMVA/MsgLogger.h"
00040 #include "TMVA/Tools.h"
00041
00042 ClassImp(TMVA::OptimizeConfigParameters)
00043
00044
00045 TMVA::OptimizeConfigParameters::OptimizeConfigParameters(MethodBase * const method, std::map<TString,TMVA::Interval> tuneParameters, TString fomType, TString optimizationFitType)
00046 : fMethod(method),
00047 fTuneParameters(tuneParameters),
00048 fFOMType(fomType),
00049 fOptimizationFitType(optimizationFitType),
00050 fMvaSig(NULL),
00051 fMvaBkg(NULL),
00052 fMvaSigFineBin(NULL),
00053 fMvaBkgFineBin(NULL)
00054 {
00055
00056 std::string name = "OptimizeConfigParameters_";
00057 name += std::string(GetMethod()->GetName());
00058 fLogger = new MsgLogger(name);
00059 if (fMethod->DoRegression()){
00060 Log() << kFATAL << " ERROR: Sorry, Regression is not yet implement for automatic parameter optimization"
00061 << " --> exit" << Endl;
00062 }
00063 }
00064
00065
00066 TMVA::OptimizeConfigParameters::~OptimizeConfigParameters()
00067 {
00068
00069
00070 GetMethod()->BaseDir()->cd();
00071 Int_t n=Int_t(fFOMvsIter.size());
00072 Float_t *x = new Float_t[n];
00073 Float_t *y = new Float_t[n];
00074 Float_t ymin=+999999999;
00075 Float_t ymax=-999999999;
00076
00077 for (Int_t i=0;i<n;i++){
00078 x[i] = Float_t(i);
00079 y[i] = fFOMvsIter[i];
00080 if (ymin>y[i]) ymin=y[i];
00081 if (ymax<y[i]) ymax=y[i];
00082 }
00083
00084 TH2D *h=new TH2D(TString(GetMethod()->GetName())+"_FOMvsIterFrame","",2,0,n,2,ymin*0.95,ymax*1.05);
00085 h->SetXTitle("#iteration "+fOptimizationFitType);
00086 h->SetYTitle(fFOMType);
00087 TGraph *gFOMvsIter = new TGraph(n,x,y);
00088 gFOMvsIter->SetName((TString(GetMethod()->GetName())+"_FOMvsIter").Data());
00089 gFOMvsIter->Write();
00090 h->Write();
00091
00092
00093 }
00094
00095 std::map<TString,Double_t> TMVA::OptimizeConfigParameters::optimize()
00096 {
00097 if (fOptimizationFitType == "Scan" ) this->optimizeScan();
00098 else if (fOptimizationFitType == "FitGA" || fOptimizationFitType == "Minuit" ) this->optimizeFit();
00099 else {
00100 Log() << kFATAL << "You have chosen as optimization type " << fOptimizationFitType
00101 << " that is not (yet) coded --> exit()" << Endl;
00102 }
00103
00104 Log() << kINFO << "For " << GetMethod()->GetName() << " the optimized Parameters are: " << Endl;
00105 std::map<TString,Double_t>::iterator it;
00106 for(it=fTunedParameters.begin(); it!= fTunedParameters.end(); it++){
00107 Log() << kINFO << it->first << " = " << it->second << Endl;
00108 }
00109 return fTunedParameters;
00110
00111 }
00112 void TMVA::OptimizeConfigParameters::optimizeScan()
00113 {
00114
00115
00116
00117
00118
00119 Double_t bestFOM=-1000000, currentFOM;
00120
00121 std::map<TString,Double_t> currentParameters;
00122 std::map<TString,TMVA::Interval>::iterator it;
00123
00124
00125
00126 currentParameters.clear();
00127 fTunedParameters.clear();
00128 for (it=fTuneParameters.begin(); it!=fTuneParameters.end(); it++){
00129 currentParameters.insert(std::pair<TString,Double_t>(it->first,it->second.GetMin()));
00130 fTunedParameters.insert(std::pair<TString,Double_t>(it->first,it->second.GetMin()));
00131 }
00132
00133 for (it=fTuneParameters.begin(); it!=fTuneParameters.end(); it++){
00134 for (Int_t ibin=0; ibin<it->second.GetNbins(); ibin++){
00135 currentParameters[it->first] = it->second.GetElement(ibin);
00136 GetMethod()->Reset();
00137 GetMethod()->SetTuneParameters(currentParameters);
00138
00139 GetMethod()->BaseDir()->cd();
00140 GetMethod()->GetTransformationHandler().CalcTransformations(
00141 GetMethod()->Data()->GetEventCollection());
00142 GetMethod()->Train();
00143 currentFOM = GetFOM();
00144
00145 if (currentFOM > bestFOM) {
00146 bestFOM = currentFOM;
00147 for (std::map<TString,Double_t>::iterator iter=currentParameters.begin();
00148 iter != currentParameters.end(); iter++){
00149 fTunedParameters[iter->first]=iter->second;
00150 }
00151 }
00152 }
00153 }
00154
00155 GetMethod()->Reset();
00156 GetMethod()->SetTuneParameters(fTunedParameters);
00157 }
00158
00159 void TMVA::OptimizeConfigParameters::optimizeFit()
00160 {
00161
00162 std::vector<TMVA::Interval*> ranges;
00163 std::map<TString, TMVA::Interval>::iterator it;
00164 std::vector<Double_t> pars;
00165
00166 for (it=fTuneParameters.begin(); it != fTuneParameters.end(); it++){
00167 ranges.push_back(new TMVA::Interval(it->second));
00168 pars.push_back( (it->second).GetMean() );
00169
00170
00171 }
00172
00173
00174
00175 FitterBase* fitter = NULL;
00176
00177 if ( fOptimizationFitType == "Minuit" ) {
00178 TString opt="";
00179 fitter = new MinuitFitter( *this,
00180 "FitterMinuit_BDTOptimize",
00181 ranges, opt );
00182 }else if ( fOptimizationFitType == "FitGA" ) {
00183 TString opt="PopSize=20:Steps=30:Cycles=3:ConvCrit=0.01:SaveBestCycle=5";
00184 fitter = new GeneticFitter( *this,
00185 "FitterGA_BDTOptimize",
00186 ranges, opt );
00187 } else {
00188 Log() << kWARNING << " you did not specify a valid OptimizationFitType "
00189 << " will use the default (FitGA) " << Endl;
00190 TString opt="PopSize=20:Steps=30:Cycles=3:ConvCrit=0.01:SaveBestCycle=5";
00191 fitter = new GeneticFitter( *this,
00192 "FitterGA_BDTOptimize",
00193 ranges, opt );
00194 }
00195
00196 fitter->CheckForUnusedOptions();
00197
00198
00199 fitter->Run(pars);
00200
00201
00202 for (UInt_t ipar=0; ipar<ranges.size(); ipar++) delete ranges[ipar];
00203
00204
00205 GetMethod()->Reset();
00206
00207 fTunedParameters.clear();
00208 Int_t jcount=0;
00209 for (it=fTuneParameters.begin(); it!=fTuneParameters.end(); it++){
00210 fTunedParameters.insert(std::pair<TString,Double_t>(it->first,pars[jcount++]));
00211 }
00212
00213 GetMethod()->SetTuneParameters(fTunedParameters);
00214
00215 }
00216
00217
00218 Double_t TMVA::OptimizeConfigParameters::EstimatorFunction( std::vector<Double_t> & pars)
00219 {
00220
00221
00222 std::map< std::vector<Double_t> , Double_t>::const_iterator iter;
00223 iter = fAlreadyTrainedParCombination.find(pars);
00224
00225 if (iter != fAlreadyTrainedParCombination.end()) {
00226
00227
00228
00229 return iter->second;
00230 }else{
00231 std::map<TString,Double_t> currentParameters;
00232 Int_t icount =0;
00233
00234 std::map<TString, TMVA::Interval>::iterator it;
00235 for (it=fTuneParameters.begin(); it!=fTuneParameters.end(); it++){
00236 currentParameters[it->first] = pars[icount++];
00237 }
00238 GetMethod()->Reset();
00239 GetMethod()->SetTuneParameters(currentParameters);
00240 GetMethod()->BaseDir()->cd();
00241
00242 GetMethod()->GetTransformationHandler().CalcTransformations(
00243 GetMethod()->Data()->GetEventCollection());
00244
00245 GetMethod()->Train();
00246
00247 Double_t currentFOM = GetFOM();
00248
00249 fAlreadyTrainedParCombination.insert(std::make_pair(pars,-currentFOM));
00250 return -currentFOM;
00251 }
00252 }
00253
00254
00255 Double_t TMVA::OptimizeConfigParameters::GetFOM()
00256 {
00257
00258
00259
00260 Double_t fom=0;
00261 if (fMethod->DoRegression()){
00262 std::cout << " ERROR: Sorry, Regression is not yet implement for automatic parameter optimisation"
00263 << " --> exit" << std::endl;
00264 std::exit(1);
00265 }else{
00266 if (fFOMType == "Separation") fom = GetSeparation();
00267 else if (fFOMType == "ROCIntegral") fom = GetROCIntegral();
00268 else if (fFOMType == "SigEffAt01") fom = GetSigEffAt(0.1);
00269 else if (fFOMType == "SigEffAt001") fom = GetSigEffAt(0.01);
00270 else {
00271 Log()<<kFATAL << " ERROR, you've specified as Figure of Merit in the \n"
00272 << " parameter optimisation " << fFOMType << " which has not\n"
00273 << " been implemented yet!! ---> exit " << Endl;
00274 }
00275 }
00276 fFOMvsIter.push_back(fom);
00277 return fom;
00278 }
00279
00280
00281 void TMVA::OptimizeConfigParameters::GetMVADists()
00282 {
00283
00284
00285 if (fMvaSig) fMvaSig->Delete();
00286 if (fMvaBkg) fMvaBkg->Delete();
00287 if (fMvaSigFineBin) fMvaSigFineBin->Delete();
00288 if (fMvaBkgFineBin) fMvaBkgFineBin->Delete();
00289
00290
00291
00292
00293
00294
00295
00296 fMvaSig = new TH1D("fMvaSig","",100,-1.5,1.5);
00297 fMvaBkg = new TH1D("fMvaBkg","",100,-1.5,1.5);
00298 fMvaSigFineBin = new TH1D("fMvaSigFineBin","",100000,-1.5,1.5);
00299 fMvaBkgFineBin = new TH1D("fMvaBkgFineBin","",100000,-1.5,1.5);
00300
00301 const std::vector<Event*> events=fMethod->Data()->GetEventCollection(Types::kTesting);
00302
00303 UInt_t signalClassNr = fMethod->DataInfo().GetClassInfo("Signal")->GetNumber();
00304
00305
00306
00307 for (UInt_t iev=0; iev < events.size() ; iev++){
00308
00309
00310
00311 if (events[iev]->GetClass() == signalClassNr) {
00312 fMvaSig->Fill(fMethod->GetMvaValue(events[iev]),events[iev]->GetWeight());
00313 fMvaSigFineBin->Fill(fMethod->GetMvaValue(events[iev]),events[iev]->GetWeight());
00314 } else {
00315 fMvaBkg->Fill(fMethod->GetMvaValue(events[iev]),events[iev]->GetWeight());
00316 fMvaBkgFineBin->Fill(fMethod->GetMvaValue(events[iev]),events[iev]->GetWeight());
00317 }
00318 }
00319 }
00320
00321 Double_t TMVA::OptimizeConfigParameters::GetSeparation()
00322 {
00323
00324
00325 GetMVADists();
00326 if (1){
00327 PDF *splS = new PDF( " PDF Sig", fMvaSig, PDF::kSpline2 );
00328 PDF *splB = new PDF( " PDF Bkg", fMvaBkg, PDF::kSpline2 );
00329 return gTools().GetSeparation(*splS,*splB);
00330 }else{
00331 std::cout << "Separation caclulcaton via histograms (not PDFs) seems to give still strange results!! Don't do that, check!!"<<std::endl;
00332 return gTools().GetSeparation(fMvaSigFineBin,fMvaBkgFineBin);
00333 }
00334 }
00335
00336
00337
00338 Double_t TMVA::OptimizeConfigParameters::GetROCIntegral()
00339 {
00340
00341
00342
00343
00344
00345
00346
00347
00348
00349
00350 GetMVADists();
00351
00352 Double_t integral = 0;
00353 if (0){
00354 PDF *pdfS = new PDF( " PDF Sig", fMvaSig, PDF::kSpline2 );
00355 PDF *pdfB = new PDF( " PDF Bkg", fMvaBkg, PDF::kSpline2 );
00356
00357 Double_t xmin = TMath::Min(pdfS->GetXmin(), pdfB->GetXmin());
00358 Double_t xmax = TMath::Max(pdfS->GetXmax(), pdfB->GetXmax());
00359
00360 UInt_t nsteps = 1000;
00361 Double_t step = (xmax-xmin)/Double_t(nsteps);
00362 Double_t cut = xmin;
00363 for (UInt_t i=0; i<nsteps; i++){
00364 integral += (1-pdfB->GetIntegral(cut,xmax)) * pdfS->GetVal(cut);
00365 cut+=step;
00366 }
00367 integral*=step;
00368 }else{
00369
00370 if ( (fMvaSigFineBin->GetXaxis()->GetXmin() != fMvaBkgFineBin->GetXaxis()->GetXmin()) ||
00371 (fMvaSigFineBin->GetNbinsX() != fMvaBkgFineBin->GetNbinsX()) ){
00372 std::cout << " Error in OptimizeConfigParameters GetROCIntegral, unequal histograms for sig and bkg.." << std::endl;
00373 std::exit(1);
00374 }else{
00375
00376 Double_t *cumulator = fMvaBkgFineBin->GetIntegral();
00377 Int_t nbins = fMvaSigFineBin->GetNbinsX();
00378
00379
00380
00381 Double_t sigIntegral = 0;
00382 for (Int_t ibin=1; ibin<=nbins; ibin++){
00383 sigIntegral += fMvaSigFineBin->GetBinContent(ibin) * fMvaSigFineBin->GetBinWidth(ibin);
00384 }
00385
00386
00387 for (Int_t ibin=1; ibin <= nbins; ibin++){
00388 integral += (cumulator[ibin]) * fMvaSigFineBin->GetBinContent(ibin)/sigIntegral * fMvaSigFineBin->GetBinWidth(ibin) ;
00389 }
00390 }
00391 }
00392
00393 return integral;
00394 }
00395
00396
00397
00398 Double_t TMVA::OptimizeConfigParameters::GetSigEffAt(Double_t bkgEff)
00399 {
00400
00401
00402 GetMVADists();
00403 Double_t sigEff=0;
00404
00405
00406 if ( (fMvaSigFineBin->GetXaxis()->GetXmin() != fMvaBkgFineBin->GetXaxis()->GetXmin()) ||
00407 (fMvaSigFineBin->GetNbinsX() != fMvaBkgFineBin->GetNbinsX()) ){
00408 std::cout << " Error in OptimizeConfigParameters GetSigEffAt, unequal histograms for sig and bkg.." << std::endl;
00409 std::exit(1);
00410 }else{
00411 Double_t *bkgCumulator = fMvaBkgFineBin->GetIntegral();
00412 Double_t *sigCumulator = fMvaSigFineBin->GetIntegral();
00413
00414 Int_t nbins=fMvaBkgFineBin->GetNbinsX();
00415 Int_t ibin=0;
00416
00417
00418
00419
00420
00421
00422
00423 while (bkgCumulator[nbins-ibin] > (1-bkgEff)) {
00424 sigEff = sigCumulator[nbins]-sigCumulator[nbins-ibin];
00425 ibin++;
00426 }
00427 }
00428 return sigEff;
00429 }
00430
00431