RuleFitAPI.cxx

Go to the documentation of this file.
00001 // @(#)root/tmva $Id: RuleFitAPI.cxx 35727 2010-09-24 21:41:20Z stelzer $
00002 // Author: Andreas Hoecker, Joerg Stelzer, Fredrik Tegenfeldt, Helge Voss 
00003 
00004 /**********************************************************************************
00005  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis       *
00006  * Package: TMVA                                                                  *
00007  * Class  : RuleFitAPI                                                            *
00008  * Web    : http://tmva.sourceforge.net                                           *
00009  *                                                                                *
00010  * Description:                                                                   *
00011  *      Implementation (see header file for description)                          *
00012  *                                                                                *
00013  * Authors (alphabetical):                                                        *
00014  *      Fredrik Tegenfeldt <Fredrik.Tegenfeldt@cern.ch>  - Iowa State U., USA     *
00015  *                                                                                *
00016  * Copyright (c) 2005:                                                            *
00017  *      CERN, Switzerland                                                         * 
00018  *      Iowa State U.                                                             *
00019  *      MPI-KP Heidelberg, Germany                                                * 
00020  *                                                                                *
00021  * Redistribution and use in source and binary forms, with or without             *
00022  * modification, are permitted according to the terms listed in LICENSE           *
00023  * (http://tmva.sourceforge.net/LICENSE)                                          *
00024  **********************************************************************************/
00025 
00026 //_______________________________________________________________________
00027 //
00028 // J Friedman's RuleFit method
00029 //_______________________________________________________________________
00030 
00031 #include <algorithm>
00032 
00033 #include "TROOT.h"
00034 #include "TSystem.h"
00035 #include "TMath.h"
00036 
00037 #include "TMVA/MethodRuleFit.h"
00038 #include "TMVA/RuleFitAPI.h"
00039 #include "TMVA/RuleFit.h"
00040 #include "TMVA/Tools.h"
00041 #include "TMVA/Timer.h"
00042 
00043 ClassImp(TMVA::RuleFitAPI)
00044 
00045 TMVA::RuleFitAPI::RuleFitAPI( const MethodRuleFit *rfbase,
00046                               RuleFit *rulefit,
00047                               EMsgType minType = kINFO ) :
00048    fMethodRuleFit(rfbase),
00049    fRuleFit(rulefit),
00050    fRFProgram(kRfTrain),
00051    fLogger("RuleFitAPI",minType)
00052 {
00053    // standard constructor
00054    if (rfbase) {
00055       SetRFWorkDir(rfbase->GetRFWorkDir());
00056    } else {
00057       SetRFWorkDir("./rulefit");
00058    }
00059    InitRuleFit();
00060 }
00061 
00062 
00063 //_______________________________________________________________________
00064 TMVA::RuleFitAPI::~RuleFitAPI()
00065 {
00066    // destructor
00067 }
00068 
00069 //_______________________________________________________________________
00070 void TMVA::RuleFitAPI::WelcomeMessage()
00071 {
00072    // welcome message
00073    fLogger << kINFO
00074            << "\n"
00075            << "---------------------------------------------------------------------------\n"
00076            << "-   You are running the interface to Jerome Friedmans RuleFit(tm) code.   -\n"
00077            << "-   For a full manual see the following web page:                         -\n"
00078            << "-                                                                         -\n"
00079            << "-        http://www-stat.stanford.edu/~jhf/R-RuleFit.html                 -\n"
00080            << "-                                                                         -\n"
00081            << "---------------------------------------------------------------------------"
00082            << Endl;
00083 }
00084 //_______________________________________________________________________
00085 void TMVA::RuleFitAPI::HowtoSetupRF()
00086 {
00087    // howto message
00088    fLogger << kINFO
00089            << "\n"
00090            << "------------------------ RULEFIT-JF INTERFACE SETUP -----------------------\n"
00091            << "\n"
00092            << "1. Create a rulefit directory in your current work directory:\n"
00093            << "       mkdir " << fRFWorkDir << "\n\n"
00094            << "   the directory may be set using the option RuleFitDir\n"
00095            << "\n"
00096            << "2. Copy (or make a link) the file rf_go.exe into this directory\n"
00097            << "\n"
00098            << "The file can be obtained from Jerome Friedmans homepage (linux):\n"
00099            << "   wget http://www-stat.stanford.edu/~jhf/r-rulefit/linux/rf_go.exe\n"
00100            << "\n"
00101            << "Don't forget to do:\n"
00102            << "   chmod +x rf_go.exe\n"
00103            << "\n"
00104            << "For Windows download:\n"
00105            << "   http://www-stat.stanford.edu/~jhf/r-rulefit/windows/rf_go.exe\n"
00106            << "\n"
00107            << "NOTE: other platforms are not supported (see Friedmans homepage)\n"
00108            << "\n"
00109            << "---------------------------------------------------------------------------\n"
00110            << Endl;
00111 }
00112 //_______________________________________________________________________
00113 void TMVA::RuleFitAPI::InitRuleFit()
00114 {
00115    // default initialisation
00116    //   SetRFWorkDir("./rulefit");
00117    CheckRFWorkDir();
00118    FillIntParmsDef();
00119    FillRealParmsDef();
00120 }
00121 
00122 //_______________________________________________________________________
00123 void TMVA::RuleFitAPI::ImportSetup()
00124 {
00125    // import setup from MethodRuleFit
00126    fRFIntParms.p            = fMethodRuleFit->DataInfo().GetNVariables();
00127    fRFIntParms.max_rules    = fMethodRuleFit->GetRFNrules();
00128    fRFIntParms.tree_size    = fMethodRuleFit->GetRFNendnodes();
00129    fRFIntParms.path_steps   = fMethodRuleFit->GetGDNPathSteps();
00130    //
00131    fRFRealParms.path_inc    = fMethodRuleFit->GetGDPathStep();
00132    fRFRealParms.samp_fract  = fMethodRuleFit->GetTreeEveFrac();
00133    fRFRealParms.trim_qntl   = fMethodRuleFit->GetLinQuantile();
00134    fRFRealParms.conv_fac    = fMethodRuleFit->GetGDErrScale();
00135    //
00136    if      (fRuleFit->GetRuleEnsemblePtr()->DoOnlyLinear() )
00137       fRFIntParms.lmode = kRfLinear;
00138    else if (fRuleFit->GetRuleEnsemblePtr()->DoOnlyRules() )
00139       fRFIntParms.lmode = kRfRules;
00140    else
00141       fRFIntParms.lmode = kRfBoth;
00142 }
00143 
00144 //_______________________________________________________________________
00145 void TMVA::RuleFitAPI::SetRFWorkDir(const char * wdir)
00146 {
00147    // set the directory containing rf_go.exe.
00148    fRFWorkDir = wdir;
00149 }
00150 
00151 //_______________________________________________________________________
00152 void TMVA::RuleFitAPI::CheckRFWorkDir()
00153 {
00154    // check if the rulefit work dir is properly setup.
00155    // it aborts (kFATAL) if not.
00156    //
00157    // Check existance of directory
00158    TString oldDir = gSystem->pwd();
00159    if (!gSystem->cd(fRFWorkDir)) {
00160       fLogger << kWARNING << "Must create a rulefit directory named : " << fRFWorkDir << Endl;
00161       HowtoSetupRF();
00162       fLogger << kFATAL << "Setup failed - aborting!" << Endl;
00163    }
00164    // check rf_go.exe
00165    FILE *f = fopen("rf_go.exe","r");
00166    if (f==0) {
00167       fLogger << kWARNING << "No rf_go.exe file in directory : " << fRFWorkDir << Endl;
00168       HowtoSetupRF();
00169       fLogger << kFATAL << "Setup failed - aborting!" << Endl;
00170    }
00171    fclose(f);
00172    gSystem->cd(oldDir.Data());
00173 }
00174 
00175 //_______________________________________________________________________
00176 void TMVA::RuleFitAPI::SetTrainParms()
00177 {
00178    // set the training parameters
00179    ImportSetup();
00180    //
00181    Int_t    n    = fMethodRuleFit->Data()->GetNTrainingEvents();
00182    //   Double_t neff = Double_t(n); // When weights are added: should be sum(wt)^2/sum(wt^2)
00183    fRFIntParms.n = n; // number of data points in tree
00184    fRFProgram    = kRfTrain;
00185 }
00186 
00187 //_______________________________________________________________________
00188 void TMVA::RuleFitAPI::SetTestParms()
00189 {
00190    // set the test params
00191    ImportSetup();
00192    Int_t    n    = fMethodRuleFit->Data()->GetNTestEvents();
00193    //   Double_t neff = Double_t(n); // When weights are added: should be sum(wt)^2/sum(wt^2)
00194    fRFIntParms.n = n; // number of data points in tree
00195    fRFProgram    = kRfPredict;
00196 }
00197 
00198 //_______________________________________________________________________
00199 void TMVA::RuleFitAPI::FillRealParmsDef()
00200 {
00201    // set default real params
00202    fRFRealParms.xmiss       = 9.0e30;
00203    fRFRealParms.trim_qntl   = 0.025;
00204    fRFRealParms.huber       = 0.8;
00205    fRFRealParms.inter_supp  = 3.0;
00206    fRFRealParms.memory_par  = 0.01;
00207    fRFRealParms.samp_fract  = 0.5; // calculated later
00208    fRFRealParms.path_inc    = 0.01;
00209    fRFRealParms.conv_fac    = 1.1;
00210 }
00211 
00212 //_______________________________________________________________________
00213 void TMVA::RuleFitAPI::FillIntParmsDef()
00214 {
00215    // set default int params
00216    fRFIntParms.mode           = (int)kRfClass;
00217    fRFIntParms.lmode          = (int)kRfBoth;
00218    //   fRFIntParms.n;
00219    //   fRFIntParms.p;
00220    fRFIntParms.max_rules      = 2000;
00221    fRFIntParms.tree_size      = 4;
00222    fRFIntParms.path_speed     = 2;
00223    fRFIntParms.path_xval      = 3;
00224    fRFIntParms.path_steps     = 50000;
00225    fRFIntParms.path_testfreq  = 100;
00226    fRFIntParms.tree_store     = 10000000;
00227    fRFIntParms.cat_store      = 1000000;
00228 
00229 }
00230 
00231 //_______________________________________________________________________
00232 Bool_t TMVA::RuleFitAPI::WriteAll()
00233 {
00234    // write all files read by rf_go.exe
00235    WriteIntParms();
00236    WriteRealParms();
00237    WriteLx();
00238    WriteProgram();
00239    WriteVarNames();
00240    if (fRFProgram==kRfTrain)   WriteTrain();
00241    if (fRFProgram==kRfPredict) WriteTest();
00242    if (fRFProgram==kRfVarimp)  WriteRealVarImp();
00243    return kTRUE;
00244 }
00245 
00246 //_______________________________________________________________________
00247 Bool_t TMVA::RuleFitAPI::WriteIntParms()
00248 {
00249    // write int params file
00250    std::ofstream f;
00251    if (!OpenRFile("intparms",f)) return kFALSE;
00252    WriteInt(f,&fRFIntParms.mode,sizeof(fRFIntParms)/sizeof(Int_t));
00253    return kTRUE;
00254 }
00255 
00256 //_______________________________________________________________________
00257 Bool_t TMVA::RuleFitAPI::WriteRealParms()
00258 {
00259    // write int params file
00260    std::ofstream f;
00261    if (!OpenRFile("realparms",f)) return kFALSE;
00262    WriteFloat(f,&fRFRealParms.xmiss,sizeof(fRFRealParms)/sizeof(Float_t));
00263    return kTRUE;
00264 }
00265 
00266 //_______________________________________________________________________
00267 Bool_t TMVA::RuleFitAPI::WriteLx()
00268 {
00269    // Save input variable mask
00270    //
00271    // If the lx vector size is not the same as inputVars,
00272    // resize it and fill it with 1
00273    // NOTE: Always set all to 1
00274    //  if (fRFLx.size() != m_inputVars->size()) {
00275    fRFLx.clear();
00276    fRFLx.resize(fMethodRuleFit->DataInfo().GetNVariables(),1);
00277    //  }
00278    std::ofstream f;
00279    if (!OpenRFile("lx",f)) return kFALSE;
00280    WriteInt(f,&fRFLx[0],fRFLx.size());
00281    return kTRUE;
00282 }
00283 
00284 //_______________________________________________________________________
00285 Bool_t TMVA::RuleFitAPI::WriteProgram()
00286 {
00287    // write command to rf_go.exe
00288    std::ofstream f;
00289    if (!OpenRFile("program",f)) return kFALSE;
00290    TString program;
00291    switch (fRFProgram) {
00292    case kRfTrain:
00293       program = "rulefit";
00294       break;
00295    case kRfPredict:
00296       program = "rulefit_pred";
00297       break;
00298       // calculate variable importance
00299    case kRfVarimp:
00300       program = "varimp";
00301       break;
00302    default:
00303       fRFProgram = kRfTrain;
00304       program="rulefit";
00305       break;
00306    }
00307    f << program;
00308    return kTRUE;
00309 }
00310 
00311 //_______________________________________________________________________
00312 Bool_t TMVA::RuleFitAPI::WriteRealVarImp()
00313 {
00314    // write the minimum importance to be considered
00315    std::ofstream f;
00316    if (!OpenRFile("realvarimp",f)) return kFALSE;
00317    Float_t rvp[2];
00318    rvp[0] = 0.0; // Mode: see varimp() in rulefit.r
00319    rvp[1] = 0.0; // Minimum importance considered (1 is max)
00320    WriteFloat(f,&rvp[0],2);
00321    return kTRUE;
00322 }
00323 
00324 //_______________________________________________________________________
00325 Bool_t TMVA::RuleFitAPI::WriteRfOut()
00326 {
00327    // written by rf_go.exe; write rulefit output (rfout)
00328    fLogger << kWARNING << "WriteRfOut is not yet implemented" << Endl;
00329    return kTRUE;
00330 }
00331 
00332 //_______________________________________________________________________
00333 Bool_t TMVA::RuleFitAPI::WriteRfStatus()
00334 {
00335    // written by rf_go.exe; write rulefit status
00336    fLogger << kWARNING << "WriteRfStatus is not yet implemented" << Endl;
00337    return kTRUE;
00338 }
00339 
00340 //_______________________________________________________________________
00341 Bool_t TMVA::RuleFitAPI::WriteRuleFitMod()
00342 {
00343    // written by rf_go.exe (NOTE:Format unknown!)
00344    fLogger << kWARNING << "WriteRuleFitMod is not yet implemented" << Endl;
00345    return kTRUE;
00346 }
00347 
00348 //_______________________________________________________________________
00349 Bool_t TMVA::RuleFitAPI::WriteRuleFitSum()
00350 {
00351    // written by rf_go.exe (NOTE: format unknown!)
00352    fLogger << kWARNING << "WriteRuleFitSum is not yet implemented" << Endl;
00353    return kTRUE;
00354 }
00355 
00356 //_______________________________________________________________________
00357 Bool_t TMVA::RuleFitAPI::WriteTrain()
00358 {
00359    // write training data, columnwise
00360    std::ofstream fx;
00361    std::ofstream fy;
00362    std::ofstream fw;
00363    //
00364    if (!OpenRFile("train.x",fx)) return kFALSE;
00365    if (!OpenRFile("train.y",fy)) return kFALSE;
00366    if (!OpenRFile("train.w",fw)) return kFALSE;
00367    //
00368    Float_t x,y,w;
00369    //
00370    // The loop order cannot be changed.
00371    // The data is stored <var1(eve1), var1(eve2), ...var1(eveN), var2(eve1),....
00372    //
00373    for (UInt_t ivar=0; ivar<fMethodRuleFit->DataInfo().GetNVariables(); ivar++) {
00374       for (Int_t ievt=0;ievt<fMethodRuleFit->Data()->GetNTrainingEvents(); ievt++) {
00375          const Event * ev = fMethodRuleFit->GetTrainingEvent(ievt);
00376          x = ev->GetValue(ivar);
00377          WriteFloat(fx,&x,1);
00378          if (ivar==0) {
00379             w = ev->GetWeight();
00380             y = fMethodRuleFit->DataInfo().IsSignal(ev)? 1.0 : -1.0;
00381             WriteFloat(fy,&y,1);
00382             WriteFloat(fw,&w,1);
00383          }
00384       }
00385    }
00386    fLogger << kINFO << "Number of training data written: " << fMethodRuleFit->Data()->GetNTrainingEvents() << Endl;
00387    return kTRUE;
00388 }
00389 
00390 //_______________________________________________________________________
00391 Bool_t TMVA::RuleFitAPI::WriteTest()
00392 {
00393    // Write test data
00394 
00395    fMethodRuleFit->Data()->SetCurrentType(Types::kTesting);
00396 
00397    std::ofstream f;
00398    //
00399    if (!OpenRFile("test.x",f)) return kFALSE;
00400    //
00401    Float_t vf;
00402    Float_t neve;
00403    //
00404    neve = static_cast<Float_t>(fMethodRuleFit->Data()->GetNEvents());
00405    WriteFloat(f,&neve,1);
00406    // Test data is saved as:
00407    // 0      : <N> num of events, type float, 4 bytes
00408    // 1-N    : First variable for all events
00409    // N+1-2N : Second variable...
00410    // ...
00411    for (UInt_t ivar=0; ivar<fMethodRuleFit->DataInfo().GetNVariables(); ivar++) {
00412       for (Int_t ievt=0;ievt<fMethodRuleFit->Data()->GetNEvents(); ievt++) {
00413          vf =   fMethodRuleFit->GetEvent(ievt)->GetValue(ivar);
00414          WriteFloat(f,&vf,1);
00415       }
00416    }
00417    fLogger << kINFO << "Number of test data written: " << fMethodRuleFit->Data()->GetNEvents() << Endl;
00418    //
00419    return kTRUE;
00420 }
00421 
00422 //_______________________________________________________________________
00423 Bool_t TMVA::RuleFitAPI::WriteVarNames()
00424 {
00425    // write variable names, ascii
00426    std::ofstream f;
00427    if (!OpenRFile("varnames",f)) return kFALSE;
00428    for (UInt_t ivar=0; ivar<fMethodRuleFit->DataInfo().GetNVariables(); ivar++) {
00429       f << fMethodRuleFit->DataInfo().GetVariableInfo(ivar).GetExpression() << '\n';
00430    }
00431    return kTRUE;
00432 }
00433 
00434 //_______________________________________________________________________
00435 Bool_t TMVA::RuleFitAPI::WriteVarImp()
00436 
00437 {
00438    // written by rf_go.exe
00439    fLogger << kWARNING << "WriteVarImp is not yet implemented" << Endl;
00440    return kTRUE;
00441 }
00442 
00443 //_______________________________________________________________________
00444 Bool_t TMVA::RuleFitAPI::WriteYhat()
00445 {
00446    // written by rf_go.exe
00447    fLogger << kWARNING << "WriteYhat is not yet implemented" << Endl;
00448    return kTRUE;
00449 }
00450 
00451 //_______________________________________________________________________
00452 Bool_t TMVA::RuleFitAPI::ReadYhat()
00453 {
00454    // read the score
00455    fRFYhat.clear();
00456    //
00457    std::ifstream f;
00458    if (!OpenRFile("yhat",f)) return kFALSE;
00459    Int_t   neve;
00460    Float_t xval;
00461    ReadFloat(f,&xval,1);
00462    neve = static_cast<Int_t>(xval);
00463    if (neve!=fMethodRuleFit->Data()->GetNTestEvents()) {
00464       fLogger << kWARNING << "Inconsistent size of yhat file and test tree!" << Endl;
00465       fLogger << kWARNING << "neve = " << neve << " , tree = " << fMethodRuleFit->Data()->GetNTestEvents() << Endl;
00466       return kFALSE;
00467    }
00468    for (Int_t ievt=0; ievt<fMethodRuleFit->Data()->GetNTestEvents(); ievt++) {
00469       ReadFloat(f,&xval,1);
00470       fRFYhat.push_back(xval);
00471    }
00472    return kTRUE;
00473 }
00474 
00475 //_______________________________________________________________________
00476 Bool_t TMVA::RuleFitAPI::ReadVarImp()
00477 {
00478    // read variable importance
00479    fRFVarImp.clear();
00480    //
00481    std::ifstream f;
00482    if (!OpenRFile("varimp",f)) return kFALSE;
00483    UInt_t   nvars;
00484    Float_t xval;
00485    Float_t xmax=1.0;
00486    nvars=fMethodRuleFit->DataInfo().GetNVariables();
00487    //
00488    // First read all importances
00489    //
00490    for (UInt_t ivar=0; ivar<nvars; ivar++) {
00491       ReadFloat(f,&xval,1);
00492       if (ivar==0) {
00493          xmax=xval;
00494       } else {
00495          if (xval>xmax) xmax=xval;
00496       }
00497       fRFVarImp.push_back(xval);
00498    }
00499    //
00500    // Read the indices.
00501    // They are saved as float (!) by rf_go.exe.
00502    //
00503    for (UInt_t ivar=0; ivar<nvars; ivar++) {
00504       fRFVarImp[ivar] = fRFVarImp[ivar]/xmax;
00505       ReadFloat(f,&xval,1);
00506       fRFVarImpInd.push_back(Int_t(xval)-1);
00507    }
00508    return kTRUE;
00509 }
00510 
00511 //_______________________________________________________________________
00512 Bool_t TMVA::RuleFitAPI::ReadModelSum()
00513 {
00514    // read model from rulefit.sum
00515    fRFVarImp.clear();
00516    //
00517    fLogger << kVERBOSE << "Reading RuleFit summary file" << Endl;
00518    std::ifstream f;
00519    if (!OpenRFile("rulefit.sum",f)) return kFALSE;
00520    Int_t    lines=0;
00521    Int_t    nrules=0;
00522    Int_t    nvars=0;
00523    Int_t    nvarsOpt=0;
00524    Int_t    dumI;
00525    Float_t  dumF;
00526    Float_t  offset;
00527    Double_t impref=-1.0;
00528    Double_t imp;
00529 
00530    fRuleFit->GetRuleEnsemblePtr()->SetAverageRuleSigma(0.4); // value used by Friedmans RuleFit
00531    //
00532    //--------------------------------------------
00533    //       first read rulefit.sum header
00534    //--------------------------------------------
00535    // line      type    val     descr
00536    //   0       <int>   86      N(rules)x2
00537    //   1       <int>   155     ???
00538    //   2       <int>   1       ???
00539    //   3       <int>   1916    ???
00540    //   4       <int>   2       N(vars) ?
00541    //   5       <int>   2       N(vars) ?
00542    //   6       <float> 9e+30   xmiss
00543    //   7       <float> 1.1e-1  a0 (model offset)
00544    //--------------------------------------------
00545    //
00546    // NOTE: a model without any rules, will look like
00547    // for the first four lines:
00548    //
00549    //   0        1
00550    //   1        1
00551    //   2        1
00552    //   3        0
00553    //
00554    // There will later be one block of dummy data for one rule.
00555    // In order to catch this situation, some special checks are made below.
00556    //
00557    Bool_t norules;
00558    lines += ReadInt(f,&nrules);
00559    norules = (nrules==1);
00560    lines += ReadInt(f,&dumI);
00561    norules = norules && (dumI==1);
00562    lines += ReadInt(f,&dumI);
00563    norules = norules && (dumI==1);
00564    lines += ReadInt(f,&dumI);
00565    norules = norules && (dumI==0);
00566    if (nrules==0) norules=kTRUE; // this ugly construction is needed:(
00567    if (norules) nrules = 0;
00568    //
00569    lines += ReadInt(f,&nvars);
00570    lines += ReadInt(f,&nvarsOpt);
00571    lines += ReadFloat(f,&dumF);
00572    lines += ReadFloat(f,&offset);
00573    fLogger << kDEBUG << "N(rules) = " << nrules   << Endl;
00574    fLogger << kDEBUG << "N(vars)  = " << nvars    << Endl;
00575    fLogger << kDEBUG << "N(varsO) = " << nvarsOpt << Endl;
00576    fLogger << kDEBUG << "xmiss    = " << dumF     << Endl;
00577    fLogger << kDEBUG << "offset   = " << offset   << Endl;
00578    if (nvars!=nvarsOpt) {
00579       fLogger << kWARNING << "Format of rulefit.sum is ... weird?? Continuing but who knows how it will end...?" << Endl;
00580    }
00581    std::vector<Double_t> rfSupp;
00582    std::vector<Double_t> rfCoef;
00583    std::vector<Int_t>    rfNcut;
00584    std::vector<Rule *>   rfRules;
00585    if (norules) {
00586       // if no rules, read 8 blocks of data
00587       // this corresponds to one dummy rule
00588       for (Int_t t=0; t<8; t++) {
00589          lines += ReadFloat(f,&dumF);
00590       }
00591    }
00592    //
00593    //--------------------------------------------
00594    //       read first part of rule info
00595    //--------------------------------------------
00596    // 
00597    //   8       <int>   10      ???
00598    //   9       <float> 0.185   support
00599    //   10      <float> 0.051   coefficient
00600    //   11      <float> 2       num of cuts in rule
00601    //   12      <float> 1       ??? not used by this interface
00602    //
00603    for (Int_t r=0; r<nrules; r++) {
00604       lines += ReadFloat(f,&dumF);
00605       lines += ReadFloat(f,&dumF);
00606       rfSupp.push_back(dumF);
00607       lines += ReadFloat(f,&dumF);
00608       rfCoef.push_back(dumF);
00609       lines += ReadFloat(f,&dumF);
00610       rfNcut.push_back(static_cast<int>(dumF+0.5));
00611       lines += ReadFloat(f,&dumF);
00612       //
00613    }
00614    //--------------------------------------------
00615    //       read second part of rule info
00616    //--------------------------------------------
00617    //
00618    // Per range (cut):
00619    //   0    <float> 1       varind
00620    //   1    <float> -1.0    low
00621    //   2    <float>  1.56   high
00622    //
00623 
00624    for (Int_t r=0; r<nrules; r++) {
00625       Int_t    varind;
00626       Double_t xmin;
00627       Double_t xmax;
00628       Rule *rule = new Rule(fRuleFit->GetRuleEnsemblePtr());
00629       rfRules.push_back( rule );
00630       RuleCut *rfcut = new RuleCut();
00631       rfcut->SetNvars(rfNcut[r]);
00632       rule->SetRuleCut( rfcut );
00633       // the below are set to default values since no info is
00634       // available in rulefit.sum
00635       rule->SetNorm(1.0);
00636       rule->SetSupport(0);
00637       rule->SetSSB(0.0);
00638       rule->SetSSBNeve(0.0);
00639       rule->SetImportanceRef(1.0);
00640       rule->SetSSB(0.0);
00641       rule->SetSSBNeve(0.0);
00642       // set support etc
00643       rule->SetSupport(rfSupp[r]);
00644       rule->SetCoefficient(rfCoef[r]);
00645       rule->CalcImportance();
00646       imp = rule->GetImportance();
00647       if (imp>impref) impref = imp; // find max importance
00648       //
00649       fLogger << kDEBUG << "Rule #" << r << " : " << nvars << Endl;
00650       fLogger << kDEBUG << "  support  = " << rfSupp[r] << Endl;
00651       fLogger << kDEBUG << "  sigma    = " << rule->GetSigma() << Endl;
00652       fLogger << kDEBUG << "  coeff    = " << rfCoef[r] << Endl;
00653       fLogger << kDEBUG << "  N(cut)   = " << rfNcut[r] << Endl;
00654 
00655       for (Int_t c=0; c<rfNcut[r]; c++) {
00656          lines += ReadFloat(f,&dumF);
00657          varind = static_cast<Int_t>(dumF+0.5)-1;
00658          lines += ReadFloat(f,&dumF);
00659          xmin   = static_cast<Double_t>(dumF);
00660          lines += ReadFloat(f,&dumF);
00661          xmax   = static_cast<Double_t>(dumF);
00662          // create Rule HERE!
00663          rfcut->SetSelector(c,varind);
00664          rfcut->SetCutMin(c,xmin);
00665          rfcut->SetCutMax(c,xmax);
00666          // the following is not nice - this is however defined
00667          // by the rulefit.sum format.
00668          rfcut->SetCutDoMin(c,(xmin<-8.99e35 ? kFALSE:kTRUE));
00669          rfcut->SetCutDoMax(c,(xmax> 8.99e35 ? kFALSE:kTRUE));
00670          //
00671       }
00672    }
00673    fRuleFit->GetRuleEnsemblePtr()->SetRules( rfRules );
00674    fRuleFit->GetRuleEnsemblePtr()->SetOffset( offset );
00675    //--------------------------------------------
00676    //       read second part of rule info
00677    //--------------------------------------------
00678    //
00679    // Per linear term:
00680    // 73      1               var index
00681    // 74      -1.99594        min
00682    // 75      1.99403         max
00683    // 76      -0.000741858    ??? average ???
00684    // 77      0.970935        std
00685    // 78      0               coeff
00686    //
00687    std::vector<Int_t>    varind;
00688    std::vector<Double_t> xmin;
00689    std::vector<Double_t> xmax;
00690    std::vector<Double_t> average;
00691    std::vector<Double_t> stdev;
00692    std::vector<Double_t> norm;
00693    std::vector<Double_t> coeff;
00694    //
00695    for (Int_t c=0; c<nvars; c++) {
00696       lines += ReadFloat(f,&dumF);
00697       varind.push_back(static_cast<Int_t>(dumF+0.5)-1);
00698       lines += ReadFloat(f,&dumF);
00699       xmin.push_back(static_cast<Double_t>(dumF));
00700       lines += ReadFloat(f,&dumF);
00701       xmax.push_back(static_cast<Double_t>(dumF));
00702       lines += ReadFloat(f,&dumF);
00703       average.push_back(static_cast<Double_t>(dumF));
00704       lines += ReadFloat(f,&dumF);
00705       stdev.push_back(static_cast<Double_t>(dumF));
00706       Double_t nv = fRuleFit->GetRuleEnsemblePtr()->CalcLinNorm(stdev.back());
00707       norm.push_back(nv);
00708       lines += ReadFloat(f,&dumF);
00709       coeff.push_back(dumF/nv); // save coefficient for normalised var
00710       //
00711       fLogger << kDEBUG << "Linear #" << c << Endl;
00712       fLogger << kDEBUG << "  varind   = " << varind.back()  << Endl;
00713       fLogger << kDEBUG << "  xmin     = " << xmin.back()    << Endl;
00714       fLogger << kDEBUG << "  xmax     = " << xmax.back()    << Endl;
00715       fLogger << kDEBUG << "  average  = " << average.back() << Endl;
00716       fLogger << kDEBUG << "  stdev    = " << stdev.back()  << Endl;
00717       fLogger << kDEBUG << "  coeff    = " << coeff.back()  << Endl;
00718    }
00719    if (xmin.size()>0) {      
00720       fRuleFit->GetRuleEnsemblePtr()->SetLinCoefficients(coeff);
00721       fRuleFit->GetRuleEnsemblePtr()->SetLinDM(xmin);
00722       fRuleFit->GetRuleEnsemblePtr()->SetLinDP(xmax);
00723       fRuleFit->GetRuleEnsemblePtr()->SetLinNorm(norm);
00724    }
00725    //   fRuleFit->GetRuleEnsemblePtr()->CalcImportance();
00726    imp = fRuleFit->GetRuleEnsemblePtr()->CalcLinImportance();
00727    if (imp>impref) impref=imp;
00728    fRuleFit->GetRuleEnsemblePtr()->SetImportanceRef(impref);
00729    fRuleFit->GetRuleEnsemblePtr()->CleanupLinear(); // to fill fLinTermOK vector
00730 
00731    fRuleFit->GetRuleEnsemblePtr()->CalcVarImportance();
00732    //   fRuleFit->GetRuleEnsemblePtr()->CalcRuleSupport();
00733 
00734    fLogger << kDEBUG << "Reading model done" << Endl;
00735    return kTRUE;
00736 }
00737 
00738 //_______________________________________________________________________
00739 Int_t TMVA::RuleFitAPI::RunRuleFit()
00740 {
00741    // execute rf_go.exe
00742    TString oldDir = gSystem->pwd();
00743    TString cmd = "./rf_go.exe"; 
00744    gSystem->cd(fRFWorkDir.Data());
00745    int rval = gSystem->Exec(cmd.Data());
00746    gSystem->cd(oldDir.Data());
00747    return rval;
00748 }

Generated on Tue Jul 5 15:25:36 2011 for ROOT_528-00b_version by  doxygen 1.5.1