MakeModelAndMeasurements.cxx

Go to the documentation of this file.
00001 // @(#)root/roostats:$Id:  cranmer $
00002 // Author: Kyle Cranmer, Akira Shibata
00003 /*************************************************************************
00004  * Copyright (C) 1995-2008, Rene Brun and Fons Rademakers.               *
00005  * All rights reserved.                                                  *
00006  *                                                                       *
00007  * For the licensing terms see $ROOTSYS/LICENSE.                         *
00008  * For the list of contributors see $ROOTSYS/README/CREDITS.             *
00009  *************************************************************************/
00010 
00011 //_________________________________________________
00012 /*
00013 BEGIN_HTML
00014 <p>
00015 This is a package that creates a RooFit probability density function from ROOT histograms 
00016 of expected distributions and histograms that represent the +/- 1 sigma variations 
00017 from systematic effects. The resulting probability density function can then be used
00018 with any of the statistical tools provided within RooStats, such as the profile 
00019 likelihood ratio, Feldman-Cousins, etc.  In this version, the model is directly
00020 fed to a likelihodo ratio test, but it needs to be further factorized.</p>
00021 
00022 <p>
00023 The user needs to provide histograms (in picobarns per bin) and configure the job
00024 with XML.  The configuration XML is defined in the file config/Config.dtd, but essentially
00025 it is organized as follows (see config/Combination.xml and config/ee.xml for examples)</p>
00026 
00027 <ul>
00028 <li> - a top level 'Combination' that is composed of:</li>
00029 <ul>
00030  <li>- several 'Channels' (eg. ee, emu, mumu), which are composed of:</li>
00031  <ul>
00032   <li>- several 'Samples' (eg. signal, bkg1, bkg2, ...), each of which has:</li>
00033   <ul>
00034    <li> - a name</li>
00035    <li> - if the sample is normalized by theory (eg N = L*sigma) or not (eg. data driven)</li>
00036    <li> - a nominal expectation histogram</li>
00037    <li> - a named 'Normalization Factor' (which can be fixed or allowed to float in a fit)</li>
00038    <li> - several 'Overall Systematics' in normalization with:</li>
00039    <ul>
00040     <li> - a name</li>
00041     <li> - +/- 1 sigma variations (eg. 1.05 and 0.95 for a 5% uncertainty)</li>
00042    </ul>
00043    <li>- several 'Histogram Systematics' in shape with:</li>
00044    <ul>
00045     <li>- a name (which can be shared with the OverallSyst if correlated)</li>
00046     <li>- +/- 1 sigma variational histograms</li>
00047    </ul>
00048   </ul>
00049  </ul>
00050  <li>- several 'Measurements' (corresponding to a full fit of the model) each of which specifies</li>
00051  <ul>
00052   <li>- a name for this fit to be used in tables and files</li>
00053   <ul>
00054    <li>      - what is the luminosity associated to the measurement in picobarns</li>
00055    <li>      - which bins of the histogram should be used</li>
00056    <li>      - what is the relative uncertainty on the luminosity </li>
00057    <li>      - what is (are) the parameter(s) of interest that will be measured</li>
00058    <li>      - which parameters should be fixed/floating (eg. nuisance parameters)</li>
00059   </ul>
00060  </ul>
00061 </ul>
00062 END_HTML
00063 */
00064 //
00065 
00066 
00067 // from std
00068 #include <string>
00069 #include <vector>
00070 #include <map>
00071 #include <iostream>
00072 #include <sstream>
00073 
00074 // from root
00075 #include "TFile.h"
00076 #include "TH1F.h"
00077 #include "TDOMParser.h"
00078 #include "TXMLAttr.h"
00079 #include "TString.h"
00080 
00081 // from roofit
00082 #include "RooStats/ModelConfig.h"
00083 
00084 // from this package
00085 #include "Helper.h"
00086 #include "ConfigParser.h"
00087 #include "RooStats/HistFactory/EstimateSummary.h"
00088 #include "RooStats/HistFactory/HistoToWorkspaceFactory.h"
00089 
00090 
00091 using namespace RooFit;
00092 using namespace RooStats;
00093 using namespace HistFactory;
00094 
00095 void topDriver(string input);
00096 void fastDriver(string input); // in MakeModelAndMeasurementsFast
00097 
00098 //_____________________________batch only_____________________
00099 #ifndef __CINT__
00100 
00101 int main(int argc, char** argv) {
00102 
00103   if(! (argc>1)) {
00104     cerr << "need input file" << endl;
00105     exit(1);
00106   }
00107 
00108   if(argc==2){
00109     string input(argv[1]);
00110     fastDriver(input);
00111   }
00112 
00113   if(argc==3){
00114     string flag(argv[1]);
00115     string input(argv[2]);
00116     if(flag=="-standard_form")
00117       fastDriver(input);
00118     else if(flag=="-number_counting_form")
00119       topDriver(input);
00120     else
00121       cerr <<"unrecognized flag.  Options are -standard_form or -number_counting_form"<<endl;
00122 
00123   }
00124   return 0;
00125 }
00126 
00127 #endif
00128 
00129 void topDriver(string input ){
00130 
00131   // TO DO:
00132   // would like to fully factorize the XML parsing.  
00133   // No clear need to have some here and some in ConfigParser
00134 
00135   /*** read in the input xml ***/
00136   TDOMParser xmlparser;
00137   Int_t parseError = xmlparser.ParseFile( input.c_str() );
00138   if( parseError ) { 
00139     std::cerr << "Loading of xml document \"" << input
00140           << "\" failed" << std::endl;
00141   } 
00142 
00143   cout << "reading input : " << input << endl;
00144   TXMLDocument* xmldoc = xmlparser.GetXMLDocument();
00145   TXMLNode* rootNode = xmldoc->GetRootNode();
00146 
00147   if( rootNode->GetNodeName() == TString( "Combination" ) ){
00148     string outputFileName, outputFileNamePrefix;
00149     vector<string> xml_input;
00150 
00151     TListIter attribIt = rootNode->GetAttributes();
00152     TXMLAttr* curAttr = 0;
00153     while( ( curAttr = dynamic_cast< TXMLAttr* >( attribIt() ) ) != 0 ) {
00154       if( curAttr->GetName() == TString( "OutputFilePrefix" ) ) {
00155         outputFileNamePrefix=string(curAttr->GetValue());
00156         cout << "output file is : " << outputFileName << endl;
00157       }
00158     } 
00159     TXMLNode* node = rootNode->GetChildren();
00160     while( node != 0 ) {
00161       if( node->GetNodeName() == TString( "Input" ) ) {
00162         xml_input.push_back(node->GetText());
00163       }
00164       node = node->GetNextNode();
00165     }
00166     node = rootNode->GetChildren();
00167     while( node != 0 ) {
00168       if( node->GetNodeName() == TString( "Measurement" ) ) {
00169 
00170         Double_t nominalLumi=0, lumiRelError=0, lumiError=0;
00171         Int_t lowBin=0, highBin=0;
00172         string rowTitle, POI, mode;
00173         vector<string> systToFix;
00174         map<string,double> gammaSyst;
00175         map<string,double> uniformSyst;
00176         map<string,double> logNormSyst;
00177         bool exportOnly = false;
00178 
00179         //        TListIter attribIt = node->GetAttributes();
00180         //        TXMLAttr* curAttr = 0;
00181         attribIt = node->GetAttributes();
00182         curAttr = 0;
00183         while( ( curAttr = dynamic_cast< TXMLAttr* >( attribIt() ) ) != 0 ) {
00184           if( curAttr->GetName() == TString( "Lumi" ) ) {
00185             nominalLumi=atof(curAttr->GetValue());
00186           }
00187           if( curAttr->GetName() == TString( "LumiRelErr" ) ) {
00188             lumiRelError=atof(curAttr->GetValue());
00189           }
00190           if( curAttr->GetName() == TString( "BinLow" ) ) {
00191             lowBin=atoi(curAttr->GetValue());
00192           }
00193           if( curAttr->GetName() == TString( "BinHigh" ) ) {
00194             highBin=atoi(curAttr->GetValue());
00195           }
00196           if( curAttr->GetName() == TString( "Name" ) ) {
00197             rowTitle=curAttr->GetValue();
00198             outputFileName=outputFileNamePrefix+"_"+rowTitle+".root";
00199           }
00200           if( curAttr->GetName() == TString( "Mode" ) ) {
00201             mode=curAttr->GetValue();
00202           }
00203           if( curAttr->GetName() == TString( "ExportOnly" ) ) {
00204             if(curAttr->GetValue() == TString( "True" ) )
00205               exportOnly = true;
00206             else
00207               exportOnly = false;
00208           }
00209         }
00210         lumiError=nominalLumi*lumiRelError;
00211 
00212         TXMLNode* mnode = node->GetChildren();
00213         while( mnode != 0 ) {
00214           if( mnode->GetNodeName() == TString( "POI" ) ) {
00215             POI=mnode->GetText();
00216           }
00217           if( mnode->GetNodeName() == TString( "ParamSetting" ) ) {
00218             //            TListIter attribIt = mnode->GetAttributes();
00219             //TXMLAttr* curAttr = 0;
00220             attribIt = mnode->GetAttributes();
00221             curAttr = 0;
00222             while( ( curAttr = dynamic_cast< TXMLAttr* >( attribIt() ) ) != 0 ) {
00223               if( curAttr->GetName() == TString( "Const" ) ) {
00224                 if(curAttr->GetValue()==TString("True")){
00225                   AddSubStrings(systToFix, mnode->GetText());
00226                 }
00227               }
00228             }
00229           }
00230           if( mnode->GetNodeName() == TString( "ConstraintTerm" ) ) {
00231             vector<string> syst; string type = ""; double rel = 0;
00232             AddSubStrings(syst,mnode->GetText());
00233             //            TListIter attribIt = mnode->GetAttributes();
00234             //            TXMLAttr* curAttr = 0;
00235             attribIt = mnode->GetAttributes();
00236             curAttr = 0;
00237             while( ( curAttr = dynamic_cast< TXMLAttr* >( attribIt() ) ) != 0 ) {
00238               if( curAttr->GetName() == TString( "Type" ) ) {
00239                 type = curAttr->GetValue();
00240               }
00241               if( curAttr->GetName() == TString( "RelativeUncertainty" ) ) {
00242                 rel = atof(curAttr->GetValue());
00243               }
00244             }
00245             if (type=="Gamma" && rel!=0) {
00246               for (vector<string>::const_iterator it=syst.begin(); it!=syst.end(); it++) gammaSyst[(*it).c_str()] = rel;
00247             }
00248             if (type=="Uniform" && rel!=0) {
00249               for (vector<string>::const_iterator it=syst.begin(); it!=syst.end(); it++) uniformSyst[(*it).c_str()] = rel;
00250             }
00251             if (type=="LogNormal" && rel!=0) {
00252               for (vector<string>::const_iterator it=syst.begin(); it!=syst.end(); it++) logNormSyst[(*it).c_str()] = rel;
00253             }
00254           }
00255           mnode = mnode->GetNextNode();
00256         }
00257 
00258         /* Do measurement */
00259         cout << "using lumi = " << nominalLumi << " and lumiError = " << lumiError
00260              << " including bins between " << lowBin << " and " << highBin << endl;
00261         cout << "fixing the following parameters:"  << endl;
00262         for(vector<string>::iterator itr=systToFix.begin(); itr!=systToFix.end(); ++itr){
00263           cout << "   " << *itr << endl;
00264         }
00265 
00266         /***
00267             Construction of Model. Only requirement is that they return vector<vector<EstimateSummary> >
00268             This is where we use the factory.
00269         ***/
00270 
00271         vector<vector<EstimateSummary> > summaries;
00272         if(xml_input.empty()){
00273           cerr << "no input channels found" << endl;
00274           exit(1);
00275         }
00276 
00277 
00278         vector<RooWorkspace*> chs;
00279         vector<string> ch_names;
00280         TFile* outFile = new TFile(outputFileName.c_str(), "recreate");
00281         HistoToWorkspaceFactory factory(outputFileNamePrefix, rowTitle, systToFix, nominalLumi, lumiError, lowBin, highBin , outFile);
00282 
00283 
00284         // for results tables
00285         fprintf(factory.pFile, " %s &", rowTitle.c_str() );
00286 
00287         // read the xml for each channel and combine
00288         for(vector<string>::iterator itr=xml_input.begin(); itr!=xml_input.end(); ++itr){
00289           vector<EstimateSummary> oneChannel;
00290           // read xml
00291           ReadXmlConfig(*itr, oneChannel, nominalLumi);
00292           // not really needed anymore
00293           summaries.push_back(oneChannel);
00294           // use factory to create the workspace
00295           string ch_name=oneChannel[0].channel;
00296           ch_names.push_back(ch_name);
00297           RooWorkspace * ws = factory.MakeSingleChannelModel(oneChannel, systToFix);
00298           chs.push_back(ws);
00299           // set poi in ModelConfig
00300           ModelConfig * proto_config = (ModelConfig *) ws->obj("ModelConfig");
00301           cout << "Setting Parameter of Interest as :" << POI << endl;
00302           RooRealVar* poi = (RooRealVar*) ws->var(POI.c_str());
00303           RooArgSet * params= new RooArgSet;
00304           params->add(*poi);
00305           proto_config->SetParameters(*params);
00306 
00307 
00308           // Gamma/Uniform Constraints:
00309           // turn some Gaussian constraints into Gamma/Uniform/LogNorm constraints, rename model newSimPdf
00310           if(gammaSyst.size()>0 || uniformSyst.size()>0 || logNormSyst.size()>0) {
00311             factory.EditSyst(ws,("model_"+oneChannel[0].channel).c_str(),gammaSyst,uniformSyst,logNormSyst);
00312             proto_config->SetPdf(*ws->pdf("newSimPdf"));
00313           }
00314 
00315           // fill out ModelConfig and export
00316           RooAbsData* expData = ws->data("expData");
00317           proto_config->GuessObsAndNuisance(*expData);
00318           ws->writeToFile((outputFileNamePrefix+"_"+ch_name+"_"+rowTitle+"_model.root").c_str());
00319 
00320           // do fit unless exportOnly requested
00321           if(!exportOnly)
00322             factory.FitModel(ws, ch_name, "newSimPdf", "expData", false);
00323           fprintf(factory.pFile, " & " );
00324         }
00325 
00326         /***
00327             Make the combined model:
00328             If you want output histograms in root format, create and pass it to the combine routine.
00329             "combine" : will do the individual cross-section measurements plus combination
00330 
00331         ***/
00332 
00333 
00334           
00335         if(mode.find("comb")!=string::npos){ 
00336           RooWorkspace* ws=factory.MakeCombinedModel(ch_names,chs);
00337           // Gamma/Uniform Constraints:
00338           // turn some Gaussian constraints into Gamma/Uniform/logNormal constraints, rename model newSimPdf
00339           if(gammaSyst.size()>0 || uniformSyst.size()>0 || logNormSyst.size()>0) 
00340             factory.EditSyst(ws,"simPdf",gammaSyst,uniformSyst,logNormSyst);
00341           //
00342           // set parameter of interest according to the configuration
00343           //
00344           ModelConfig * combined_config = (ModelConfig *) ws->obj("ModelConfig");
00345           cout << "Setting Parameter of Interest as :" << POI << endl;
00346           RooRealVar* poi = (RooRealVar*) ws->var((POI).c_str());
00347           //RooRealVar* poi = (RooRealVar*) ws->var((POI+"_comb").c_str());
00348           RooArgSet * params= new RooArgSet;
00349           cout << poi << endl;
00350           params->add(*poi);
00351           combined_config->SetParameters(*params);
00352           ws->Print();
00353 
00354           // Set new PDF if there are gamma/uniform constraint terms
00355           if(gammaSyst.size()>0 || uniformSyst.size()>0 || logNormSyst.size()>0) 
00356             combined_config->SetPdf(*ws->pdf("newSimPdf"));
00357 
00358           RooAbsData* simData = ws->data("simData");
00359           combined_config->GuessObsAndNuisance(*simData);
00360           //      ws->writeToFile(("results/model_combined_edited.root").c_str());
00361           ws->writeToFile((outputFileNamePrefix+"_combined_"+rowTitle+"_model.root").c_str());
00362 
00363           // TO DO:
00364           // Totally factorize the statistical test in "fit Model" to a different area
00365           if(!exportOnly)
00366             factory.FitModel(ws, "combined", "simPdf", "simData", false);
00367         }
00368 
00369 
00370         fprintf(factory.pFile, " \\\\ \n");
00371 
00372         outFile->Close();
00373         delete outFile;
00374 
00375       }
00376       node = node->GetNextNode(); // next measurement
00377     }
00378   }
00379 }
00380 
00381 
00382 

Generated on Tue Jul 5 14:52:33 2011 for ROOT_528-00b_version by  doxygen 1.5.1