MakeModelAndMeasurementsFast.cxx

Go to the documentation of this file.
00001 // @(#)root/roostats:$Id:  cranmer $
00002 // Author: Kyle Cranmer, Akira Shibata
00003 /*************************************************************************
00004  * Copyright (C) 1995-2008, Rene Brun and Fons Rademakers.               *
00005  * All rights reserved.                                                  *
00006  *                                                                       *
00007  * For the licensing terms see $ROOTSYS/LICENSE.                         *
00008  * For the list of contributors see $ROOTSYS/README/CREDITS.             *
00009  *************************************************************************/
00010 
00011 //_________________________________________________
00012 /*
00013 BEGIN_HTML
00014 <p>
00015 This is a package that creates a RooFit probability density function from ROOT histograms 
00016 of expected distributions and histograms that represent the +/- 1 sigma variations 
00017 from systematic effects. The resulting probability density function can then be used
00018 with any of the statistical tools provided within RooStats, such as the profile 
00019 likelihood ratio, Feldman-Cousins, etc.  In this version, the model is directly
00020 fed to a likelihodo ratio test, but it needs to be further factorized.</p>
00021 
00022 <p>
00023 The user needs to provide histograms (in picobarns per bin) and configure the job
00024 with XML.  The configuration XML is defined in the file config/Config.dtd, but essentially
00025 it is organized as follows (see config/Combination.xml and config/ee.xml for examples)</p>
00026 
00027 <ul>
00028 <li> - a top level 'Combination' that is composed of:</li>
00029 <ul>
00030  <li>- several 'Channels' (eg. ee, emu, mumu), which are composed of:</li>
00031  <ul>
00032   <li>- several 'Samples' (eg. signal, bkg1, bkg2, ...), each of which has:</li>
00033   <ul>
00034    <li> - a name</li>
00035    <li> - if the sample is normalized by theory (eg N = L*sigma) or not (eg. data driven)</li>
00036    <li> - a nominal expectation histogram</li>
00037    <li> - a named 'Normalization Factor' (which can be fixed or allowed to float in a fit)</li>
00038    <li> - several 'Overall Systematics' in normalization with:</li>
00039    <ul>
00040     <li> - a name</li>
00041     <li> - +/- 1 sigma variations (eg. 1.05 and 0.95 for a 5% uncertainty)</li>
00042    </ul>
00043    <li>- several 'Histogram Systematics' in shape with:</li>
00044    <ul>
00045     <li>- a name (which can be shared with the OverallSyst if correlated)</li>
00046     <li>- +/- 1 sigma variational histograms</li>
00047    </ul>
00048   </ul>
00049  </ul>
00050  <li>- several 'Measurements' (corresponding to a full fit of the model) each of which specifies</li>
00051  <ul>
00052   <li>- a name for this fit to be used in tables and files</li>
00053   <ul>
00054    <li>      - what is the luminosity associated to the measurement in picobarns</li>
00055    <li>      - which bins of the histogram should be used</li>
00056    <li>      - what is the relative uncertainty on the luminosity </li>
00057    <li>      - what is (are) the parameter(s) of interest that will be measured</li>
00058    <li>      - which parameters should be fixed/floating (eg. nuisance parameters)</li>
00059   </ul>
00060  </ul>
00061 </ul>
00062 END_HTML
00063 */
00064 //
00065 
00066 
00067 // from std
00068 #include <string>
00069 #include <vector>
00070 #include <map>
00071 #include <iostream>
00072 #include <sstream>
00073 
00074 // from root
00075 #include "TFile.h"
00076 #include "TH1F.h"
00077 #include "TDOMParser.h"
00078 #include "TXMLAttr.h"
00079 #include "TString.h"
00080 
00081 // from roofit
00082 #include "RooStats/ModelConfig.h"
00083 
00084 // from this package
00085 #include "Helper.h"
00086 #include "ConfigParser.h"
00087 #include "RooStats/HistFactory/EstimateSummary.h"
00088 #include "RooStats/HistFactory/HistoToWorkspaceFactoryFast.h"
00089 
00090 
00091 using namespace RooFit;
00092 using namespace RooStats;
00093 using namespace HistFactory;
00094 
00095 
00096 // main is int MakeModelAndMeasurements
00097 void fastDriver(string input ){
00098   // TO DO:
00099   // would like to fully factorize the XML parsing.  
00100   // No clear need to have some here and some in ConfigParser
00101 
00102   /*** read in the input xml ***/
00103   TDOMParser xmlparser;
00104   Int_t parseError = xmlparser.ParseFile( input.c_str() );
00105   if( parseError ) { 
00106     std::cerr << "Loading of xml document \"" << input
00107           << "\" failed" << std::endl;
00108   } 
00109 
00110   cout << "reading input : " << input << endl;
00111   TXMLDocument* xmldoc = xmlparser.GetXMLDocument();
00112   TXMLNode* rootNode = xmldoc->GetRootNode();
00113 
00114   if( rootNode->GetNodeName() == TString( "Combination" ) ){
00115     string outputFileName, outputFileNamePrefix;
00116     vector<string> xml_input;
00117 
00118     TListIter attribIt = rootNode->GetAttributes();
00119     TXMLAttr* curAttr = 0;
00120     while( ( curAttr = dynamic_cast< TXMLAttr* >( attribIt() ) ) != 0 ) {
00121       if( curAttr->GetName() == TString( "OutputFilePrefix" ) ) {
00122         outputFileNamePrefix=string(curAttr->GetValue());
00123         cout << "output file is : " << outputFileName << endl;
00124       }
00125     } 
00126     TXMLNode* node = rootNode->GetChildren();
00127     while( node != 0 ) {
00128       if( node->GetNodeName() == TString( "Input" ) ) {
00129         xml_input.push_back(node->GetText());
00130       }
00131       node = node->GetNextNode();
00132     }
00133     node = rootNode->GetChildren();
00134     while( node != 0 ) {
00135       if( node->GetNodeName() == TString( "Measurement" ) ) {
00136 
00137         Double_t nominalLumi=0, lumiRelError=0, lumiError=0;
00138         Int_t lowBin=0, highBin=0;
00139         string rowTitle, POI, mode;
00140         vector<string> systToFix;
00141         map<string,double> gammaSyst;
00142         map<string,double> uniformSyst;
00143         map<string,double> logNormSyst;
00144         bool exportOnly = false;
00145 
00146         //        TListIter attribIt = node->GetAttributes();
00147         //        TXMLAttr* curAttr = 0;
00148         attribIt = node->GetAttributes();
00149         curAttr = 0;
00150         while( ( curAttr = dynamic_cast< TXMLAttr* >( attribIt() ) ) != 0 ) {
00151           if( curAttr->GetName() == TString( "Lumi" ) ) {
00152             nominalLumi=atof(curAttr->GetValue());
00153           }
00154           if( curAttr->GetName() == TString( "LumiRelErr" ) ) {
00155             lumiRelError=atof(curAttr->GetValue());
00156           }
00157           if( curAttr->GetName() == TString( "BinLow" ) ) {
00158             lowBin=atoi(curAttr->GetValue());
00159           }
00160           if( curAttr->GetName() == TString( "BinHigh" ) ) {
00161             highBin=atoi(curAttr->GetValue());
00162           }
00163           if( curAttr->GetName() == TString( "Name" ) ) {
00164             rowTitle=curAttr->GetValue();
00165             outputFileName=outputFileNamePrefix+"_"+rowTitle+".root";
00166           }
00167           if( curAttr->GetName() == TString( "Mode" ) ) {
00168             mode=curAttr->GetValue();
00169           }
00170           if( curAttr->GetName() == TString( "ExportOnly" ) ) {
00171             if(curAttr->GetValue() == TString( "True" ) )
00172               exportOnly = true;
00173             else
00174               exportOnly = false;
00175           }
00176         }
00177         lumiError=nominalLumi*lumiRelError;
00178 
00179         TXMLNode* mnode = node->GetChildren();
00180         while( mnode != 0 ) {
00181           if( mnode->GetNodeName() == TString( "POI" ) ) {
00182             POI=mnode->GetText();
00183           }
00184           if( mnode->GetNodeName() == TString( "ParamSetting" ) ) {
00185             //            TListIter attribIt = mnode->GetAttributes();
00186             //TXMLAttr* curAttr = 0;
00187             attribIt = mnode->GetAttributes();
00188             curAttr = 0;
00189             while( ( curAttr = dynamic_cast< TXMLAttr* >( attribIt() ) ) != 0 ) {
00190               if( curAttr->GetName() == TString( "Const" ) ) {
00191                 if(curAttr->GetValue()==TString("True")){
00192                   AddSubStrings(systToFix, mnode->GetText());
00193                 }
00194               }
00195             }
00196           }
00197           if( mnode->GetNodeName() == TString( "ConstraintTerm" ) ) {
00198             vector<string> syst; string type = ""; double rel = 0;
00199             AddSubStrings(syst,mnode->GetText());
00200             //            TListIter attribIt = mnode->GetAttributes();
00201             //            TXMLAttr* curAttr = 0;
00202             attribIt = mnode->GetAttributes();
00203             curAttr = 0;
00204             while( ( curAttr = dynamic_cast< TXMLAttr* >( attribIt() ) ) != 0 ) {
00205               if( curAttr->GetName() == TString( "Type" ) ) {
00206                 type = curAttr->GetValue();
00207               }
00208               if( curAttr->GetName() == TString( "RelativeUncertainty" ) ) {
00209                 rel = atof(curAttr->GetValue());
00210               }
00211             }
00212             if (type=="Gamma" && rel!=0) {
00213               for (vector<string>::const_iterator it=syst.begin(); it!=syst.end(); it++) gammaSyst[(*it).c_str()] = rel;
00214             }
00215             if (type=="Uniform" && rel!=0) {
00216               for (vector<string>::const_iterator it=syst.begin(); it!=syst.end(); it++) uniformSyst[(*it).c_str()] = rel;
00217             }
00218             if (type=="LogNormal" && rel!=0) {
00219               for (vector<string>::const_iterator it=syst.begin(); it!=syst.end(); it++) logNormSyst[(*it).c_str()] = rel;
00220             }
00221           }
00222           mnode = mnode->GetNextNode();
00223         }
00224 
00225         /* Do measurement */
00226         cout << "using lumi = " << nominalLumi << " and lumiError = " << lumiError
00227              << " including bins between " << lowBin << " and " << highBin << endl;
00228         cout << "fixing the following parameters:"  << endl;
00229         for(vector<string>::iterator itr=systToFix.begin(); itr!=systToFix.end(); ++itr){
00230           cout << "   " << *itr << endl;
00231         }
00232 
00233         /***
00234             Construction of Model. Only requirement is that they return vector<vector<EstimateSummary> >
00235             This is where we use the factory.
00236         ***/
00237 
00238         vector<vector<EstimateSummary> > summaries;
00239         if(xml_input.empty()){
00240           cerr << "no input channels found" << endl;
00241           exit(1);
00242         }
00243 
00244 
00245         vector<RooWorkspace*> chs;
00246         vector<string> ch_names;
00247         TFile* outFile = new TFile(outputFileName.c_str(), "recreate");
00248         HistoToWorkspaceFactoryFast factory(outputFileNamePrefix, rowTitle, systToFix, nominalLumi, lumiError, lowBin, highBin , outFile);
00249 
00250 
00251         // for results tables
00252         fprintf(factory.pFile, " %s &", rowTitle.c_str() );
00253 
00254         // read the xml for each channel and combine
00255         for(vector<string>::iterator itr=xml_input.begin(); itr!=xml_input.end(); ++itr){
00256           vector<EstimateSummary> oneChannel;
00257           // read xml
00258           ReadXmlConfig(*itr, oneChannel, nominalLumi);
00259           // not really needed anymore
00260           summaries.push_back(oneChannel);
00261           // use factory to create the workspace
00262           string ch_name=oneChannel[0].channel;
00263           ch_names.push_back(ch_name);
00264           RooWorkspace * ws = factory.MakeSingleChannelModel(oneChannel, systToFix);
00265           chs.push_back(ws);
00266           // set poi in ModelConfig
00267           ModelConfig * proto_config = (ModelConfig *) ws->obj("ModelConfig");
00268           cout << "Setting Parameter of Interest as :" << POI << endl;
00269           RooRealVar* poi = (RooRealVar*) ws->var(POI.c_str());
00270           RooArgSet * params= new RooArgSet;
00271           params->add(*poi);
00272           proto_config->SetParameters(*params);
00273 
00274 
00275           // Gamma/Uniform Constraints:
00276           // turn some Gaussian constraints into Gamma/Uniform/LogNorm constraints, rename model newSimPdf
00277           if(gammaSyst.size()>0 || uniformSyst.size()>0 || logNormSyst.size()>0) {
00278             factory.EditSyst(ws,("model_"+oneChannel[0].channel).c_str(),gammaSyst,uniformSyst,logNormSyst);
00279             proto_config->SetPdf(*ws->pdf("newSimPdf"));
00280           }
00281 
00282           // fill out ModelConfig and export
00283           RooAbsData* expData = ws->data("asimovData");
00284           proto_config->GuessObsAndNuisance(*expData);
00285           ws->writeToFile((outputFileNamePrefix+"_"+ch_name+"_"+rowTitle+"_model.root").c_str());
00286 
00287           // do fit unless exportOnly requested
00288           if(!exportOnly){
00289             if(ws->data("obsData")){
00290               factory.FitModel(ws, ch_name, "newSimPdf", "obsData", false);
00291             } else {
00292               factory.FitModel(ws, ch_name, "newSimPdf", "asimovData", false);
00293             }
00294 
00295           }
00296           fprintf(factory.pFile, " & " );
00297         }
00298 
00299         /***
00300             Make the combined model:
00301             If you want output histograms in root format, create and pass it to the combine routine.
00302             "combine" : will do the individual cross-section measurements plus combination
00303 
00304         ***/
00305 
00306 
00307           
00308         if(mode.find("comb")!=string::npos){ 
00309           RooWorkspace* ws=factory.MakeCombinedModel(ch_names,chs);
00310           // Gamma/Uniform Constraints:
00311           // turn some Gaussian constraints into Gamma/Uniform/logNormal constraints, rename model newSimPdf
00312           if(gammaSyst.size()>0 || uniformSyst.size()>0 || logNormSyst.size()>0) 
00313             factory.EditSyst(ws,"simPdf",gammaSyst,uniformSyst,logNormSyst);
00314           //
00315           // set parameter of interest according to the configuration
00316           //
00317           ModelConfig * combined_config = (ModelConfig *) ws->obj("ModelConfig");
00318           cout << "Setting Parameter of Interest as :" << POI << endl;
00319           RooRealVar* poi = (RooRealVar*) ws->var((POI).c_str());
00320           //RooRealVar* poi = (RooRealVar*) ws->var((POI+"_comb").c_str());
00321           RooArgSet * params= new RooArgSet;
00322           cout << poi << endl;
00323           params->add(*poi);
00324           combined_config->SetParameters(*params);
00325           ws->Print();
00326 
00327           // Set new PDF if there are gamma/uniform constraint terms
00328           if(gammaSyst.size()>0 || uniformSyst.size()>0 || logNormSyst.size()>0) 
00329             combined_config->SetPdf(*ws->pdf("newSimPdf"));
00330 
00331           RooAbsData* simData = ws->data("asimovData");
00332           combined_config->GuessObsAndNuisance(*simData);
00333           //      ws->writeToFile(("results/model_combined_edited.root").c_str());
00334           ws->writeToFile((outputFileNamePrefix+"_combined_"+rowTitle+"_model.root").c_str());
00335 
00336           // TO DO:
00337           // Totally factorize the statistical test in "fit Model" to a different area
00338           if(!exportOnly){
00339             if(ws->data("obsData")){
00340               factory.FitModel(ws, "combined", "simPdf", "obsData", false);
00341             } else {
00342               factory.FitModel(ws, "combined", "simPdf", "asimovData", false);
00343             }
00344           }
00345         }
00346 
00347 
00348         fprintf(factory.pFile, " \\\\ \n");
00349 
00350         outFile->Close();
00351         delete outFile;
00352 
00353       }
00354       node = node->GetNextNode(); // next measurement
00355     }
00356   }
00357 }
00358 
00359 
00360 

Generated on Tue Jul 5 14:52:33 2011 for ROOT_528-00b_version by  doxygen 1.5.1