00001 // @(#)root/splot:$Id: TSPlot.cxx 35916 2010-09-30 14:28:20Z brun $ 00002 // Author: Muriel Pivk, Anna Kreshuk 10/2005 00003 00004 /********************************************************************** 00005 * * 00006 * Copyright (c) 2005 ROOT Foundation, CERN/PH-SFT * 00007 * * 00008 **********************************************************************/ 00009 00010 #include "TSPlot.h" 00011 #include "TVirtualFitter.h" 00012 #include "TH1.h" 00013 #include "TTreePlayer.h" 00014 #include "TTreeFormula.h" 00015 #include "TTreeFormulaManager.h" 00016 #include "TSelectorDraw.h" 00017 #include "TBrowser.h" 00018 #include "TClass.h" 00019 #include "TMath.h" 00020 00021 extern void Yields(Int_t &, Double_t *, Double_t &f, Double_t *x, Int_t iflag); 00022 00023 ClassImp(TSPlot) 00024 00025 //____________________________________________________________________ 00026 //Begin_Html <!-- 00027 /* --> 00028 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN"> 00029 <p> 00030 <b><font size="+2">Overview</font></b> 00031 00032 </p><p> 00033 A common method used in High Energy Physics to perform measurements is 00034 the maximum Likelihood method, exploiting discriminating variables to 00035 disentangle signal from background. The crucial point for such an 00036 analysis to be reliable is to use an exhaustive list of sources of 00037 events combined with an accurate description of all the Probability 00038 Density Functions (PDF). 00039 </p><p>To assess the validity of the fit, a convincing quality check 00040 is to explore further the data sample by examining the distributions of 00041 control variables. A control variable can be obtained for instance by 00042 removing one of the discriminating variables before performing again 00043 the maximum Likelihood fit: this removed variable is a control 00044 variable. The expected distribution of this control variable, for 00045 signal, is to be compared to the one extracted, for signal, from the 00046 data sample. In order to be able to do so, one must be able to unfold 00047 from the distribution of the whole data sample. 00048 </p><p>The TSPlot method allows to reconstruct the distributions for 00049 the control variable, independently for each of the various sources of 00050 events, without making use of any <em>a priori</em> knowledge on <u>this</u> 00051 variable. The aim is thus to use the knowledge available for the 00052 discriminating variables to infer the behaviour of the individual 00053 sources of events with respect to the control variable. 00054 </p><p> 00055 TSPlot is optimal if the control variable is uncorrelated with the discriminating variables. 00056 00057 </p><p> 00058 A detail description of the formalism itself, called <!-- MATH 00059 $\hbox{$_s$}{\cal P}lot$ 00060 --> 00061 <img src="gif/sPlot_img5.png" alt="$\hbox{$_s$}{\cal P}lot$" align="middle" border="0" height="34" width="48">, is given in [<a href="http://www.slac.stanford.edu/%7Epivk/sPlot/sPlot_ROOT/node1.html#bib:sNIM">1</a>]. 00062 00063 </p><p> 00064 <b><font size="+2">The method</font></b> 00065 00066 </p><p> 00067 The <!-- MATH 00068 $\hbox{$_s$}{\cal P}lot$ 00069 --> 00070 <img src="gif/sPlot_img5.png" alt="$\hbox{$_s$}{\cal P}lot$" align="middle" border="0" height="34" width="48"> technique is developped in the above context of a maximum Likelihood method making use of discriminating variables. 00071 00072 </p><p>One considers a data sample in which are merged several species 00073 of events. These species represent various signal components and 00074 background components which all together account for the data sample. 00075 The different terms of the log-Likelihood are: 00076 </p><ul> 00077 <li><img src="gif/sPlot_img6.png" alt="$N$" align="bottom" border="0" height="17" width="22">: the total number of events in the data sample, 00078 </li> 00079 <li><!-- MATH 00080 ${\rm N}_{\rm s}$ 00081 --> 00082 <img src="gif/sPlot_img7.png" alt="${\rm N}_{\rm s}$" align="middle" border="0" height="34" width="25">: the number of species of events populating the data sample, 00083 </li> 00084 <li><img src="gif/sPlot_img8.png" alt="$N_i$" align="middle" border="0" height="34" width="25">: the number of events expected on the average for the <img src="gif/sPlot_img9.png" alt="$i^{\rm th}$" align="bottom" border="0" height="20" width="25"> species, 00085 </li> 00086 <li><!-- MATH 00087 ${\rm f}_i(y_e)$ 00088 --> 00089 <img src="gif/sPlot_img10.png" alt="${\rm f}_i(y_e)$" align="middle" border="0" height="37" width="47">: the value of the PDFs of the discriminating variables <img src="gif/sPlot_img11.png" alt="$y$" align="middle" border="0" height="33" width="15"> for the <img src="gif/sPlot_img12.png" alt="$i^{th}$" align="bottom" border="0" height="20" width="25"> species and for event <img src="gif/sPlot_img13.png" alt="$e$" align="bottom" border="0" height="17" width="13">, 00090 </li> 00091 <li><img src="gif/sPlot_img14.png" alt="$x$" align="bottom" border="0" height="17" width="15">: the set of control variables which, by definition, do not appear in the expression of the Likelihood function <img src="gif/sPlot_img15.png" alt="${\cal L}$" align="bottom" border="0" height="18" width="18">. 00092 </li> 00093 </ul> 00094 The extended log-Likelihood reads: 00095 <br> 00096 <div align="right"> 00097 00098 <!-- MATH 00099 \begin{equation} 00100 {\cal L}=\sum_{e=1}^{N}\ln \Big\{ \sum_{i=1}^{{\rm N}_{\rm s}}N_i{\rm f}_i(y_e) \Big\} -\sum_{i=1}^{{\rm N}_{\rm s}}N_i ~. 00101 \end{equation} 00102 --> 00103 <table align="center" width="100%"> 00104 <tbody><tr valign="middle"><td align="center" nowrap="nowrap"><a name="eq:eLik"></a><img src="gif/sPlot_img16.png" alt="\begin{displaymath} 00105 {\cal L}=\sum_{e=1}^{N}\ln \Big\{ \sum_{i=1}^{{\rm N}_{\rm s}}N_i{\rm f}_i(y_e) \Big\} -\sum_{i=1}^{{\rm N}_{\rm s}}N_i ~. 00106 \end{displaymath}" border="0" height="59" width="276"></td> 00107 <td align="right" width="10"> 00108 (1)</td></tr> 00109 </tbody></table> 00110 <br clear="all"></div><p></p> 00111 From this expression, after maximization of <img src="gif/sPlot_img15.png" alt="${\cal L}$" align="bottom" border="0" height="18" width="18"> with respect to the <img src="gif/sPlot_img8.png" alt="$N_i$" align="middle" border="0" height="34" width="25"> parameters, a weight can be computed for every event and each species, in order to obtain later the true distribution <!-- MATH 00112 ${\hbox{\bf {M}}}_i(x)$ 00113 --> 00114 <img src="gif/sPlot_img17.png" alt="${\hbox{\bf {M}}}_i(x)$" align="middle" border="0" height="37" width="56"> of variable <img src="gif/sPlot_img14.png" alt="$x$" align="bottom" border="0" height="17" width="15">. If <img src="gif/sPlot_img18.png" alt="${\rm n}$" align="bottom" border="0" height="17" width="15"> is one of the <!-- MATH 00115 ${\rm N}_{\rm s}$ 00116 --> 00117 <img src="gif/sPlot_img7.png" alt="${\rm N}_{\rm s}$" align="middle" border="0" height="34" width="25"> species present in the data sample, the weight for this species is defined by: 00118 <br> 00119 <div align="right"> 00120 00121 <!-- MATH 00122 \begin{equation} 00123 \begin{Large}\fbox{$ 00124 {_s{\cal P}}_{\rm n}(y_e)={\sum_{j=1}^{{\rm N}_{\rm s}} \hbox{\bf V}_{{\rm n}j}{\rm f}_j(y_e)\over\sum_{k=1}^{{\rm N}_{\rm s}}N_k{\rm f}_k(y_e) } $}\end{Large} ~, 00125 \end{equation} 00126 --> 00127 <table align="center" width="100%"> 00128 <tbody><tr valign="middle"><td align="center" nowrap="nowrap"><a name="eq:weightxnotiny"></a><img src="gif/sPlot_img19.png" alt="\begin{displaymath} 00129 \begin{Large} 00130 \fbox{$ 00131 {_s{\cal P}}_{\rm n}(y_e)={\sum_{j=1}^... 00132 ...um_{k=1}^{{\rm N}_{\rm s}}N_k{\rm f}_k(y_e) } $}\end{Large} ~, 00133 \end{displaymath}" border="0" height="76" width="279"></td> 00134 <td align="right" width="10"> 00135 (2)</td></tr> 00136 </tbody></table> 00137 <br clear="all"></div><p></p> 00138 where <!-- MATH 00139 $\hbox{\bf V}_{{\rm n}j}$ 00140 --> 00141 <img src="gif/sPlot_img20.png" alt="$\hbox{\bf V}_{{\rm n}j}$" align="middle" border="0" height="34" width="35"> 00142 is the covariance matrix resulting from the Likelihood maximization. 00143 This matrix can be used directly from the fit, but this is numerically 00144 less accurate than the direct computation: 00145 <br> 00146 <div align="right"> 00147 00148 <!-- MATH 00149 \begin{equation} 00150 \hbox{\bf V}^{-1}_{{\rm n}j}~=~ 00151 {\partial^2(-{\cal L})\over\partial N_{\rm n}\partial N_j}~=~ 00152 \sum_{e=1}^N {{\rm f}_{\rm n}(y_e){\rm f}_j(y_e)\over(\sum_{k=1}^{{\rm N}_{\rm s}}N_k{\rm f}_k(y_e))^2} ~. 00153 \end{equation} 00154 --> 00155 <table align="center" width="100%"> 00156 <tbody><tr valign="middle"><td align="center" nowrap="nowrap"><a name="eq:VarianceMatrixDirect"></a><img src="gif/sPlot_img21.png" alt="\begin{displaymath} 00157 \hbox{\bf V}^{-1}_{{\rm n}j}~=~ 00158 {\partial^2(-{\cal L})\over\... 00159 ...y_e)\over(\sum_{k=1}^{{\rm N}_{\rm s}}N_k{\rm f}_k(y_e))^2} ~. 00160 \end{displaymath}" border="0" height="58" width="360"></td> 00161 <td align="right" width="10"> 00162 (3)</td></tr> 00163 </tbody></table> 00164 <br clear="all"></div><p></p> 00165 The distribution of the control variable <img src="gif/sPlot_img14.png" alt="$x$" align="bottom" border="0" height="17" width="15"> obtained by histogramming the weighted events reproduces, on average, the true distribution <!-- MATH 00166 ${\hbox{\bf {M}}}_{\rm n}(x)$ 00167 --> 00168 <img src="gif/sPlot_img22.png" alt="${\hbox{\bf {M}}}_{\rm n}(x)$" align="middle" border="0" height="37" width="59">. 00169 00170 <p> 00171 The class TSPlot allows to reconstruct the true distribution <!-- MATH 00172 ${\hbox{\bf {M}}}_{\rm n}(x)$ 00173 --> 00174 <img src="gif/sPlot_img22.png" alt="${\hbox{\bf {M}}}_{\rm n}(x)$" align="middle" border="0" height="37" width="59"> of a control variable <img src="gif/sPlot_img14.png" alt="$x$" align="bottom" border="0" height="17" width="15"> for each of the <!-- MATH 00175 ${\rm N}_{\rm s}$ 00176 --> 00177 <img src="gif/sPlot_img7.png" alt="${\rm N}_{\rm s}$" align="middle" border="0" height="34" width="25"> species from the sole knowledge of the PDFs of the discriminating variables <img src="gif/sPlot_img23.png" alt="${\rm f}_i(y)$" align="middle" border="0" height="37" width="40">. The plots obtained thanks to the TSPlot class are called <!-- MATH 00178 $\hbox{$_s$}{\cal P}lots$ 00179 --> 00180 <img src="gif/sPlot_img4.png" alt="$\hbox {$_s$}{\cal P}lots$" align="middle" border="0" height="34" width="57">. 00181 00182 </p><p> 00183 <b><font size="+2">Some properties and checks</font></b> 00184 00185 </p><p> 00186 Beside reproducing the true distribution, <!-- MATH 00187 $\hbox{$_s$}{\cal P}lots$ 00188 --> 00189 <img src="gif/sPlot_img4.png" alt="$\hbox {$_s$}{\cal P}lots$" align="middle" border="0" height="34" width="57"> bear remarkable properties: 00190 00191 </p><ul> 00192 <li> 00193 Each <img src="gif/sPlot_img14.png" alt="$x$" align="bottom" border="0" height="17" width="15">-distribution is properly normalized: 00194 <br> 00195 <div align="right"> 00196 00197 <!-- MATH 00198 \begin{equation} 00199 \sum_{e=1}^{N} {_s{\cal P}}_{\rm n}(y_e)~=~N_{\rm n}~. 00200 \end{equation} 00201 --> 00202 <table align="center" width="100%"> 00203 <tbody><tr valign="middle"><td align="center" nowrap="nowrap"><a name="eq:NormalizationOK"></a><img src="gif/sPlot_img24.png" alt="\begin{displaymath} 00204 \sum_{e=1}^{N} {_s{\cal P}}_{\rm n}(y_e)~=~N_{\rm n}~. 00205 \end{displaymath}" border="0" height="58" width="158"></td> 00206 <td align="right" width="10"> 00207 (4)</td></tr> 00208 </tbody></table> 00209 <br clear="all"></div><p></p> 00210 </li> 00211 <li> 00212 For any event: 00213 <br> 00214 <div align="right"> 00215 00216 <!-- MATH 00217 \begin{equation} 00218 \sum_{l=1}^{{\rm N}_{\rm s}} {_s{\cal P}}_l(y_e) ~=~1 ~. 00219 \end{equation} 00220 --> 00221 <table align="center" width="100%"> 00222 <tbody><tr valign="middle"><td align="center" nowrap="nowrap"><a name="eq:numberconservation"></a><img src="gif/sPlot_img25.png" alt="\begin{displaymath} 00223 \sum_{l=1}^{{\rm N}_{\rm s}} {_s{\cal P}}_l(y_e) ~=~1 ~. 00224 \end{displaymath}" border="0" height="59" width="140"></td> 00225 <td align="right" width="10"> 00226 (5)</td></tr> 00227 </tbody></table> 00228 <br clear="all"></div><p></p> 00229 That is to say that, summing up the <!-- MATH 00230 ${\rm N}_{\rm s}$ 00231 --> 00232 <img src="gif/sPlot_img7.png" alt="${\rm N}_{\rm s}$" align="middle" border="0" height="34" width="25"> <!-- MATH 00233 $\hbox{$_s$}{\cal P}lots$ 00234 --> 00235 <img src="gif/sPlot_img4.png" alt="$\hbox {$_s$}{\cal P}lots$" align="middle" border="0" height="34" width="57">, one recovers the data sample distribution in <img src="gif/sPlot_img14.png" alt="$x$" align="bottom" border="0" height="17" width="15">, and summing up the number of events entering in a <!-- MATH 00236 $\hbox{$_s$}{\cal P}lot$ 00237 --> 00238 <img src="gif/sPlot_img5.png" alt="$\hbox{$_s$}{\cal P}lot$" align="middle" border="0" height="34" width="48"> for a given species, one recovers the yield of the species, as provided by the fit. The property <a href="http://www.slac.stanford.edu/%7Epivk/sPlot/sPlot_ROOT/sPlot_ROOT.html#eq:NormalizationOK">4</a> is implemented in the TSPlot class as a check. 00239 </li> 00240 <li>the sum of the statistical uncertainties per bin 00241 <br> 00242 <div align="right"> 00243 00244 <!-- MATH 00245 \begin{equation} 00246 \sigma[N_{\rm n}\ _s\tilde{\rm M}_{\rm n}(x) {\delta x}]~=~\sqrt{\sum_{e \subset {\delta x}} ({_s{\cal P}}_{\rm n})^2} ~. 00247 \end{equation} 00248 --> 00249 <table align="center" width="100%"> 00250 <tbody><tr valign="middle"><td align="center" nowrap="nowrap"><a name="eq:ErrorPerBin"></a><img src="gif/sPlot_img26.png" alt="\begin{displaymath} 00251 \sigma[N_{\rm n}\ _s\tilde{\rm M}_{\rm n}(x) {\delta x}]~=~\sqrt{\sum_{e \subset {\delta x}} ({_s{\cal P}}_{\rm n})^2} ~. 00252 \end{displaymath}" border="0" height="55" width="276"></td> 00253 <td align="right" width="10"> 00254 (6)</td></tr> 00255 </tbody></table> 00256 <br clear="all"></div><p></p> 00257 reproduces the statistical uncertainty on the yield <img src="gif/sPlot_img27.png" alt="$N_{\rm n}$" align="middle" border="0" height="34" width="28">, as provided by the fit: <!-- MATH 00258 $\sigma[N_{\rm n}]\equiv\sqrt{\hbox{\bf V}_{{\rm n}{\rm n}}}$ 00259 --> 00260 <img src="gif/sPlot_img28.png" alt="$\sigma[N_{\rm n}]\equiv\sqrt{\hbox{\bf V}_{{\rm n}{\rm n}}}$" align="middle" border="0" height="40" width="123">. 00261 Because of that and since the determination of the yields is optimal 00262 when obtained using a Likelihood fit, one can conclude that the<!-- MATH 00263 $\hbox{$_s$}{\cal P}lot$ 00264 --> 00265 <img src="gif/sPlot_img5.png" alt="$\hbox{$_s$}{\cal P}lot$" align="middle" border="0" height="34" width="48"> technique is itself an optimal method to reconstruct distributions of control variables. 00266 </li> 00267 </ul> 00268 00269 <p> 00270 <b><font size="+2">Different steps followed by TSPlot</font></b> 00271 00272 </p><p> 00273 00274 </p><ol> 00275 <li>A maximum Likelihood fit is performed to obtain the yields <img src="gif/sPlot_img8.png" alt="$N_i$" align="middle" border="0" height="34" width="25"> of the various species. 00276 The fit relies on discriminating variables <img src="gif/sPlot_img11.png" alt="$y$" align="middle" border="0" height="33" width="15"> uncorrelated with a control variable <img src="gif/sPlot_img14.png" alt="$x$" align="bottom" border="0" height="17" width="15">: 00277 the later is therefore totally absent from the fit. 00278 </li> 00279 <li>The weights <img src="gif/sPlot_img29.png" alt="${_s{\cal P}}$" align="middle" border="0" height="34" width="27"> are calculated using Eq. (<a href="http://www.slac.stanford.edu/%7Epivk/sPlot/sPlot_ROOT/sPlot_ROOT.html#eq:weightxnotiny">2</a>) where the covariance matrix is taken from Minuit. 00280 </li> 00281 <li>Histograms of <img src="gif/sPlot_img14.png" alt="$x$" align="bottom" border="0" height="17" width="15"> are filled by weighting the events with <img src="gif/sPlot_img29.png" alt="${_s{\cal P}}$" align="middle" border="0" height="34" width="27">. 00282 </li> 00283 <li>Error bars per bin are given by Eq. (<a href="http://www.slac.stanford.edu/%7Epivk/sPlot/sPlot_ROOT/sPlot_ROOT.html#eq:ErrorPerBin">6</a>). 00284 </li> 00285 </ol> 00286 The <!-- MATH 00287 $\hbox{$_s$}{\cal P}lots$ 00288 --> 00289 <img src="gif/sPlot_img4.png" alt="$\hbox {$_s$}{\cal P}lots$" align="middle" border="0" height="34" width="57"> reproduce the true distributions of the species in the control variable <img src="gif/sPlot_img14.png" alt="$x$" align="bottom" border="0" height="17" width="15">, within the above defined statistical uncertainties. 00290 00291 <p> 00292 <b><font size="+2">Illustrations</font></b> 00293 00294 </p><p> 00295 To illustrate the technique, one considers an example derived from the analysis where <!-- MATH 00296 $\hbox{$_s$}{\cal P}lots$ 00297 --> 00298 <img src="gif/sPlot_img4.png" alt="$\hbox {$_s$}{\cal P}lots$" align="middle" border="0" height="34" width="57"> 00299 have been first used (charmless B decays). One is dealing with a data 00300 sample in which two species are present: the first is termed signal and 00301 the second background. A maximum Likelihood fit is performed to obtain 00302 the two yields <img src="gif/sPlot_img30.png" alt="$N_1$" align="middle" border="0" height="34" width="27"> and <img src="gif/sPlot_img31.png" alt="$N_2$" align="middle" border="0" height="34" width="27">. The fit relies on two discriminating variables collectively denoted <img src="gif/sPlot_img11.png" alt="$y$" align="middle" border="0" height="33" width="15"> which are chosen within three possible variables denoted <img src="gif/sPlot_img1.png" alt="${m_{\rm ES}}$" align="middle" border="0" height="33" width="39">, <img src="gif/sPlot_img2.png" alt="$\Delta E$" align="bottom" border="0" height="17" width="35"> and <img src="gif/sPlot_img3.png" alt="${\cal F}$" align="bottom" border="0" height="18" width="20">. 00303 The variable which is not incorporated in <img src="gif/sPlot_img11.png" alt="$y$" align="middle" border="0" height="33" width="15"> is used as the control variable <img src="gif/sPlot_img14.png" alt="$x$" align="bottom" border="0" height="17" width="15">. The six distributions of the three variables are assumed to be the ones depicted in Fig. <a href="http://www.slac.stanford.edu/%7Epivk/sPlot/sPlot_ROOT/sPlot_ROOT.html#fig:pdfs">1</a>. 00304 00305 </p><p> 00306 00307 </p><div align="center"><a name="fig:pdfs"></a><a name="106"></a> 00308 <table> 00309 <caption align="bottom"><strong>Figure 1:</strong> 00310 Distributions of the three discriminating variables available to perform the Likelihood fit: 00311 <img src="gif/sPlot_img32.png" alt="${m_{\rm ES}}$" align="middle" border="0" height="33" width="39">, <img src="gif/sPlot_img2.png" alt="$\Delta E$" align="bottom" border="0" height="17" width="35">, <img src="gif/sPlot_img3.png" alt="${\cal F}$" align="bottom" border="0" height="18" width="20">. 00312 Among the three variables, two are used to perform the fit while one is 00313 kept out of the fit to serve the purpose of a control variable. The 00314 three distributions on the top (resp. bottom) of the figure correspond 00315 to the signal (resp. background). The unit of the vertical axis is 00316 chosen such that it indicates the number of entries per bin, if one 00317 slices the histograms in 25 bins.</caption> 00318 <tbody><tr><td><img src="gif/sPlot_img33.png" alt="\begin{figure}\begin{center} 00319 \mbox{{\psfig{file=pdfmesNIM.eps,width=0.33\linewi... 00320 ...th}} 00321 {\psfig{file=pdffiNIM.eps,width=0.33\linewidth}}} 00322 \end{center}\end{figure}" border="0" height="162" width="544"></td></tr> 00323 </tbody></table> 00324 </div> 00325 00326 <p> 00327 A data sample being built through a Monte Carlo simulation based on the distributions shown in Fig. <a href="http://www.slac.stanford.edu/%7Epivk/sPlot/sPlot_ROOT/sPlot_ROOT.html#fig:pdfs">1</a>, one obtains the three distributions of Fig. <a href="http://www.slac.stanford.edu/%7Epivk/sPlot/sPlot_ROOT/sPlot_ROOT.html#fig:pdfstot">2</a>. Whereas the distribution of <img src="gif/sPlot_img2.png" alt="$\Delta E$" align="bottom" border="0" height="17" width="35"> clearly indicates the presence of the signal, the distribution of <img src="gif/sPlot_img1.png" alt="${m_{\rm ES}}$" align="middle" border="0" height="33" width="39"> and <img src="gif/sPlot_img3.png" alt="${\cal F}$" align="bottom" border="0" height="18" width="20"> are less obviously populated by signal. 00328 00329 </p><p> 00330 00331 </p><div align="center"><a name="fig:pdfstot"></a><a name="169"></a> 00332 <table> 00333 <caption align="bottom"><strong>Figure 2:</strong> 00334 Distributions of the three discriminating variables for signal plus 00335 background. The three distributions are the ones obtained from a data 00336 sample obtained through a Monte Carlo simulation based on the 00337 distributions shown in Fig. <a href="http://www.slac.stanford.edu/%7Epivk/sPlot/sPlot_ROOT/sPlot_ROOT.html#fig:pdfs">1</a>. The data sample consists of 500 signal events and 5000 background events.</caption> 00338 <tbody><tr><td><img src="gif/sPlot_img34.png" alt="\begin{figure}\begin{center} 00339 \mbox{{\psfig{file=genmesTOTNIM.eps,width=0.33\lin... 00340 ...} 00341 {\psfig{file=genfiTOTNIM.eps,width=0.33\linewidth}}} 00342 \end{center}\end{figure}" border="0" height="160" width="545"></td></tr> 00343 </tbody></table> 00344 </div> 00345 00346 <p> 00347 Chosing <img src="gif/sPlot_img2.png" alt="$\Delta E$" align="bottom" border="0" height="17" width="35"> and <img src="gif/sPlot_img3.png" alt="${\cal F}$" align="bottom" border="0" height="18" width="20"> as discriminating variables to determine <img src="gif/sPlot_img30.png" alt="$N_1$" align="middle" border="0" height="34" width="27"> and <img src="gif/sPlot_img31.png" alt="$N_2$" align="middle" border="0" height="34" width="27"> through a maximum Likelihood fit, one builds, for the control variable <img src="gif/sPlot_img1.png" alt="${m_{\rm ES}}$" align="middle" border="0" height="33" width="39"> which is unknown to the fit, the two <!-- MATH 00348 $\hbox{$_s$}{\cal P}lots$ 00349 --> 00350 <img src="gif/sPlot_img4.png" alt="$\hbox {$_s$}{\cal P}lots$" align="middle" border="0" height="34" width="57"> for signal and background shown in Fig. <a href="http://www.slac.stanford.edu/%7Epivk/sPlot/sPlot_ROOT/sPlot_ROOT.html#fig:messPlots">3</a>. One observes that the <!-- MATH 00351 $\hbox{$_s$}{\cal P}lot$ 00352 --> 00353 <img src="gif/sPlot_img5.png" alt="$\hbox{$_s$}{\cal P}lot$" align="middle" border="0" height="34" width="48"> 00354 for signal reproduces correctly the PDF even where the latter vanishes, 00355 although the error bars remain sizeable. This results from the almost 00356 complete cancellation between positive and negative weights: the sum of 00357 weights is close to zero while the sum of weights squared is not. The 00358 occurence of negative weights occurs through the appearance of the 00359 covariance matrix, and its negative components, in the definition of 00360 Eq. (<a href="http://www.slac.stanford.edu/%7Epivk/sPlot/sPlot_ROOT/sPlot_ROOT.html#eq:weightxnotiny">2</a>). 00361 00362 </p><p> 00363 A word of caution is in order with respect to the error bars. Whereas 00364 their sum in quadrature is identical to the statistical uncertainties 00365 of the yields determined by the fit, and if, in addition, they are 00366 asymptotically correct, the error bars should be handled with care for 00367 low statistics and/or for too fine binning. This is because the error 00368 bars do not incorporate two known properties of the PDFs: PDFs are 00369 positive definite and can be non-zero in a given x-bin, even if in the 00370 particular data sample at hand, no event is observed in this bin. The 00371 latter limitation is not specific to<!-- MATH 00372 $\hbox{$_s$}{\cal P}lots$ 00373 --> 00374 <img src="gif/sPlot_img4.png" alt="$\hbox {$_s$}{\cal P}lots$" align="middle" border="0" height="34" width="57">, 00375 rather it is always present when one is willing to infer the PDF at the 00376 origin of an histogram, when, for some bins, the number of entries does 00377 not guaranty the applicability of the Gaussian regime. In such 00378 situations, a satisfactory practice is to attach allowed ranges to the 00379 histogram to indicate the upper and lower limits of the PDF value which 00380 are consistent with the actual observation, at a given confidence 00381 level. 00382 </p><p> 00383 00384 </p><div align="center"><a name="fig:messPlots"></a><a name="127"></a> 00385 <table> 00386 <caption align="bottom"><strong>Figure 3:</strong> 00387 The <!-- MATH 00388 $\hbox{$_s$}{\cal P}lots$ 00389 --> 00390 <img src="gif/sPlot_img4.png" alt="$\hbox {$_s$}{\cal P}lots$" align="middle" border="0" height="34" width="57"> (signal on the left, background on the right) obtained for <img src="gif/sPlot_img32.png" alt="${m_{\rm ES}}$" align="middle" border="0" height="33" width="39"> are represented as dots with error bars. They are obtained from a fit using only information from <img src="gif/sPlot_img2.png" alt="$\Delta E$" align="bottom" border="0" height="17" width="35"> and <img src="gif/sPlot_img3.png" alt="${\cal F}$" align="bottom" border="0" height="18" width="20">.</caption> 00391 <tbody><tr><td><img src="gif/sPlot_img35.png" alt="\begin{figure}\begin{center} 00392 \mbox{\psfig{file=mass-sig-sPlot.eps,width=0.48\li... 00393 ... \psfig{file=mass-bkg-sPlot.eps,width=0.48\linewidth}} 00394 \end{center}\end{figure}" border="0" height="181" width="539"></td></tr> 00395 </tbody></table> 00396 </div> 00397 00398 <p> 00399 Chosing <img src="gif/sPlot_img1.png" alt="${m_{\rm ES}}$" align="middle" border="0" height="33" width="39"> and <img src="gif/sPlot_img2.png" alt="$\Delta E$" align="bottom" border="0" height="17" width="35"> as discriminating variables to determine <img src="gif/sPlot_img30.png" alt="$N_1$" align="middle" border="0" height="34" width="27"> and <img src="gif/sPlot_img31.png" alt="$N_2$" align="middle" border="0" height="34" width="27"> through a maximum Likelihood fit, one builds, for the control variable <img src="gif/sPlot_img3.png" alt="${\cal F}$" align="bottom" border="0" height="18" width="20"> which is unknown to the fit, the two <!-- MATH 00400 $\hbox{$_s$}{\cal P}lots$ 00401 --> 00402 <img src="gif/sPlot_img4.png" alt="$\hbox {$_s$}{\cal P}lots$" align="middle" border="0" height="34" width="57"> for signal and background shown in Fig. <a href="http://www.slac.stanford.edu/%7Epivk/sPlot/sPlot_ROOT/sPlot_ROOT.html#fig:FisPlots">4</a>. In the <!-- MATH 00403 $\hbox{$_s$}{\cal P}lot$ 00404 --> 00405 <img src="gif/sPlot_img5.png" alt="$\hbox{$_s$}{\cal P}lot$" align="middle" border="0" height="34" width="48"> for signal one observes that error bars are the largest in the <img src="gif/sPlot_img14.png" alt="$x$" align="bottom" border="0" height="17" width="15"> regions where the background is the largest. 00406 00407 </p><p> 00408 00409 </p><div align="center"><a name="fig:FisPlots"></a><a name="136"></a> 00410 <table> 00411 <caption align="bottom"><strong>Figure 4:</strong> 00412 The <!-- MATH 00413 $\hbox{$_s$}{\cal P}lots$ 00414 --> 00415 <img src="gif/sPlot_img4.png" alt="$\hbox {$_s$}{\cal P}lots$" align="middle" border="0" height="34" width="57"> (signal on the left, background on the right) obtained for <img src="gif/sPlot_img3.png" alt="${\cal F}$" align="bottom" border="0" height="18" width="20"> are represented as dots with error bars. They are obtained from a fit using only information from <img src="gif/sPlot_img32.png" alt="${m_{\rm ES}}$" align="middle" border="0" height="33" width="39"> and <img src="gif/sPlot_img2.png" alt="$\Delta E$" align="bottom" border="0" height="17" width="35">.</caption> 00416 <tbody><tr><td><img src="gif/sPlot_img36.png" alt="\begin{figure}\begin{center} 00417 \mbox{\psfig{file=fisher-sig-sPlot.eps,width=0.48\... 00418 ...psfig{file=fisher-bkg-sPlot.eps,width=0.48\linewidth}} 00419 \end{center}\end{figure}" border="0" height="180" width="539"></td></tr> 00420 </tbody></table> 00421 </div> 00422 00423 <p> 00424 The results above can be obtained by running the tutorial TestSPlot.C 00425 </p> 00426 <!--*/ 00427 //-->End_Html 00428 00429 00430 //____________________________________________________________________ 00431 TSPlot::TSPlot() : 00432 fTree(0), 00433 fTreename(0), 00434 fVarexp(0), 00435 fSelection(0) 00436 { 00437 // default constructor (used by I/O only) 00438 fNx = 0; 00439 fNy=0; 00440 fNevents = 0; 00441 fNSpecies=0; 00442 fNumbersOfEvents=0; 00443 } 00444 00445 //____________________________________________________________________ 00446 TSPlot::TSPlot(Int_t nx, Int_t ny, Int_t ne, Int_t ns, TTree *tree) : 00447 fTreename(0), 00448 fVarexp(0), 00449 fSelection(0) 00450 00451 { 00452 //normal TSPlot constructor 00453 // nx : number of control variables 00454 // ny : number of discriminating variables 00455 // ne : total number of events 00456 // ns : number of species 00457 // tree: input data 00458 00459 fNx = nx; 00460 fNy=ny; 00461 fNevents = ne; 00462 fNSpecies=ns; 00463 00464 fXvar.ResizeTo(fNevents, fNx); 00465 fYvar.ResizeTo(fNevents, fNy); 00466 fYpdf.ResizeTo(fNevents, fNSpecies*fNy); 00467 fSWeights.ResizeTo(fNevents, fNSpecies*(fNy+1)); 00468 fTree = tree; 00469 fNumbersOfEvents = 0; 00470 } 00471 00472 //____________________________________________________________________ 00473 TSPlot::~TSPlot() 00474 { 00475 // destructor 00476 00477 if (fNumbersOfEvents) 00478 delete [] fNumbersOfEvents; 00479 if (!fXvarHists.IsEmpty()) 00480 fXvarHists.Delete(); 00481 if (!fYvarHists.IsEmpty()) 00482 fYvarHists.Delete(); 00483 if (!fYpdfHists.IsEmpty()) 00484 fYpdfHists.Delete(); 00485 } 00486 00487 //____________________________________________________________________ 00488 void TSPlot::Browse(TBrowser *b) 00489 { 00490 //To browse the histograms 00491 00492 if (!fSWeightsHists.IsEmpty()) { 00493 TIter next(&fSWeightsHists); 00494 TH1D* h = 0; 00495 while ((h = (TH1D*)next())) 00496 b->Add(h,h->GetName()); 00497 } 00498 00499 if (!fYpdfHists.IsEmpty()) { 00500 TIter next(&fYpdfHists); 00501 TH1D* h = 0; 00502 while ((h = (TH1D*)next())) 00503 b->Add(h,h->GetName()); 00504 } 00505 if (!fYvarHists.IsEmpty()) { 00506 TIter next(&fYvarHists); 00507 TH1D* h = 0; 00508 while ((h = (TH1D*)next())) 00509 b->Add(h,h->GetName()); 00510 } 00511 if (!fXvarHists.IsEmpty()) { 00512 TIter next(&fXvarHists); 00513 TH1D* h = 0; 00514 while ((h = (TH1D*)next())) 00515 b->Add(h,h->GetName()); 00516 } 00517 b->Add(&fSWeights, "sWeights"); 00518 } 00519 00520 00521 //____________________________________________________________________ 00522 void TSPlot::SetInitialNumbersOfSpecies(Int_t *numbers) 00523 { 00524 //Set the initial number of events of each species - used 00525 //as initial estimates in minuit 00526 00527 if (!fNumbersOfEvents) 00528 fNumbersOfEvents = new Double_t[fNSpecies]; 00529 for (Int_t i=0; i<fNSpecies; i++) 00530 fNumbersOfEvents[i]=numbers[i]; 00531 } 00532 00533 //____________________________________________________________________ 00534 void TSPlot::MakeSPlot(Option_t *option) 00535 { 00536 //Calculates the sWeights 00537 //The option controls the print level 00538 //"Q" - no print out 00539 //"V" - prints the estimated #of events in species - default 00540 //"VV" - as "V" + the minuit printing + sums of weights for control 00541 00542 00543 if (!fNumbersOfEvents){ 00544 Error("MakeSPlot","Initial numbers of events in species have not been set"); 00545 return; 00546 } 00547 Int_t i, j, ispecies; 00548 00549 TString opt = option; 00550 opt.ToUpper(); 00551 opt.ReplaceAll("VV", "W"); 00552 00553 //make sure that global fitter is minuit 00554 char s[]="TFitter"; 00555 if (TVirtualFitter::GetFitter()){ 00556 Int_t strdiff=strcmp(TVirtualFitter::GetFitter()->IsA()->GetName(), s); 00557 if (strdiff!=0) 00558 delete TVirtualFitter::GetFitter(); 00559 } 00560 00561 00562 TVirtualFitter *minuit = TVirtualFitter::Fitter(0, 2); 00563 fPdfTot.ResizeTo(fNevents, fNSpecies); 00564 00565 //now let's do it, excluding different yvars 00566 //for iplot = -1 none is excluded 00567 for (Int_t iplot=-1; iplot<fNy; iplot++){ 00568 for (i=0; i<fNevents; i++){ 00569 for (ispecies=0; ispecies<fNSpecies; ispecies++){ 00570 fPdfTot(i, ispecies)=1; 00571 for (j=0; j<fNy; j++){ 00572 if (j!=iplot) 00573 fPdfTot(i, ispecies)*=fYpdf(i, ispecies*fNy+j); 00574 } 00575 } 00576 } 00577 minuit->Clear(); 00578 minuit->SetFCN(Yields); 00579 Double_t arglist[10]; 00580 //set the print level 00581 if (opt.Contains("Q")||opt.Contains("V")){ 00582 arglist[0]=-1; 00583 } 00584 if (opt.Contains("W")) 00585 arglist[0]=0; 00586 minuit->ExecuteCommand("SET PRINT", arglist, 1); 00587 00588 minuit->SetObjectFit(&fPdfTot); //a tricky way to get fPdfTot matrix to fcn 00589 for (ispecies=0; ispecies<fNSpecies; ispecies++) 00590 minuit->SetParameter(ispecies, "", fNumbersOfEvents[ispecies], 1, 0, 0); 00591 00592 minuit->ExecuteCommand("MIGRAD", arglist, 0); 00593 for (ispecies=0; ispecies<fNSpecies; ispecies++){ 00594 fNumbersOfEvents[ispecies]=minuit->GetParameter(ispecies); 00595 if (!opt.Contains("Q")) 00596 printf("estimated #of events in species %d = %f\n", ispecies, fNumbersOfEvents[ispecies]); 00597 } 00598 if (!opt.Contains("Q")) 00599 printf("\n"); 00600 Double_t *covmat = minuit->GetCovarianceMatrix(); 00601 SPlots(covmat, iplot); 00602 00603 if (opt.Contains("W")){ 00604 Double_t *sumweight = new Double_t[fNSpecies]; 00605 for (i=0; i<fNSpecies; i++){ 00606 sumweight[i]=0; 00607 for (j=0; j<fNevents; j++) 00608 sumweight[i]+=fSWeights(j, (iplot+1)*fNSpecies + i); 00609 printf("checking sum of weights[%d]=%f\n", i, sumweight[i]); 00610 } 00611 printf("\n"); 00612 delete [] sumweight; 00613 } 00614 } 00615 } 00616 00617 //____________________________________________________________________ 00618 void TSPlot::SPlots(Double_t *covmat, Int_t i_excl) 00619 { 00620 //Computes the sWeights from the covariance matrix 00621 00622 Int_t i, ispecies, k; 00623 Double_t numerator, denominator; 00624 for (i=0; i<fNevents; i++){ 00625 denominator=0; 00626 for (ispecies=0; ispecies<fNSpecies; ispecies++) 00627 denominator+=fNumbersOfEvents[ispecies]*fPdfTot(i, ispecies); 00628 for (ispecies=0; ispecies<fNSpecies; ispecies++){ 00629 numerator=0; 00630 for (k=0; k<fNSpecies; k++) 00631 numerator+=covmat[ispecies*fNSpecies+k]*fPdfTot(i, k); 00632 fSWeights(i, (i_excl+1)*fNSpecies + ispecies)=numerator/denominator; 00633 } 00634 } 00635 00636 } 00637 00638 //____________________________________________________________________ 00639 void TSPlot::GetSWeights(TMatrixD &weights) 00640 { 00641 //Returns the matrix of sweights 00642 00643 if (weights.GetNcols()!=fNSpecies*(fNy+1) || weights.GetNrows()!=fNevents) 00644 weights.ResizeTo(fNevents, fNSpecies*(fNy+1)); 00645 weights = fSWeights; 00646 } 00647 00648 //____________________________________________________________________ 00649 void TSPlot::GetSWeights(Double_t *weights) 00650 { 00651 //Returns the matrix of sweights. It is assumed that the array passed in the argurment is big enough 00652 00653 for (Int_t i=0; i<fNevents; i++){ 00654 for (Int_t j=0; j<fNSpecies; j++){ 00655 weights[i*fNSpecies+j]=fSWeights(i, j); 00656 } 00657 } 00658 } 00659 00660 //____________________________________________________________________ 00661 void TSPlot::FillXvarHists(Int_t nbins) 00662 { 00663 //Fills the histograms of x variables (not weighted) with nbins 00664 00665 Int_t i, j; 00666 00667 if (!fXvarHists.IsEmpty()){ 00668 if (((TH1D*)fXvarHists.First())->GetNbinsX()!=nbins) 00669 fXvarHists.Delete(); 00670 else 00671 return; 00672 } 00673 00674 //make the histograms 00675 char name[10]; 00676 for (i=0; i<fNx; i++){ 00677 snprintf(name,10, "x%d", i); 00678 TH1D *h = new TH1D(name, name, nbins, fMinmax(0, i), fMinmax(1, i)); 00679 for (j=0; j<fNevents; j++) 00680 h->Fill(fXvar(j, i)); 00681 fXvarHists.Add(h); 00682 } 00683 00684 } 00685 00686 //____________________________________________________________________ 00687 TObjArray* TSPlot::GetXvarHists() 00688 { 00689 //Returns the array of histograms of x variables (not weighted) 00690 //If histograms have not already 00691 //been filled, they are filled with default binning 100. 00692 00693 Int_t nbins = 100; 00694 if (fXvarHists.IsEmpty()) 00695 FillXvarHists(nbins); 00696 else if (((TH1D*)fXvarHists.First())->GetNbinsX()!=nbins) 00697 FillXvarHists(nbins); 00698 return &fXvarHists; 00699 } 00700 00701 //____________________________________________________________________ 00702 TH1D *TSPlot::GetXvarHist(Int_t ixvar) 00703 { 00704 //Returns the histogram of variable #ixvar 00705 //If histograms have not already 00706 //been filled, they are filled with default binning 100. 00707 00708 Int_t nbins = 100; 00709 if (fXvarHists.IsEmpty()) 00710 FillXvarHists(nbins); 00711 else if (((TH1D*)fXvarHists.First())->GetNbinsX()!=nbins) 00712 FillXvarHists(nbins); 00713 00714 return (TH1D*)fXvarHists.UncheckedAt(ixvar); 00715 } 00716 00717 //____________________________________________________________________ 00718 void TSPlot::FillYvarHists(Int_t nbins) 00719 { 00720 //Fill the histograms of y variables 00721 00722 Int_t i, j; 00723 00724 if (!fYvarHists.IsEmpty()){ 00725 if (((TH1D*)fYvarHists.First())->GetNbinsX()!=nbins) 00726 fYvarHists.Delete(); 00727 else 00728 return; 00729 } 00730 00731 //make the histograms 00732 char name[10]; 00733 for (i=0; i<fNy; i++){ 00734 snprintf(name,10, "y%d", i); 00735 TH1D *h=new TH1D(name, name, nbins, fMinmax(0, fNx+i), fMinmax(1, fNx+i)); 00736 for (j=0; j<fNevents; j++) 00737 h->Fill(fYvar(j, i)); 00738 fYvarHists.Add(h); 00739 } 00740 } 00741 00742 //____________________________________________________________________ 00743 TObjArray* TSPlot::GetYvarHists() 00744 { 00745 //Returns the array of histograms of y variables. If histograms have not already 00746 //been filled, they are filled with default binning 100. 00747 00748 Int_t nbins = 100; 00749 if (fYvarHists.IsEmpty()) 00750 FillYvarHists(nbins); 00751 else if (((TH1D*)fYvarHists.First())->GetNbinsX()!=nbins) 00752 FillYvarHists(nbins); 00753 return &fYvarHists; 00754 } 00755 00756 //____________________________________________________________________ 00757 TH1D *TSPlot::GetYvarHist(Int_t iyvar) 00758 { 00759 //Returns the histogram of variable iyvar.If histograms have not already 00760 //been filled, they are filled with default binning 100. 00761 00762 Int_t nbins = 100; 00763 if (fYvarHists.IsEmpty()) 00764 FillYvarHists(nbins); 00765 else if (((TH1D*)fYvarHists.First())->GetNbinsX()!=nbins) 00766 FillYvarHists(nbins); 00767 return (TH1D*)fYvarHists.UncheckedAt(iyvar); 00768 } 00769 00770 //____________________________________________________________________ 00771 void TSPlot::FillYpdfHists(Int_t nbins) 00772 { 00773 //Fills the histograms of pdf-s of y variables with binning nbins 00774 00775 Int_t i, j, ispecies; 00776 00777 if (!fYpdfHists.IsEmpty()){ 00778 if (((TH1D*)fYpdfHists.First())->GetNbinsX()!=nbins) 00779 fYpdfHists.Delete(); 00780 else 00781 return; 00782 } 00783 00784 char name[30]; 00785 for (ispecies=0; ispecies<fNSpecies; ispecies++){ 00786 for (i=0; i<fNy; i++){ 00787 snprintf(name,30, "pdf_species%d_y%d", ispecies, i); 00788 //TH1D *h = new TH1D(name, name, nbins, ypdfmin[ispecies*fNy+i], ypdfmax[ispecies*fNy+i]); 00789 TH1D *h = new TH1D(name, name, nbins, fMinmax(0, fNx+fNy+ispecies*fNy+i), fMinmax(1, fNx+fNy+ispecies*fNy+i)); 00790 for (j=0; j<fNevents; j++) 00791 h->Fill(fYpdf(j, ispecies*fNy+i)); 00792 fYpdfHists.Add(h); 00793 } 00794 } 00795 } 00796 00797 //____________________________________________________________________ 00798 TObjArray* TSPlot::GetYpdfHists() 00799 { 00800 //Returns the array of histograms of pdf's of y variables with binning nbins 00801 //If histograms have not already 00802 //been filled, they are filled with default binning 100. 00803 00804 Int_t nbins = 100; 00805 if (fYpdfHists.IsEmpty()) 00806 FillYpdfHists(nbins); 00807 00808 return &fYpdfHists; 00809 } 00810 00811 //____________________________________________________________________ 00812 TH1D *TSPlot::GetYpdfHist(Int_t iyvar, Int_t ispecies) 00813 { 00814 //Returns the histogram of the pdf of variable #iyvar for species #ispecies, binning nbins 00815 //If histograms have not already 00816 //been filled, they are filled with default binning 100. 00817 00818 Int_t nbins = 100; 00819 if (fYpdfHists.IsEmpty()) 00820 FillYpdfHists(nbins); 00821 00822 return (TH1D*)fYpdfHists.UncheckedAt(fNy*ispecies+iyvar); 00823 } 00824 00825 //____________________________________________________________________ 00826 void TSPlot::FillSWeightsHists(Int_t nbins) 00827 { 00828 //The order of histograms in the array: 00829 //x0_species0, x0_species1,..., x1_species0, x1_species1,..., y0_species0, y0_species1,... 00830 //If the histograms have already been filled with a different binning, they are refilled 00831 //and all histograms are deleted 00832 00833 if (fSWeights.GetNoElements()==0){ 00834 Error("GetSWeightsHists", "SWeights were not computed"); 00835 return; 00836 } 00837 00838 if (!fSWeightsHists.IsEmpty()){ 00839 if (((TH1D*)fSWeightsHists.First())->GetNbinsX()!=nbins) 00840 fSWeightsHists.Delete(); 00841 else 00842 return; 00843 } 00844 00845 char name[30]; 00846 00847 //Fill histograms of x-variables weighted with sWeights 00848 for (Int_t ivar=0; ivar<fNx; ivar++){ 00849 for (Int_t ispecies=0; ispecies<fNSpecies; ispecies++){ 00850 snprintf(name,30, "x%d_species%d", ivar, ispecies); 00851 TH1D *h = new TH1D(name, name, nbins, fMinmax(0, ivar), fMinmax(1, ivar)); 00852 h->Sumw2(); 00853 for (Int_t ievent=0; ievent<fNevents; ievent++) 00854 h->Fill(fXvar(ievent, ivar), fSWeights(ievent, ispecies)); 00855 fSWeightsHists.AddLast(h); 00856 } 00857 } 00858 00859 //Fill histograms of y-variables (exluded from the fit), weighted with sWeights 00860 for (Int_t iexcl=0; iexcl<fNy; iexcl++){ 00861 for(Int_t ispecies=0; ispecies<fNSpecies; ispecies++){ 00862 snprintf(name,30, "y%d_species%d", iexcl, ispecies); 00863 TH1D *h = new TH1D(name, name, nbins, fMinmax(0, fNx+iexcl), fMinmax(1, fNx+iexcl)); 00864 h->Sumw2(); 00865 for (Int_t ievent=0; ievent<fNevents; ievent++) 00866 h->Fill(fYvar(ievent, iexcl), fSWeights(ievent, fNSpecies*(iexcl+1)+ispecies)); 00867 fSWeightsHists.AddLast(h); 00868 } 00869 } 00870 } 00871 00872 //____________________________________________________________________ 00873 TObjArray *TSPlot::GetSWeightsHists() 00874 { 00875 //Returns an array of all histograms of variables, weighted with sWeights 00876 //If histograms have not been already filled, they are filled with default binning 50 00877 //The order of histograms in the array: 00878 //x0_species0, x0_species1,..., x1_species0, x1_species1,..., y0_species0, y0_species1,... 00879 00880 Int_t nbins = 50; //default binning 00881 if (fSWeightsHists.IsEmpty()) 00882 FillSWeightsHists(nbins); 00883 00884 return &fSWeightsHists; 00885 } 00886 00887 //____________________________________________________________________ 00888 void TSPlot::RefillHist(Int_t type, Int_t nvar, Int_t nbins, Double_t min, Double_t max, Int_t nspecies) 00889 { 00890 //The Fill...Hist() methods fill the histograms with the real limits on the variables 00891 //This method allows to refill the specified histogram with user-set boundaries min and max 00892 //Parameters: 00893 //type = 1 - histogram of x variable #nvar 00894 // = 2 - histogram of y variable #nvar 00895 // = 3 - histogram of y_pdf for y #nvar and species #nspecies 00896 // = 4 - histogram of x variable #nvar, species #nspecies, WITH sWeights 00897 // = 5 - histogram of y variable #nvar, species #nspecies, WITH sWeights 00898 00899 if (type<1 || type>5){ 00900 Error("RefillHist", "type must lie between 1 and 5"); 00901 return; 00902 } 00903 char name[20]; 00904 Int_t j; 00905 TH1D *hremove; 00906 if (type==1){ 00907 hremove = (TH1D*)fXvarHists.RemoveAt(nvar); 00908 delete hremove; 00909 snprintf(name,20,"x%d",nvar); 00910 TH1D *h = new TH1D(name, name, nbins, min, max); 00911 for (j=0; j<fNevents;j++) 00912 h->Fill(fXvar(j, nvar)); 00913 fXvarHists.AddAt(h, nvar); 00914 } 00915 if (type==2){ 00916 hremove = (TH1D*)fYvarHists.RemoveAt(nvar); 00917 delete hremove; 00918 snprintf(name,20, "y%d", nvar); 00919 TH1D *h = new TH1D(name, name, nbins, min, max); 00920 for (j=0; j<fNevents;j++) 00921 h->Fill(fYvar(j, nvar)); 00922 fXvarHists.AddAt(h, nvar); 00923 } 00924 if (type==3){ 00925 hremove = (TH1D*)fYpdfHists.RemoveAt(nspecies*fNy+nvar); 00926 delete hremove; 00927 snprintf(name,20, "pdf_species%d_y%d", nspecies, nvar); 00928 TH1D *h=new TH1D(name, name, nbins, min, max); 00929 for (j=0; j<fNevents; j++) 00930 h->Fill(fYpdf(j, nspecies*fNy+nvar)); 00931 fYpdfHists.AddAt(h, nspecies*fNy+nvar); 00932 } 00933 if (type==4){ 00934 hremove = (TH1D*)fSWeightsHists.RemoveAt(fNSpecies*nvar+nspecies); 00935 delete hremove; 00936 snprintf(name,20, "x%d_species%d", nvar, nspecies); 00937 TH1D *h = new TH1D(name, name, nbins, min, max); 00938 h->Sumw2(); 00939 for (Int_t ievent=0; ievent<fNevents; ievent++) 00940 h->Fill(fXvar(ievent, nvar), fSWeights(ievent, nspecies)); 00941 fSWeightsHists.AddAt(h, fNSpecies*nvar+nspecies); 00942 } 00943 if (type==5){ 00944 hremove = (TH1D*)fSWeightsHists.RemoveAt(fNx*fNSpecies + fNSpecies*nvar+nspecies); 00945 delete hremove; 00946 snprintf(name,20, "y%d_species%d", nvar, nspecies); 00947 TH1D *h = new TH1D(name, name, nbins, min, max); 00948 h->Sumw2(); 00949 for (Int_t ievent=0; ievent<fNevents; ievent++) 00950 h->Fill(fYvar(ievent, nvar), fSWeights(ievent, nspecies)); 00951 fSWeightsHists.AddAt(h, fNx*fNSpecies + fNSpecies*nvar+nspecies); 00952 } 00953 } 00954 //____________________________________________________________________ 00955 TH1D *TSPlot::GetSWeightsHist(Int_t ixvar, Int_t ispecies,Int_t iyexcl) 00956 { 00957 //Returns the histogram of a variable, weithed with sWeights 00958 //If histograms have not been already filled, they are filled with default binning 50 00959 //If parameter ixvar!=-1, the histogram of x-variable #ixvar is returned for species ispecies 00960 //If parameter ixvar==-1, the histogram of y-variable #iyexcl is returned for species ispecies 00961 //If the histogram has already been filled and the binning is different from the parameter nbins 00962 //all histograms with old binning will be deleted and refilled. 00963 00964 00965 Int_t nbins = 50; //default binning 00966 if (fSWeightsHists.IsEmpty()) 00967 FillSWeightsHists(nbins); 00968 00969 if (ixvar==-1) 00970 return (TH1D*)fSWeightsHists.UncheckedAt(fNx*fNSpecies + fNSpecies*iyexcl+ispecies); 00971 else 00972 return (TH1D*)fSWeightsHists.UncheckedAt(fNSpecies*ixvar + ispecies); 00973 00974 } 00975 00976 00977 //____________________________________________________________________ 00978 void TSPlot::SetTree(TTree *tree) 00979 { 00980 // Set the input Tree 00981 fTree = tree; 00982 } 00983 00984 //____________________________________________________________________ 00985 void TSPlot::SetTreeSelection(const char* varexp, const char *selection, Long64_t firstentry) 00986 { 00987 //Specifies the variables from the tree to be used for splot 00988 // 00989 //Variables fNx, fNy, fNSpecies and fNEvents should already be set! 00990 // 00991 //In the 1st parameter it is assumed that first fNx variables are x(control variables), 00992 //then fNy y variables (discriminating variables), 00993 //then fNy*fNSpecies ypdf variables (probability distribution functions of dicriminating 00994 //variables for different species). The order of pdfs should be: species0_y0, species0_y1,... 00995 //species1_y0, species1_y1,...species[fNSpecies-1]_y0... 00996 //The 2nd parameter allows to make a cut 00997 //TTree::Draw method description contains more details on specifying expression and selection 00998 00999 TTreeFormula **var; 01000 std::vector<TString> cnames; 01001 TList *formulaList = new TList(); 01002 TSelectorDraw *selector = (TSelectorDraw*)(((TTreePlayer*)fTree->GetPlayer())->GetSelector()); 01003 01004 Long64_t entry,entryNumber, curentry; 01005 Int_t i,nch; 01006 Int_t ncols; 01007 TObjArray *leaves = fTree->GetListOfLeaves(); 01008 01009 fTreename= new TString(fTree->GetName()); 01010 if (varexp) 01011 fVarexp = new TString(varexp); 01012 if (selection) 01013 fSelection= new TString(selection); 01014 01015 nch = varexp ? strlen(varexp) : 0; 01016 01017 01018 //*-*- Compile selection expression if there is one 01019 TTreeFormula *select = 0; 01020 if (selection && strlen(selection)) { 01021 select = new TTreeFormula("Selection",selection,fTree); 01022 if (!select) return; 01023 if (!select->GetNdim()) { delete select; return; } 01024 formulaList->Add(select); 01025 } 01026 //*-*- if varexp is empty, take first nx + ny + ny*nspecies columns by default 01027 01028 if (nch == 0) { 01029 ncols = fNx + fNy + fNy*fNSpecies; 01030 for (i=0;i<ncols;i++) { 01031 cnames.push_back( leaves->At(i)->GetName() ); 01032 } 01033 //*-*- otherwise select only the specified columns 01034 } else { 01035 ncols = selector->SplitNames(varexp,cnames); 01036 } 01037 var = new TTreeFormula* [ncols]; 01038 Double_t *xvars = new Double_t[ncols]; 01039 01040 fMinmax.ResizeTo(2, ncols); 01041 for (i=0; i<ncols; i++){ 01042 fMinmax(0, i)=1e30; 01043 fMinmax(1, i)=-1e30; 01044 } 01045 01046 //*-*- Create the TreeFormula objects corresponding to each column 01047 for (i=0;i<ncols;i++) { 01048 var[i] = new TTreeFormula("Var1",cnames[i].Data(),fTree); 01049 formulaList->Add(var[i]); 01050 } 01051 01052 //*-*- Create a TreeFormulaManager to coordinate the formulas 01053 TTreeFormulaManager *manager=0; 01054 if (formulaList->LastIndex()>=0) { 01055 manager = new TTreeFormulaManager; 01056 for(i=0;i<=formulaList->LastIndex();i++) { 01057 manager->Add((TTreeFormula*)formulaList->At(i)); 01058 } 01059 manager->Sync(); 01060 } 01061 //*-*- loop on all selected entries 01062 // fSelectedRows = 0; 01063 Int_t tnumber = -1; 01064 Long64_t selectedrows=0; 01065 for (entry=firstentry;entry<firstentry+fNevents;entry++) { 01066 entryNumber = fTree->GetEntryNumber(entry); 01067 if (entryNumber < 0) break; 01068 Long64_t localEntry = fTree->LoadTree(entryNumber); 01069 if (localEntry < 0) break; 01070 if (tnumber != fTree->GetTreeNumber()) { 01071 tnumber = fTree->GetTreeNumber(); 01072 if (manager) manager->UpdateFormulaLeaves(); 01073 } 01074 Int_t ndata = 1; 01075 if (manager && manager->GetMultiplicity()) { 01076 ndata = manager->GetNdata(); 01077 } 01078 01079 for(Int_t inst=0;inst<ndata;inst++) { 01080 Bool_t loaded = kFALSE; 01081 if (select) { 01082 if (select->EvalInstance(inst) == 0) { 01083 continue; 01084 } 01085 } 01086 01087 if (inst==0) loaded = kTRUE; 01088 else if (!loaded) { 01089 // EvalInstance(0) always needs to be called so that 01090 // the proper branches are loaded. 01091 for (i=0;i<ncols;i++) { 01092 var[i]->EvalInstance(0); 01093 } 01094 loaded = kTRUE; 01095 } 01096 01097 for (i=0;i<ncols;i++) { 01098 xvars[i] = var[i]->EvalInstance(inst); 01099 } 01100 01101 curentry = entry-firstentry; 01102 //printf("event#%d\n", curentry); 01103 //for (i=0; i<ncols; i++) 01104 // printf("xvars[%d]=%f\n", i, xvars[i]); 01105 //selectedrows++; 01106 for (i=0; i<fNx; i++){ 01107 fXvar(selectedrows, i) = xvars[i]; 01108 if (fXvar(selectedrows, i) < fMinmax(0, i)) 01109 fMinmax(0, i)=fXvar(selectedrows, i); 01110 if (fXvar(selectedrows, i) > fMinmax(1, i)) 01111 fMinmax(1, i)=fXvar(selectedrows, i); 01112 } 01113 for (i=0; i<fNy; i++){ 01114 fYvar(selectedrows, i) = xvars[i+fNx]; 01115 //printf("y_in_loop(%d, %d)=%f, xvars[%d]=%f\n", selectedrows, i, fYvar(selectedrows, i), i+fNx, xvars[i+fNx]); 01116 if (fYvar(selectedrows, i) < fMinmax(0, i+fNx)) 01117 fMinmax(0, i+fNx) = fYvar(selectedrows, i); 01118 if (fYvar(selectedrows, i) > fMinmax(1, i+fNx)) 01119 fMinmax(1, i+fNx) = fYvar(selectedrows, i); 01120 for (Int_t j=0; j<fNSpecies; j++){ 01121 fYpdf(selectedrows, j*fNy + i)=xvars[j*fNy + i+fNx+fNy]; 01122 if (fYpdf(selectedrows, j*fNy+i) < fMinmax(0, j*fNy+i+fNx+fNy)) 01123 fMinmax(0, j*fNy+i+fNx+fNy) = fYpdf(selectedrows, j*fNy+i); 01124 if (fYpdf(selectedrows, j*fNy+i) > fMinmax(1, j*fNy+i+fNx+fNy)) 01125 fMinmax(1, j*fNy+i+fNx+fNy) = fYpdf(selectedrows, j*fNy+i); 01126 } 01127 } 01128 selectedrows++; 01129 } 01130 } 01131 fNevents=selectedrows; 01132 // for (i=0; i<fNevents; i++){ 01133 // printf("event#%d\n", i); 01134 //for (Int_t iy=0; iy<fNy; iy++) 01135 // printf("y[%d]=%f\n", iy, fYvar(i, iy)); 01136 //for (Int_t ispecies=0; ispecies<fNSpecies; ispecies++){ 01137 // for (Int_t iy=0; iy<fNy; iy++) 01138 // printf("ypdf[sp. %d, y %d]=%f\n", ispecies, iy, fYpdf(i, ispecies*fNy+iy)); 01139 // } 01140 //} 01141 delete [] xvars; 01142 delete [] var; 01143 } 01144 01145 //____________________________________________________________________ 01146 void Yields(Int_t &, Double_t *, Double_t &f, Double_t *x, Int_t /*iflag*/) 01147 { 01148 // FCN-function for Minuit 01149 01150 Double_t lik; 01151 Int_t i, ispecies; 01152 01153 TVirtualFitter *fitter = TVirtualFitter::GetFitter(); 01154 TMatrixD *pdftot = (TMatrixD*)fitter->GetObjectFit(); 01155 Int_t nev = pdftot->GetNrows(); 01156 Int_t nes = pdftot->GetNcols(); 01157 f=0; 01158 for (i=0; i<nev; i++){ 01159 lik=0; 01160 for (ispecies=0; ispecies<nes; ispecies++) 01161 lik+=x[ispecies]*(*pdftot)(i, ispecies); 01162 if (lik<0) lik=1; 01163 f+=TMath::Log(lik); 01164 } 01165 //extended likelihood, equivalent to chi2 01166 Double_t ntot=0; 01167 for (i=0; i<nes; i++) 01168 ntot += x[i]; 01169 f = -2*(f-ntot); 01170 } 01171