ROOT_528-00b_version: math/splot/src/TSPlot.cxx Source File

00001 // @(#)root/splot:$Id: TSPlot.cxx 35916 2010-09-30 14:28:20Z brun $
00002 // Author: Muriel Pivk, Anna Kreshuk    10/2005  
00003 
00004 /**********************************************************************
00005  *                                                                    *
00006  * Copyright (c) 2005 ROOT Foundation,  CERN/PH-SFT                   *
00007  *                                                                    *
00008  **********************************************************************/
00009 
00010 #include "TSPlot.h"
00011 #include "TVirtualFitter.h"
00012 #include "TH1.h"
00013 #include "TTreePlayer.h"
00014 #include "TTreeFormula.h"
00015 #include "TTreeFormulaManager.h"
00016 #include "TSelectorDraw.h"
00017 #include "TBrowser.h"
00018 #include "TClass.h"
00019 #include "TMath.h"
00020 
00021 extern void Yields(Int_t &, Double_t *, Double_t &f, Double_t *x, Int_t iflag);
00022 
00023 ClassImp(TSPlot)
00024 
00025 //____________________________________________________________________
00026 //Begin_Html <!--
00027 /* -->
00028 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
00029 <p>
00030 <b><font size="+2">Overview</font></b>
00031 
00032 </p><p>
00033 A common method used in High Energy Physics to perform measurements is
00034 the maximum Likelihood method, exploiting discriminating variables to
00035 disentangle signal from background. The crucial point for such an
00036 analysis to be reliable is to use an exhaustive list of sources of
00037 events combined with an accurate description of all the Probability
00038 Density Functions (PDF).
00039 </p><p>To assess the validity of the fit, a convincing quality check
00040 is to explore further the data sample by examining the distributions of
00041 control variables. A control variable can be obtained for instance by
00042 removing one of the discriminating variables before performing again
00043 the maximum Likelihood fit: this removed variable is a control
00044 variable. The expected distribution of this control variable, for
00045 signal, is to be compared to the one extracted, for signal, from the
00046 data sample. In order to be able to do so, one must be able to unfold
00047 from the distribution of the whole data sample.
00048 </p><p>The TSPlot method allows to reconstruct the distributions for
00049 the control variable, independently for each of the various sources of
00050 events, without making use of any <em>a priori</em> knowledge on <u>this</u>
00051 variable. The aim is thus to use the knowledge available for the
00052 discriminating variables to infer the behaviour of the individual
00053 sources of events with respect to the control variable.
00054 </p><p>
00055 TSPlot is optimal if the control variable is uncorrelated with the discriminating variables.
00056 
00057 </p><p>
00058 A detail description of the formalism itself, called <!-- MATH
00059  $\hbox{$_s$}{\cal P}lot$
00060  -->
00061 <img src="gif/sPlot_img5.png" alt="$\hbox{$_s$}{\cal P}lot$" align="middle" border="0" height="34" width="48">, is given in&nbsp;[<a href="http://www.slac.stanford.edu/%7Epivk/sPlot/sPlot_ROOT/node1.html#bib:sNIM">1</a>].
00062 
00063 </p><p>
00064 <b><font size="+2">The method</font></b> 
00065 
00066 </p><p>
00067 The <!-- MATH
00068  $\hbox{$_s$}{\cal P}lot$
00069  -->
00070 <img src="gif/sPlot_img5.png" alt="$\hbox{$_s$}{\cal P}lot$" align="middle" border="0" height="34" width="48"> technique is developped in the above context of a maximum Likelihood method making use of discriminating variables.
00071 
00072 </p><p>One considers a data sample in which are merged several species
00073 of events. These species represent various signal components and
00074 background components which all together account for the data sample.
00075 The different terms of the log-Likelihood are:
00076 </p><ul>
00077 <li><img src="gif/sPlot_img6.png" alt="$N$" align="bottom" border="0" height="17" width="22">: the total number of events in the data sample,
00078 </li>
00079 <li><!-- MATH
00080  ${\rm N}_{\rm s}$
00081  -->
00082 <img src="gif/sPlot_img7.png" alt="${\rm N}_{\rm s}$" align="middle" border="0" height="34" width="25">: the number of species of events populating the data sample,
00083 </li>
00084 <li><img src="gif/sPlot_img8.png" alt="$N_i$" align="middle" border="0" height="34" width="25">: the number of events expected on the average for the <img src="gif/sPlot_img9.png" alt="$i^{\rm th}$" align="bottom" border="0" height="20" width="25"> species,
00085 </li>
00086 <li><!-- MATH
00087  ${\rm f}_i(y_e)$
00088  -->
00089 <img src="gif/sPlot_img10.png" alt="${\rm f}_i(y_e)$" align="middle" border="0" height="37" width="47">: the value of the PDFs of the discriminating variables <img src="gif/sPlot_img11.png" alt="$y$" align="middle" border="0" height="33" width="15"> for the <img src="gif/sPlot_img12.png" alt="$i^{th}$" align="bottom" border="0" height="20" width="25"> species and for event <img src="gif/sPlot_img13.png" alt="$e$" align="bottom" border="0" height="17" width="13">, 
00090 </li>
00091 <li><img src="gif/sPlot_img14.png" alt="$x$" align="bottom" border="0" height="17" width="15">: the set of control variables which, by definition, do not appear in the expression of the Likelihood function <img src="gif/sPlot_img15.png" alt="${\cal L}$" align="bottom" border="0" height="18" width="18">.
00092 </li>
00093 </ul>
00094 The extended log-Likelihood reads:
00095 <br>
00096 <div align="right">
00097 
00098 <!-- MATH
00099  \begin{equation}
00100 {\cal L}=\sum_{e=1}^{N}\ln \Big\{ \sum_{i=1}^{{\rm N}_{\rm s}}N_i{\rm f}_i(y_e) \Big\} -\sum_{i=1}^{{\rm N}_{\rm s}}N_i ~.
00101 \end{equation}
00102  -->
00103 <table align="center" width="100%">
00104 <tbody><tr valign="middle"><td align="center" nowrap="nowrap"><a name="eq:eLik"></a><img src="gif/sPlot_img16.png" alt="\begin{displaymath}
00105 {\cal L}=\sum_{e=1}^{N}\ln \Big\{ \sum_{i=1}^{{\rm N}_{\rm s}}N_i{\rm f}_i(y_e) \Big\} -\sum_{i=1}^{{\rm N}_{\rm s}}N_i ~.
00106 \end{displaymath}" border="0" height="59" width="276"></td>
00107 <td align="right" width="10">
00108 (1)</td></tr>
00109 </tbody></table>
00110 <br clear="all"></div><p></p>
00111 From this expression, after maximization of <img src="gif/sPlot_img15.png" alt="${\cal L}$" align="bottom" border="0" height="18" width="18"> with respect to the <img src="gif/sPlot_img8.png" alt="$N_i$" align="middle" border="0" height="34" width="25"> parameters, a weight can be computed for every event and each species, in order to obtain later the true distribution <!-- MATH
00112  ${\hbox{\bf {M}}}_i(x)$
00113  -->
00114 <img src="gif/sPlot_img17.png" alt="${\hbox{\bf {M}}}_i(x)$" align="middle" border="0" height="37" width="56"> of variable <img src="gif/sPlot_img14.png" alt="$x$" align="bottom" border="0" height="17" width="15">. If <img src="gif/sPlot_img18.png" alt="${\rm n}$" align="bottom" border="0" height="17" width="15"> is one of the <!-- MATH
00115  ${\rm N}_{\rm s}$
00116  -->
00117 <img src="gif/sPlot_img7.png" alt="${\rm N}_{\rm s}$" align="middle" border="0" height="34" width="25"> species present in the data sample, the weight for this species is defined by:
00118 <br>
00119 <div align="right">
00120 
00121 <!-- MATH
00122  \begin{equation}
00123 \begin{Large}\fbox{$
00124 {_s{\cal P}}_{\rm n}(y_e)={\sum_{j=1}^{{\rm N}_{\rm s}} \hbox{\bf V}_{{\rm n}j}{\rm f}_j(y_e)\over\sum_{k=1}^{{\rm N}_{\rm s}}N_k{\rm f}_k(y_e) } $}\end{Large} ~,
00125 \end{equation}
00126  -->
00127 <table align="center" width="100%">
00128 <tbody><tr valign="middle"><td align="center" nowrap="nowrap"><a name="eq:weightxnotiny"></a><img src="gif/sPlot_img19.png" alt="\begin{displaymath}
00129 \begin{Large}
00130 \fbox{$
00131 {_s{\cal P}}_{\rm n}(y_e)={\sum_{j=1}^...
00132 ...um_{k=1}^{{\rm N}_{\rm s}}N_k{\rm f}_k(y_e) } $}\end{Large} ~,
00133 \end{displaymath}" border="0" height="76" width="279"></td>
00134 <td align="right" width="10">
00135 (2)</td></tr>
00136 </tbody></table>
00137 <br clear="all"></div><p></p>
00138 where <!-- MATH
00139  $\hbox{\bf V}_{{\rm n}j}$
00140  -->
00141 <img src="gif/sPlot_img20.png" alt="$\hbox{\bf V}_{{\rm n}j}$" align="middle" border="0" height="34" width="35">
00142 is the covariance matrix resulting from the Likelihood maximization.
00143 This matrix can be used directly from the fit, but this is numerically
00144 less accurate than the direct computation:
00145 <br>
00146 <div align="right">
00147 
00148 <!-- MATH
00149  \begin{equation}
00150 \hbox{\bf V}^{-1}_{{\rm n}j}~=~
00151 {\partial^2(-{\cal L})\over\partial N_{\rm n}\partial N_j}~=~
00152 \sum_{e=1}^N {{\rm f}_{\rm n}(y_e){\rm f}_j(y_e)\over(\sum_{k=1}^{{\rm N}_{\rm s}}N_k{\rm f}_k(y_e))^2} ~.
00153 \end{equation}
00154  -->
00155 <table align="center" width="100%">
00156 <tbody><tr valign="middle"><td align="center" nowrap="nowrap"><a name="eq:VarianceMatrixDirect"></a><img src="gif/sPlot_img21.png" alt="\begin{displaymath}
00157 \hbox{\bf V}^{-1}_{{\rm n}j}~=~
00158 {\partial^2(-{\cal L})\over\...
00159 ...y_e)\over(\sum_{k=1}^{{\rm N}_{\rm s}}N_k{\rm f}_k(y_e))^2} ~.
00160 \end{displaymath}" border="0" height="58" width="360"></td>
00161 <td align="right" width="10">
00162 (3)</td></tr>
00163 </tbody></table>
00164 <br clear="all"></div><p></p>
00165 The distribution of the control variable&nbsp;<img src="gif/sPlot_img14.png" alt="$x$" align="bottom" border="0" height="17" width="15"> obtained by histogramming the weighted events reproduces, on average, the true distribution <!-- MATH
00166  ${\hbox{\bf {M}}}_{\rm n}(x)$
00167  -->
00168 <img src="gif/sPlot_img22.png" alt="${\hbox{\bf {M}}}_{\rm n}(x)$" align="middle" border="0" height="37" width="59">.
00169 
00170 <p>
00171 The class TSPlot allows to reconstruct the true distribution <!-- MATH
00172  ${\hbox{\bf {M}}}_{\rm n}(x)$
00173  -->
00174 <img src="gif/sPlot_img22.png" alt="${\hbox{\bf {M}}}_{\rm n}(x)$" align="middle" border="0" height="37" width="59"> of a control variable&nbsp;<img src="gif/sPlot_img14.png" alt="$x$" align="bottom" border="0" height="17" width="15"> for each of the <!-- MATH
00175  ${\rm N}_{\rm s}$
00176  -->
00177 <img src="gif/sPlot_img7.png" alt="${\rm N}_{\rm s}$" align="middle" border="0" height="34" width="25"> species from the sole knowledge of the PDFs of the discriminating variables <img src="gif/sPlot_img23.png" alt="${\rm f}_i(y)$" align="middle" border="0" height="37" width="40">. The plots obtained thanks to the TSPlot class are called <!-- MATH
00178  $\hbox{$_s$}{\cal P}lots$
00179  -->
00180 <img src="gif/sPlot_img4.png" alt="$\hbox {$_s$}{\cal P}lots$" align="middle" border="0" height="34" width="57">.
00181 
00182 </p><p>
00183 <b><font size="+2">Some properties and checks</font></b>
00184 
00185 </p><p>
00186 Beside reproducing the true distribution, <!-- MATH
00187  $\hbox{$_s$}{\cal P}lots$
00188  -->
00189 <img src="gif/sPlot_img4.png" alt="$\hbox {$_s$}{\cal P}lots$" align="middle" border="0" height="34" width="57"> bear remarkable properties:
00190 
00191 </p><ul>
00192 <li>
00193 Each <img src="gif/sPlot_img14.png" alt="$x$" align="bottom" border="0" height="17" width="15">-distribution is properly normalized:
00194 <br>
00195 <div align="right">
00196 
00197 <!-- MATH
00198  \begin{equation}
00199 \sum_{e=1}^{N} {_s{\cal P}}_{\rm n}(y_e)~=~N_{\rm n}~.
00200 \end{equation}
00201  -->
00202 <table align="center" width="100%">
00203 <tbody><tr valign="middle"><td align="center" nowrap="nowrap"><a name="eq:NormalizationOK"></a><img src="gif/sPlot_img24.png" alt="\begin{displaymath}
00204 \sum_{e=1}^{N} {_s{\cal P}}_{\rm n}(y_e)~=~N_{\rm n}~.
00205 \end{displaymath}" border="0" height="58" width="158"></td>
00206 <td align="right" width="10">
00207 (4)</td></tr>
00208 </tbody></table>
00209 <br clear="all"></div><p></p>
00210 </li>
00211 <li>
00212 For any event:
00213 <br>
00214 <div align="right">
00215 
00216 <!-- MATH
00217  \begin{equation}
00218 \sum_{l=1}^{{\rm N}_{\rm s}} {_s{\cal P}}_l(y_e) ~=~1 ~.
00219 \end{equation}
00220  -->
00221 <table align="center" width="100%">
00222 <tbody><tr valign="middle"><td align="center" nowrap="nowrap"><a name="eq:numberconservation"></a><img src="gif/sPlot_img25.png" alt="\begin{displaymath}
00223 \sum_{l=1}^{{\rm N}_{\rm s}} {_s{\cal P}}_l(y_e) ~=~1 ~.
00224 \end{displaymath}" border="0" height="59" width="140"></td>
00225 <td align="right" width="10">
00226 (5)</td></tr>
00227 </tbody></table>
00228 <br clear="all"></div><p></p>
00229 That is to say that, summing up the <!-- MATH
00230  ${\rm N}_{\rm s}$
00231  -->
00232 <img src="gif/sPlot_img7.png" alt="${\rm N}_{\rm s}$" align="middle" border="0" height="34" width="25"> <!-- MATH
00233  $\hbox{$_s$}{\cal P}lots$
00234  -->
00235 <img src="gif/sPlot_img4.png" alt="$\hbox {$_s$}{\cal P}lots$" align="middle" border="0" height="34" width="57">, one recovers the data sample distribution in&nbsp;<img src="gif/sPlot_img14.png" alt="$x$" align="bottom" border="0" height="17" width="15">, and summing up the number of events entering in a <!-- MATH
00236  $\hbox{$_s$}{\cal P}lot$
00237  -->
00238 <img src="gif/sPlot_img5.png" alt="$\hbox{$_s$}{\cal P}lot$" align="middle" border="0" height="34" width="48"> for a given species, one recovers the yield of the species, as provided by the fit. The property&nbsp;<a href="http://www.slac.stanford.edu/%7Epivk/sPlot/sPlot_ROOT/sPlot_ROOT.html#eq:NormalizationOK">4</a> is implemented in the TSPlot class as a check.
00239 </li>
00240 <li>the sum of the statistical uncertainties per bin
00241 <br>
00242 <div align="right">
00243 
00244 <!-- MATH
00245  \begin{equation}
00246 \sigma[N_{\rm n}\  _s\tilde{\rm M}_{\rm n}(x) {\delta x}]~=~\sqrt{\sum_{e \subset {\delta x}} ({_s{\cal P}}_{\rm n})^2} ~.
00247 \end{equation}
00248  -->
00249 <table align="center" width="100%">
00250 <tbody><tr valign="middle"><td align="center" nowrap="nowrap"><a name="eq:ErrorPerBin"></a><img src="gif/sPlot_img26.png" alt="\begin{displaymath}
00251 \sigma[N_{\rm n}\ _s\tilde{\rm M}_{\rm n}(x) {\delta x}]~=~\sqrt{\sum_{e \subset {\delta x}} ({_s{\cal P}}_{\rm n})^2} ~.
00252 \end{displaymath}" border="0" height="55" width="276"></td>
00253 <td align="right" width="10">
00254 (6)</td></tr>
00255 </tbody></table>
00256 <br clear="all"></div><p></p>
00257 reproduces the statistical uncertainty on the yield <img src="gif/sPlot_img27.png" alt="$N_{\rm n}$" align="middle" border="0" height="34" width="28">, as provided by the fit: <!-- MATH
00258  $\sigma[N_{\rm n}]\equiv\sqrt{\hbox{\bf V}_{{\rm n}{\rm n}}}$
00259  -->
00260 <img src="gif/sPlot_img28.png" alt="$\sigma[N_{\rm n}]\equiv\sqrt{\hbox{\bf V}_{{\rm n}{\rm n}}}$" align="middle" border="0" height="40" width="123">.
00261 Because of that and since the determination of the yields is optimal
00262 when obtained using a Likelihood fit, one can conclude that the<!-- MATH
00263  $\hbox{$_s$}{\cal P}lot$
00264  -->
00265  <img src="gif/sPlot_img5.png" alt="$\hbox{$_s$}{\cal P}lot$" align="middle" border="0" height="34" width="48"> technique is itself an optimal method to reconstruct distributions of control variables.
00266 </li>
00267 </ul>
00268 
00269 <p>
00270 <b><font size="+2">Different steps followed by TSPlot</font></b>
00271 
00272 </p><p>
00273 
00274 </p><ol>
00275 <li>A maximum Likelihood fit is performed to obtain the yields <img src="gif/sPlot_img8.png" alt="$N_i$" align="middle" border="0" height="34" width="25"> of the various species. 
00276 The fit relies on discriminating variables&nbsp;<img src="gif/sPlot_img11.png" alt="$y$" align="middle" border="0" height="33" width="15"> uncorrelated with a control variable&nbsp;<img src="gif/sPlot_img14.png" alt="$x$" align="bottom" border="0" height="17" width="15">:
00277 the later is therefore totally absent from the fit. 
00278 </li>
00279 <li>The weights <img src="gif/sPlot_img29.png" alt="${_s{\cal P}}$" align="middle" border="0" height="34" width="27"> are calculated using Eq.&nbsp;(<a href="http://www.slac.stanford.edu/%7Epivk/sPlot/sPlot_ROOT/sPlot_ROOT.html#eq:weightxnotiny">2</a>) where the covariance matrix is taken from Minuit.
00280 </li>
00281 <li>Histograms of&nbsp;<img src="gif/sPlot_img14.png" alt="$x$" align="bottom" border="0" height="17" width="15"> are filled by weighting the events with <img src="gif/sPlot_img29.png" alt="${_s{\cal P}}$" align="middle" border="0" height="34" width="27">. 
00282 </li>
00283 <li>Error bars per bin are given by Eq.&nbsp;(<a href="http://www.slac.stanford.edu/%7Epivk/sPlot/sPlot_ROOT/sPlot_ROOT.html#eq:ErrorPerBin">6</a>). 
00284 </li>
00285 </ol>
00286 The <!-- MATH
00287  $\hbox{$_s$}{\cal P}lots$
00288  -->
00289 <img src="gif/sPlot_img4.png" alt="$\hbox {$_s$}{\cal P}lots$" align="middle" border="0" height="34" width="57"> reproduce the true distributions of the species in the control variable&nbsp;<img src="gif/sPlot_img14.png" alt="$x$" align="bottom" border="0" height="17" width="15">, within the above defined statistical uncertainties.
00290 
00291 <p>
00292 <b><font size="+2">Illustrations</font></b>
00293 
00294 </p><p>
00295 To illustrate the technique, one considers an example derived from the analysis where <!-- MATH
00296  $\hbox{$_s$}{\cal P}lots$
00297  -->
00298 <img src="gif/sPlot_img4.png" alt="$\hbox {$_s$}{\cal P}lots$" align="middle" border="0" height="34" width="57">
00299 have been first used (charmless B decays). One is dealing with a data
00300 sample in which two species are present: the first is termed signal and
00301 the second background. A maximum Likelihood fit is performed to obtain
00302 the two yields <img src="gif/sPlot_img30.png" alt="$N_1$" align="middle" border="0" height="34" width="27"> and <img src="gif/sPlot_img31.png" alt="$N_2$" align="middle" border="0" height="34" width="27">. The fit relies on two discriminating variables collectively denoted&nbsp;<img src="gif/sPlot_img11.png" alt="$y$" align="middle" border="0" height="33" width="15"> which are chosen within three possible variables denoted <img src="gif/sPlot_img1.png" alt="${m_{\rm ES}}$" align="middle" border="0" height="33" width="39">, <img src="gif/sPlot_img2.png" alt="$\Delta E$" align="bottom" border="0" height="17" width="35"> and <img src="gif/sPlot_img3.png" alt="${\cal F}$" align="bottom" border="0" height="18" width="20">.
00303 The variable which is not incorporated in&nbsp;<img src="gif/sPlot_img11.png" alt="$y$" align="middle" border="0" height="33" width="15"> is used as the control variable&nbsp;<img src="gif/sPlot_img14.png" alt="$x$" align="bottom" border="0" height="17" width="15">. The six distributions of the three variables are assumed to be the ones depicted in Fig.&nbsp;<a href="http://www.slac.stanford.edu/%7Epivk/sPlot/sPlot_ROOT/sPlot_ROOT.html#fig:pdfs">1</a>.
00304 
00305 </p><p>
00306 
00307 </p><div align="center"><a name="fig:pdfs"></a><a name="106"></a>
00308 <table>
00309 <caption align="bottom"><strong>Figure 1:</strong>
00310 Distributions of the three discriminating variables available to perform the Likelihood fit: 
00311 <img src="gif/sPlot_img32.png" alt="${m_{\rm ES}}$" align="middle" border="0" height="33" width="39">, <img src="gif/sPlot_img2.png" alt="$\Delta E$" align="bottom" border="0" height="17" width="35">, <img src="gif/sPlot_img3.png" alt="${\cal F}$" align="bottom" border="0" height="18" width="20">.
00312 Among the three variables, two are used to perform the fit while one is
00313 kept out of the fit to serve the purpose of a control variable. The
00314 three distributions on the top (resp. bottom) of the figure correspond
00315 to the signal (resp. background). The unit of the vertical axis is
00316 chosen such that it indicates the number of entries per bin, if one
00317 slices the histograms in 25 bins.</caption>
00318 <tbody><tr><td><img src="gif/sPlot_img33.png" alt="\begin{figure}\begin{center}
00319 \mbox{{\psfig{file=pdfmesNIM.eps,width=0.33\linewi...
00320 ...th}}
00321 {\psfig{file=pdffiNIM.eps,width=0.33\linewidth}}}
00322 \end{center}\end{figure}" border="0" height="162" width="544"></td></tr>
00323 </tbody></table>
00324 </div>
00325 
00326 <p>
00327 A data sample being built through a Monte Carlo simulation based on the distributions shown in Fig.&nbsp;<a href="http://www.slac.stanford.edu/%7Epivk/sPlot/sPlot_ROOT/sPlot_ROOT.html#fig:pdfs">1</a>, one obtains the three distributions of Fig.&nbsp;<a href="http://www.slac.stanford.edu/%7Epivk/sPlot/sPlot_ROOT/sPlot_ROOT.html#fig:pdfstot">2</a>. Whereas the distribution of&nbsp;<img src="gif/sPlot_img2.png" alt="$\Delta E$" align="bottom" border="0" height="17" width="35"> clearly indicates the presence of the signal, the distribution of <img src="gif/sPlot_img1.png" alt="${m_{\rm ES}}$" align="middle" border="0" height="33" width="39"> and <img src="gif/sPlot_img3.png" alt="${\cal F}$" align="bottom" border="0" height="18" width="20"> are less obviously populated by signal.
00328 
00329 </p><p>
00330 
00331 </p><div align="center"><a name="fig:pdfstot"></a><a name="169"></a>
00332 <table>
00333 <caption align="bottom"><strong>Figure 2:</strong>
00334 Distributions of the three discriminating variables for signal plus
00335 background. The three distributions are the ones obtained from a data
00336 sample obtained through a Monte Carlo simulation based on the
00337 distributions shown in Fig.&nbsp;<a href="http://www.slac.stanford.edu/%7Epivk/sPlot/sPlot_ROOT/sPlot_ROOT.html#fig:pdfs">1</a>.  The data sample consists of 500 signal events and 5000 background events.</caption>
00338 <tbody><tr><td><img src="gif/sPlot_img34.png" alt="\begin{figure}\begin{center}
00339 \mbox{{\psfig{file=genmesTOTNIM.eps,width=0.33\lin...
00340 ...}
00341 {\psfig{file=genfiTOTNIM.eps,width=0.33\linewidth}}}
00342 \end{center}\end{figure}" border="0" height="160" width="545"></td></tr>
00343 </tbody></table>
00344 </div>
00345 
00346 <p>
00347 Chosing <img src="gif/sPlot_img2.png" alt="$\Delta E$" align="bottom" border="0" height="17" width="35"> and <img src="gif/sPlot_img3.png" alt="${\cal F}$" align="bottom" border="0" height="18" width="20"> as discriminating variables to determine <img src="gif/sPlot_img30.png" alt="$N_1$" align="middle" border="0" height="34" width="27"> and <img src="gif/sPlot_img31.png" alt="$N_2$" align="middle" border="0" height="34" width="27"> through a maximum Likelihood fit, one builds, for the control variable <img src="gif/sPlot_img1.png" alt="${m_{\rm ES}}$" align="middle" border="0" height="33" width="39"> which is unknown to the fit, the two <!-- MATH
00348  $\hbox{$_s$}{\cal P}lots$
00349  -->
00350 <img src="gif/sPlot_img4.png" alt="$\hbox {$_s$}{\cal P}lots$" align="middle" border="0" height="34" width="57"> for signal and background shown in Fig.&nbsp;<a href="http://www.slac.stanford.edu/%7Epivk/sPlot/sPlot_ROOT/sPlot_ROOT.html#fig:messPlots">3</a>. One observes that the <!-- MATH
00351  $\hbox{$_s$}{\cal P}lot$
00352  -->
00353 <img src="gif/sPlot_img5.png" alt="$\hbox{$_s$}{\cal P}lot$" align="middle" border="0" height="34" width="48">
00354 for signal reproduces correctly the PDF even where the latter vanishes,
00355 although the error bars remain sizeable. This results from the almost
00356 complete cancellation between positive and negative weights: the sum of
00357 weights is close to zero while the sum of weights squared is not. The
00358 occurence of negative weights occurs through the appearance of the
00359 covariance matrix, and its negative components, in the definition of
00360 Eq.&nbsp;(<a href="http://www.slac.stanford.edu/%7Epivk/sPlot/sPlot_ROOT/sPlot_ROOT.html#eq:weightxnotiny">2</a>).
00361 
00362 </p><p>
00363 A word of caution is in order with respect to the error bars. Whereas
00364 their sum in quadrature is identical to the statistical uncertainties
00365 of the yields determined by the fit, and if, in addition, they are
00366 asymptotically correct, the error bars should be handled with care for
00367 low statistics and/or for too fine binning. This is because the error
00368 bars do not incorporate two known properties of the PDFs: PDFs are
00369 positive definite and can be non-zero in a given x-bin, even if in the
00370 particular data sample at hand, no event is observed in this bin. The
00371 latter limitation is not specific to<!-- MATH
00372  $\hbox{$_s$}{\cal P}lots$
00373  -->
00374  <img src="gif/sPlot_img4.png" alt="$\hbox {$_s$}{\cal P}lots$" align="middle" border="0" height="34" width="57">,
00375 rather it is always present when one is willing to infer the PDF at the
00376 origin of an histogram, when, for some bins, the number of entries does
00377 not guaranty the applicability of the Gaussian regime. In such
00378 situations, a satisfactory practice is to attach allowed ranges to the
00379 histogram to indicate the upper and lower limits of the PDF value which
00380 are consistent with the actual observation, at a given confidence
00381 level.
00382 </p><p>
00383 
00384 </p><div align="center"><a name="fig:messPlots"></a><a name="127"></a>
00385 <table>
00386 <caption align="bottom"><strong>Figure 3:</strong>
00387 The <!-- MATH
00388  $\hbox{$_s$}{\cal P}lots$
00389  -->
00390 <img src="gif/sPlot_img4.png" alt="$\hbox {$_s$}{\cal P}lots$" align="middle" border="0" height="34" width="57"> (signal on the left, background on the right) obtained for <img src="gif/sPlot_img32.png" alt="${m_{\rm ES}}$" align="middle" border="0" height="33" width="39"> are represented as dots with error bars. They are obtained from a fit using only information from <img src="gif/sPlot_img2.png" alt="$\Delta E$" align="bottom" border="0" height="17" width="35"> and <img src="gif/sPlot_img3.png" alt="${\cal F}$" align="bottom" border="0" height="18" width="20">.</caption>
00391 <tbody><tr><td><img src="gif/sPlot_img35.png" alt="\begin{figure}\begin{center}
00392 \mbox{\psfig{file=mass-sig-sPlot.eps,width=0.48\li...
00393 ... \psfig{file=mass-bkg-sPlot.eps,width=0.48\linewidth}}
00394 \end{center}\end{figure}" border="0" height="181" width="539"></td></tr>
00395 </tbody></table>
00396 </div>
00397 
00398 <p>
00399 Chosing <img src="gif/sPlot_img1.png" alt="${m_{\rm ES}}$" align="middle" border="0" height="33" width="39"> and <img src="gif/sPlot_img2.png" alt="$\Delta E$" align="bottom" border="0" height="17" width="35"> as discriminating variables to determine <img src="gif/sPlot_img30.png" alt="$N_1$" align="middle" border="0" height="34" width="27"> and <img src="gif/sPlot_img31.png" alt="$N_2$" align="middle" border="0" height="34" width="27"> through a maximum Likelihood fit, one builds, for the control variable <img src="gif/sPlot_img3.png" alt="${\cal F}$" align="bottom" border="0" height="18" width="20"> which is unknown to the fit, the two <!-- MATH
00400  $\hbox{$_s$}{\cal P}lots$
00401  -->
00402 <img src="gif/sPlot_img4.png" alt="$\hbox {$_s$}{\cal P}lots$" align="middle" border="0" height="34" width="57"> for signal and background shown in Fig.&nbsp;<a href="http://www.slac.stanford.edu/%7Epivk/sPlot/sPlot_ROOT/sPlot_ROOT.html#fig:FisPlots">4</a>. In the <!-- MATH
00403  $\hbox{$_s$}{\cal P}lot$
00404  -->
00405 <img src="gif/sPlot_img5.png" alt="$\hbox{$_s$}{\cal P}lot$" align="middle" border="0" height="34" width="48"> for signal one observes that error bars are the largest in the&nbsp;<img src="gif/sPlot_img14.png" alt="$x$" align="bottom" border="0" height="17" width="15"> regions where the background is the largest.
00406 
00407 </p><p>
00408 
00409 </p><div align="center"><a name="fig:FisPlots"></a><a name="136"></a>
00410 <table>
00411 <caption align="bottom"><strong>Figure 4:</strong>
00412 The <!-- MATH
00413  $\hbox{$_s$}{\cal P}lots$
00414  -->
00415 <img src="gif/sPlot_img4.png" alt="$\hbox {$_s$}{\cal P}lots$" align="middle" border="0" height="34" width="57"> (signal on the left, background on the right) obtained for <img src="gif/sPlot_img3.png" alt="${\cal F}$" align="bottom" border="0" height="18" width="20"> are represented as dots with error bars. They are obtained from a fit using only information from <img src="gif/sPlot_img32.png" alt="${m_{\rm ES}}$" align="middle" border="0" height="33" width="39"> and <img src="gif/sPlot_img2.png" alt="$\Delta E$" align="bottom" border="0" height="17" width="35">.</caption>
00416 <tbody><tr><td><img src="gif/sPlot_img36.png" alt="\begin{figure}\begin{center}
00417 \mbox{\psfig{file=fisher-sig-sPlot.eps,width=0.48\...
00418 ...psfig{file=fisher-bkg-sPlot.eps,width=0.48\linewidth}}
00419 \end{center}\end{figure}" border="0" height="180" width="539"></td></tr>
00420 </tbody></table>
00421 </div>
00422 
00423 <p>
00424 The results above can be obtained by running the tutorial TestSPlot.C
00425 </p>
00426 <!--*/
00427 //-->End_Html
00428 
00429 
00430 //____________________________________________________________________
00431 TSPlot::TSPlot() : 
00432  fTree(0),
00433  fTreename(0),
00434  fVarexp(0),
00435  fSelection(0)
00436 {
00437    // default constructor (used by I/O only)
00438    fNx = 0;
00439    fNy=0;
00440    fNevents = 0;
00441    fNSpecies=0;
00442    fNumbersOfEvents=0;
00443 }
00444 
00445 //____________________________________________________________________
00446 TSPlot::TSPlot(Int_t nx, Int_t ny, Int_t ne, Int_t ns, TTree *tree) : 
00447  fTreename(0),
00448  fVarexp(0),
00449  fSelection(0)
00450 
00451 {
00452    //normal TSPlot constructor
00453    // nx :  number of control variables
00454    // ny :  number of discriminating variables
00455    // ne :  total number of events
00456    // ns :  number of species
00457    // tree: input data
00458    
00459    fNx = nx;
00460    fNy=ny;
00461    fNevents = ne;
00462    fNSpecies=ns;
00463 
00464    fXvar.ResizeTo(fNevents, fNx);
00465    fYvar.ResizeTo(fNevents, fNy);
00466    fYpdf.ResizeTo(fNevents, fNSpecies*fNy);
00467    fSWeights.ResizeTo(fNevents, fNSpecies*(fNy+1));
00468    fTree = tree;
00469    fNumbersOfEvents = 0;
00470 }
00471 
00472 //____________________________________________________________________
00473 TSPlot::~TSPlot()
00474 {
00475    // destructor
00476    
00477    if (fNumbersOfEvents)
00478       delete [] fNumbersOfEvents;
00479    if (!fXvarHists.IsEmpty())
00480       fXvarHists.Delete();
00481    if (!fYvarHists.IsEmpty())
00482       fYvarHists.Delete();
00483    if (!fYpdfHists.IsEmpty())
00484       fYpdfHists.Delete();
00485 }
00486 
00487 //____________________________________________________________________
00488 void TSPlot::Browse(TBrowser *b)
00489 {
00490    //To browse the histograms
00491 
00492    if (!fSWeightsHists.IsEmpty()) {
00493       TIter next(&fSWeightsHists);
00494       TH1D* h = 0;
00495       while ((h = (TH1D*)next()))
00496          b->Add(h,h->GetName());
00497    }
00498 
00499    if (!fYpdfHists.IsEmpty()) {
00500       TIter next(&fYpdfHists);
00501       TH1D* h = 0;
00502       while ((h = (TH1D*)next()))
00503          b->Add(h,h->GetName());
00504    }
00505    if (!fYvarHists.IsEmpty()) {
00506       TIter next(&fYvarHists);
00507       TH1D* h = 0;
00508       while ((h = (TH1D*)next()))
00509          b->Add(h,h->GetName());
00510    }
00511    if (!fXvarHists.IsEmpty()) {
00512       TIter next(&fXvarHists);
00513       TH1D* h = 0;
00514       while ((h = (TH1D*)next()))
00515          b->Add(h,h->GetName());
00516    }
00517    b->Add(&fSWeights, "sWeights");
00518 }
00519 
00520 
00521 //____________________________________________________________________
00522 void TSPlot::SetInitialNumbersOfSpecies(Int_t *numbers)
00523 {
00524 //Set the initial number of events of each species - used
00525 //as initial estimates in minuit
00526 
00527    if (!fNumbersOfEvents)
00528       fNumbersOfEvents = new Double_t[fNSpecies];
00529    for (Int_t i=0; i<fNSpecies; i++)
00530       fNumbersOfEvents[i]=numbers[i];
00531 }
00532 
00533 //____________________________________________________________________
00534 void TSPlot::MakeSPlot(Option_t *option)
00535 {
00536 //Calculates the sWeights
00537 //The option controls the print level
00538 //"Q" - no print out
00539 //"V" - prints the estimated #of events in species - default
00540 //"VV" - as "V" + the minuit printing + sums of weights for control
00541 
00542 
00543    if (!fNumbersOfEvents){
00544       Error("MakeSPlot","Initial numbers of events in species have not been set");
00545       return;
00546    }
00547    Int_t i, j, ispecies;
00548 
00549    TString opt = option;
00550    opt.ToUpper();
00551    opt.ReplaceAll("VV", "W");
00552 
00553    //make sure that global fitter is minuit
00554    char s[]="TFitter";
00555    if (TVirtualFitter::GetFitter()){
00556       Int_t strdiff=strcmp(TVirtualFitter::GetFitter()->IsA()->GetName(), s);
00557       if (strdiff!=0)
00558          delete TVirtualFitter::GetFitter();
00559    }
00560 
00561 
00562    TVirtualFitter *minuit = TVirtualFitter::Fitter(0, 2);
00563    fPdfTot.ResizeTo(fNevents, fNSpecies);
00564 
00565    //now let's do it, excluding different yvars
00566    //for iplot = -1 none is excluded
00567    for (Int_t iplot=-1; iplot<fNy; iplot++){
00568       for (i=0; i<fNevents; i++){
00569          for (ispecies=0; ispecies<fNSpecies; ispecies++){
00570             fPdfTot(i, ispecies)=1;
00571             for (j=0; j<fNy; j++){
00572                if (j!=iplot)
00573                   fPdfTot(i, ispecies)*=fYpdf(i, ispecies*fNy+j);
00574             }
00575          }
00576       }
00577       minuit->Clear();
00578       minuit->SetFCN(Yields);
00579       Double_t arglist[10];
00580       //set the print level
00581       if (opt.Contains("Q")||opt.Contains("V")){
00582          arglist[0]=-1;
00583       }
00584       if (opt.Contains("W"))
00585          arglist[0]=0;
00586       minuit->ExecuteCommand("SET PRINT", arglist, 1);
00587 
00588       minuit->SetObjectFit(&fPdfTot); //a tricky way to get fPdfTot matrix to fcn
00589       for (ispecies=0; ispecies<fNSpecies; ispecies++)
00590          minuit->SetParameter(ispecies, "", fNumbersOfEvents[ispecies], 1, 0, 0);
00591 
00592       minuit->ExecuteCommand("MIGRAD", arglist, 0);
00593       for (ispecies=0; ispecies<fNSpecies; ispecies++){
00594          fNumbersOfEvents[ispecies]=minuit->GetParameter(ispecies);
00595          if (!opt.Contains("Q"))
00596             printf("estimated #of events in species %d = %f\n", ispecies, fNumbersOfEvents[ispecies]);
00597       }
00598       if (!opt.Contains("Q"))
00599          printf("\n");
00600       Double_t *covmat = minuit->GetCovarianceMatrix();
00601       SPlots(covmat, iplot);
00602 
00603       if (opt.Contains("W")){
00604          Double_t *sumweight = new Double_t[fNSpecies];
00605          for (i=0; i<fNSpecies; i++){
00606             sumweight[i]=0;
00607             for (j=0; j<fNevents; j++)
00608                sumweight[i]+=fSWeights(j, (iplot+1)*fNSpecies + i);
00609             printf("checking sum of weights[%d]=%f\n", i, sumweight[i]);
00610          }
00611          printf("\n");
00612          delete [] sumweight;
00613       }
00614    }
00615 }
00616 
00617 //____________________________________________________________________
00618 void TSPlot::SPlots(Double_t *covmat, Int_t i_excl)
00619 {
00620 //Computes the sWeights from the covariance matrix
00621 
00622    Int_t i, ispecies, k;
00623    Double_t numerator, denominator;
00624    for (i=0; i<fNevents; i++){
00625       denominator=0;
00626       for (ispecies=0; ispecies<fNSpecies; ispecies++)
00627          denominator+=fNumbersOfEvents[ispecies]*fPdfTot(i, ispecies);
00628       for (ispecies=0; ispecies<fNSpecies; ispecies++){
00629          numerator=0;
00630          for (k=0; k<fNSpecies; k++)
00631             numerator+=covmat[ispecies*fNSpecies+k]*fPdfTot(i, k);
00632          fSWeights(i, (i_excl+1)*fNSpecies + ispecies)=numerator/denominator;
00633       }
00634    }
00635 
00636 }
00637 
00638 //____________________________________________________________________
00639 void TSPlot::GetSWeights(TMatrixD &weights)
00640 {
00641 //Returns the matrix of sweights
00642 
00643    if (weights.GetNcols()!=fNSpecies*(fNy+1) || weights.GetNrows()!=fNevents)
00644       weights.ResizeTo(fNevents, fNSpecies*(fNy+1));
00645    weights = fSWeights;
00646 }
00647 
00648 //____________________________________________________________________
00649 void TSPlot::GetSWeights(Double_t *weights)
00650 {
00651 //Returns the matrix of sweights. It is assumed that the array passed in the argurment is big enough
00652 
00653    for (Int_t i=0; i<fNevents; i++){
00654       for (Int_t j=0; j<fNSpecies; j++){
00655          weights[i*fNSpecies+j]=fSWeights(i, j);
00656       }
00657    }
00658 }
00659 
00660 //____________________________________________________________________
00661 void TSPlot::FillXvarHists(Int_t nbins)
00662 {
00663 //Fills the histograms of x variables (not weighted) with nbins
00664 
00665    Int_t i, j;
00666 
00667    if (!fXvarHists.IsEmpty()){
00668       if (((TH1D*)fXvarHists.First())->GetNbinsX()!=nbins)
00669          fXvarHists.Delete();
00670       else
00671          return;
00672    }
00673 
00674    //make the histograms
00675    char name[10];
00676    for (i=0; i<fNx; i++){
00677       snprintf(name,10, "x%d", i);
00678       TH1D *h = new TH1D(name, name, nbins, fMinmax(0, i), fMinmax(1, i));
00679       for (j=0; j<fNevents; j++)
00680          h->Fill(fXvar(j, i));
00681       fXvarHists.Add(h);
00682    }
00683                     
00684 }
00685 
00686 //____________________________________________________________________
00687 TObjArray* TSPlot::GetXvarHists()
00688 {
00689 //Returns the array of histograms of x variables (not weighted)
00690 //If histograms have not already
00691 //been filled, they are filled with default binning 100.
00692 
00693    Int_t nbins = 100;
00694    if (fXvarHists.IsEmpty())
00695       FillXvarHists(nbins);
00696    else if (((TH1D*)fXvarHists.First())->GetNbinsX()!=nbins)
00697       FillXvarHists(nbins);
00698    return &fXvarHists;
00699 }
00700 
00701 //____________________________________________________________________
00702 TH1D *TSPlot::GetXvarHist(Int_t ixvar)
00703 {
00704 //Returns the histogram of variable #ixvar
00705 //If histograms have not already
00706 //been filled, they are filled with default binning 100.
00707 
00708    Int_t nbins = 100;
00709    if (fXvarHists.IsEmpty())
00710       FillXvarHists(nbins);
00711    else if (((TH1D*)fXvarHists.First())->GetNbinsX()!=nbins)
00712       FillXvarHists(nbins);
00713 
00714    return (TH1D*)fXvarHists.UncheckedAt(ixvar);
00715 }
00716 
00717 //____________________________________________________________________
00718 void TSPlot::FillYvarHists(Int_t nbins)
00719 {
00720 //Fill the histograms of y variables
00721 
00722    Int_t i, j;
00723 
00724    if (!fYvarHists.IsEmpty()){
00725       if (((TH1D*)fYvarHists.First())->GetNbinsX()!=nbins)
00726          fYvarHists.Delete();
00727       else
00728          return;
00729    }
00730 
00731    //make the histograms
00732    char name[10];
00733    for (i=0; i<fNy; i++){
00734       snprintf(name,10, "y%d", i);
00735       TH1D *h=new TH1D(name, name, nbins, fMinmax(0, fNx+i), fMinmax(1, fNx+i));
00736       for (j=0; j<fNevents; j++)
00737          h->Fill(fYvar(j, i));
00738       fYvarHists.Add(h);
00739    }
00740 }
00741 
00742 //____________________________________________________________________
00743 TObjArray* TSPlot::GetYvarHists()
00744 {
00745 //Returns the array of histograms of y variables. If histograms have not already
00746 //been filled, they are filled with default binning 100.
00747 
00748    Int_t nbins = 100;
00749    if (fYvarHists.IsEmpty())
00750       FillYvarHists(nbins);
00751    else if (((TH1D*)fYvarHists.First())->GetNbinsX()!=nbins)
00752       FillYvarHists(nbins);
00753    return &fYvarHists;
00754 }
00755 
00756 //____________________________________________________________________
00757 TH1D *TSPlot::GetYvarHist(Int_t iyvar)
00758 {
00759 //Returns the histogram of variable iyvar.If histograms have not already
00760 //been filled, they are filled with default binning 100.
00761  
00762    Int_t nbins = 100;
00763    if (fYvarHists.IsEmpty())
00764       FillYvarHists(nbins);
00765    else if (((TH1D*)fYvarHists.First())->GetNbinsX()!=nbins)
00766       FillYvarHists(nbins);
00767    return (TH1D*)fYvarHists.UncheckedAt(iyvar);
00768 }
00769 
00770 //____________________________________________________________________
00771 void TSPlot::FillYpdfHists(Int_t nbins)
00772 {
00773 //Fills the histograms of pdf-s of y variables with binning nbins
00774 
00775    Int_t i, j, ispecies;
00776 
00777    if (!fYpdfHists.IsEmpty()){
00778       if (((TH1D*)fYpdfHists.First())->GetNbinsX()!=nbins)
00779          fYpdfHists.Delete();
00780       else
00781          return;
00782    }
00783 
00784    char name[30];
00785    for (ispecies=0; ispecies<fNSpecies; ispecies++){
00786       for (i=0; i<fNy; i++){
00787          snprintf(name,30, "pdf_species%d_y%d", ispecies, i);
00788          //TH1D *h = new TH1D(name, name, nbins, ypdfmin[ispecies*fNy+i], ypdfmax[ispecies*fNy+i]);
00789          TH1D *h = new TH1D(name, name, nbins, fMinmax(0, fNx+fNy+ispecies*fNy+i), fMinmax(1, fNx+fNy+ispecies*fNy+i)); 
00790          for (j=0; j<fNevents; j++)
00791             h->Fill(fYpdf(j, ispecies*fNy+i));
00792          fYpdfHists.Add(h);
00793       }
00794    }
00795 }
00796 
00797 //____________________________________________________________________
00798 TObjArray* TSPlot::GetYpdfHists()
00799 {
00800 //Returns the array of histograms of pdf's of y variables with binning nbins
00801 //If histograms have not already
00802 //been filled, they are filled with default binning 100.
00803 
00804    Int_t nbins = 100;
00805    if (fYpdfHists.IsEmpty())
00806       FillYpdfHists(nbins);
00807 
00808    return &fYpdfHists;
00809 }
00810 
00811 //____________________________________________________________________
00812 TH1D *TSPlot::GetYpdfHist(Int_t iyvar, Int_t ispecies)
00813 {
00814 //Returns the histogram of the pdf of variable #iyvar for species #ispecies, binning nbins
00815 //If histograms have not already
00816 //been filled, they are filled with default binning 100.
00817 
00818    Int_t nbins = 100;
00819    if (fYpdfHists.IsEmpty())
00820       FillYpdfHists(nbins);
00821 
00822    return (TH1D*)fYpdfHists.UncheckedAt(fNy*ispecies+iyvar);
00823 }
00824 
00825 //____________________________________________________________________
00826 void TSPlot::FillSWeightsHists(Int_t nbins)
00827 {
00828    //The order of histograms in the array:
00829    //x0_species0, x0_species1,..., x1_species0, x1_species1,..., y0_species0, y0_species1,...
00830    //If the histograms have already been filled with a different binning, they are refilled
00831    //and all histograms are deleted
00832 
00833    if (fSWeights.GetNoElements()==0){
00834       Error("GetSWeightsHists", "SWeights were not computed");
00835       return;
00836    }
00837 
00838    if (!fSWeightsHists.IsEmpty()){
00839       if (((TH1D*)fSWeightsHists.First())->GetNbinsX()!=nbins)
00840          fSWeightsHists.Delete();
00841       else
00842          return;
00843    }
00844 
00845    char name[30];
00846 
00847    //Fill histograms of x-variables weighted with sWeights
00848    for (Int_t ivar=0; ivar<fNx; ivar++){
00849       for (Int_t ispecies=0; ispecies<fNSpecies; ispecies++){
00850             snprintf(name,30, "x%d_species%d", ivar, ispecies);
00851             TH1D *h = new TH1D(name, name, nbins, fMinmax(0, ivar), fMinmax(1, ivar));
00852             h->Sumw2();
00853             for (Int_t ievent=0; ievent<fNevents; ievent++)
00854                h->Fill(fXvar(ievent, ivar), fSWeights(ievent, ispecies));
00855             fSWeightsHists.AddLast(h); 
00856          }
00857    }
00858 
00859    //Fill histograms of y-variables (exluded from the fit), weighted with sWeights
00860    for (Int_t iexcl=0; iexcl<fNy; iexcl++){
00861       for(Int_t ispecies=0; ispecies<fNSpecies; ispecies++){
00862             snprintf(name,30, "y%d_species%d", iexcl, ispecies);
00863             TH1D *h = new TH1D(name, name, nbins, fMinmax(0, fNx+iexcl), fMinmax(1, fNx+iexcl));
00864             h->Sumw2();
00865             for (Int_t ievent=0; ievent<fNevents; ievent++)
00866                h->Fill(fYvar(ievent, iexcl), fSWeights(ievent, fNSpecies*(iexcl+1)+ispecies));
00867             fSWeightsHists.AddLast(h);
00868       }
00869    }
00870 }
00871 
00872 //____________________________________________________________________
00873 TObjArray *TSPlot::GetSWeightsHists()
00874 {
00875    //Returns an array of all histograms of variables, weighted with sWeights
00876    //If histograms have not been already filled, they are filled with default binning 50
00877    //The order of histograms in the array:
00878    //x0_species0, x0_species1,..., x1_species0, x1_species1,..., y0_species0, y0_species1,...
00879 
00880    Int_t nbins = 50; //default binning
00881    if (fSWeightsHists.IsEmpty())
00882       FillSWeightsHists(nbins);
00883 
00884    return &fSWeightsHists;
00885 }
00886 
00887 //____________________________________________________________________
00888 void TSPlot::RefillHist(Int_t type, Int_t nvar, Int_t nbins, Double_t min, Double_t max, Int_t nspecies)
00889 {
00890    //The Fill...Hist() methods fill the histograms with the real limits on the variables
00891    //This method allows to refill the specified histogram with user-set boundaries min and max
00892    //Parameters:
00893    //type = 1 - histogram of x variable #nvar
00894    //     = 2 - histogram of y variable #nvar
00895    //     = 3 - histogram of y_pdf for y #nvar and species #nspecies
00896    //     = 4 - histogram of x variable #nvar, species #nspecies, WITH sWeights
00897    //     = 5 - histogram of y variable #nvar, species #nspecies, WITH sWeights
00898 
00899    if (type<1 || type>5){
00900       Error("RefillHist", "type must lie between 1 and 5");
00901       return;
00902    }
00903    char name[20];
00904    Int_t j;
00905    TH1D *hremove;
00906    if (type==1){
00907       hremove = (TH1D*)fXvarHists.RemoveAt(nvar);
00908       delete hremove;
00909       snprintf(name,20,"x%d",nvar);
00910       TH1D *h = new TH1D(name, name, nbins, min, max);
00911       for (j=0; j<fNevents;j++)
00912          h->Fill(fXvar(j, nvar));
00913       fXvarHists.AddAt(h, nvar);
00914    }
00915    if (type==2){
00916       hremove = (TH1D*)fYvarHists.RemoveAt(nvar);
00917       delete hremove;
00918       snprintf(name,20, "y%d", nvar);
00919       TH1D *h = new TH1D(name, name, nbins, min, max);
00920       for (j=0; j<fNevents;j++)
00921          h->Fill(fYvar(j, nvar));
00922       fXvarHists.AddAt(h, nvar); 
00923    }
00924    if (type==3){
00925       hremove = (TH1D*)fYpdfHists.RemoveAt(nspecies*fNy+nvar);
00926       delete hremove;
00927       snprintf(name,20, "pdf_species%d_y%d", nspecies, nvar);
00928       TH1D *h=new TH1D(name, name, nbins, min, max);
00929       for (j=0; j<fNevents; j++)
00930          h->Fill(fYpdf(j, nspecies*fNy+nvar));
00931       fYpdfHists.AddAt(h, nspecies*fNy+nvar);
00932    }
00933    if (type==4){
00934       hremove = (TH1D*)fSWeightsHists.RemoveAt(fNSpecies*nvar+nspecies);
00935       delete hremove;
00936       snprintf(name,20, "x%d_species%d", nvar, nspecies);
00937       TH1D *h = new TH1D(name, name, nbins, min, max);
00938       h->Sumw2();
00939       for (Int_t ievent=0; ievent<fNevents; ievent++)
00940          h->Fill(fXvar(ievent, nvar), fSWeights(ievent, nspecies));
00941       fSWeightsHists.AddAt(h, fNSpecies*nvar+nspecies);
00942    }
00943    if (type==5){
00944       hremove = (TH1D*)fSWeightsHists.RemoveAt(fNx*fNSpecies + fNSpecies*nvar+nspecies);
00945       delete hremove;
00946       snprintf(name,20, "y%d_species%d", nvar, nspecies);
00947       TH1D *h = new TH1D(name, name, nbins, min, max);
00948       h->Sumw2();
00949       for (Int_t ievent=0; ievent<fNevents; ievent++)
00950          h->Fill(fYvar(ievent, nvar), fSWeights(ievent, nspecies));
00951       fSWeightsHists.AddAt(h, fNx*fNSpecies + fNSpecies*nvar+nspecies); 
00952    }  
00953 }
00954 //____________________________________________________________________
00955 TH1D *TSPlot::GetSWeightsHist(Int_t ixvar, Int_t ispecies,Int_t iyexcl)
00956 {
00957    //Returns the histogram of a variable, weithed with sWeights
00958    //If histograms have not been already filled, they are filled with default binning 50
00959    //If parameter ixvar!=-1, the histogram of x-variable #ixvar is returned for species ispecies
00960    //If parameter ixvar==-1, the histogram of y-variable #iyexcl is returned for species ispecies
00961    //If the histogram has already been filled and the binning is different from the parameter nbins
00962    //all histograms with old binning will be deleted and refilled.
00963 
00964 
00965    Int_t nbins = 50; //default binning
00966    if (fSWeightsHists.IsEmpty())
00967       FillSWeightsHists(nbins);
00968 
00969    if (ixvar==-1)
00970       return (TH1D*)fSWeightsHists.UncheckedAt(fNx*fNSpecies + fNSpecies*iyexcl+ispecies);
00971    else
00972       return (TH1D*)fSWeightsHists.UncheckedAt(fNSpecies*ixvar + ispecies);
00973 
00974 }
00975 
00976 
00977 //____________________________________________________________________
00978 void TSPlot::SetTree(TTree *tree)
00979 {
00980    // Set the input Tree
00981    fTree = tree;
00982 }
00983 
00984 //____________________________________________________________________
00985 void TSPlot::SetTreeSelection(const char* varexp, const char *selection, Long64_t firstentry)
00986 {
00987    //Specifies the variables from the tree to be used for splot
00988    //
00989    //Variables fNx, fNy, fNSpecies and fNEvents should already be set!
00990    //
00991    //In the 1st parameter it is assumed that first fNx variables are x(control variables), 
00992    //then fNy y variables (discriminating variables), 
00993    //then fNy*fNSpecies ypdf variables (probability distribution functions of dicriminating 
00994    //variables for different species). The order of pdfs should be: species0_y0, species0_y1,...
00995    //species1_y0, species1_y1,...species[fNSpecies-1]_y0...
00996    //The 2nd parameter allows to make a cut
00997    //TTree::Draw method description contains more details on specifying expression and selection 
00998 
00999    TTreeFormula **var;
01000    std::vector<TString> cnames;
01001    TList *formulaList = new TList();
01002    TSelectorDraw *selector = (TSelectorDraw*)(((TTreePlayer*)fTree->GetPlayer())->GetSelector());
01003 
01004    Long64_t entry,entryNumber, curentry;
01005    Int_t i,nch;
01006    Int_t ncols;
01007    TObjArray *leaves = fTree->GetListOfLeaves();
01008 
01009    fTreename= new TString(fTree->GetName());
01010    if (varexp)
01011       fVarexp = new TString(varexp);
01012    if (selection)
01013       fSelection= new TString(selection);
01014 
01015    nch = varexp ? strlen(varexp) : 0;
01016 
01017 
01018 //*-*- Compile selection expression if there is one
01019    TTreeFormula *select = 0;
01020    if (selection && strlen(selection)) {
01021       select = new TTreeFormula("Selection",selection,fTree);
01022       if (!select) return;
01023       if (!select->GetNdim()) { delete select; return; }
01024       formulaList->Add(select);
01025    }
01026 //*-*- if varexp is empty, take first nx + ny + ny*nspecies columns by default
01027 
01028    if (nch == 0) {
01029       ncols = fNx + fNy + fNy*fNSpecies;
01030       for (i=0;i<ncols;i++) {
01031          cnames.push_back( leaves->At(i)->GetName() );
01032       }
01033 //*-*- otherwise select only the specified columns
01034    } else {
01035       ncols = selector->SplitNames(varexp,cnames);
01036    }
01037    var = new TTreeFormula* [ncols];
01038    Double_t *xvars = new Double_t[ncols];
01039 
01040    fMinmax.ResizeTo(2, ncols);
01041    for (i=0; i<ncols; i++){
01042       fMinmax(0, i)=1e30; 
01043       fMinmax(1, i)=-1e30;
01044    }
01045 
01046 //*-*- Create the TreeFormula objects corresponding to each column
01047    for (i=0;i<ncols;i++) {
01048       var[i] = new TTreeFormula("Var1",cnames[i].Data(),fTree);
01049       formulaList->Add(var[i]);
01050    }
01051 
01052 //*-*- Create a TreeFormulaManager to coordinate the formulas
01053    TTreeFormulaManager *manager=0;
01054    if (formulaList->LastIndex()>=0) {
01055       manager = new TTreeFormulaManager;
01056       for(i=0;i<=formulaList->LastIndex();i++) {
01057          manager->Add((TTreeFormula*)formulaList->At(i));
01058       }
01059       manager->Sync();
01060    }
01061 //*-*- loop on all selected entries
01062    // fSelectedRows = 0;
01063    Int_t tnumber = -1;
01064    Long64_t selectedrows=0;
01065    for (entry=firstentry;entry<firstentry+fNevents;entry++) {
01066       entryNumber = fTree->GetEntryNumber(entry);
01067       if (entryNumber < 0) break;
01068       Long64_t localEntry = fTree->LoadTree(entryNumber);
01069       if (localEntry < 0) break;
01070       if (tnumber != fTree->GetTreeNumber()) {
01071          tnumber = fTree->GetTreeNumber();
01072          if (manager) manager->UpdateFormulaLeaves();
01073       }
01074       Int_t ndata = 1;
01075       if (manager && manager->GetMultiplicity()) {
01076          ndata = manager->GetNdata();
01077       }
01078 
01079       for(Int_t inst=0;inst<ndata;inst++) {
01080          Bool_t loaded = kFALSE;
01081          if (select) {
01082             if (select->EvalInstance(inst) == 0) {
01083                continue;
01084             }
01085          }
01086 
01087          if (inst==0) loaded = kTRUE;
01088          else if (!loaded) {
01089             // EvalInstance(0) always needs to be called so that
01090             // the proper branches are loaded.
01091             for (i=0;i<ncols;i++) {
01092                var[i]->EvalInstance(0);
01093             }
01094             loaded = kTRUE;
01095          }
01096 
01097          for (i=0;i<ncols;i++) {
01098             xvars[i] = var[i]->EvalInstance(inst);
01099          }
01100          
01101          curentry = entry-firstentry;
01102          //printf("event#%d\n", curentry);
01103          //for (i=0; i<ncols; i++)
01104           //  printf("xvars[%d]=%f\n", i, xvars[i]);
01105          //selectedrows++;
01106          for (i=0; i<fNx; i++){
01107             fXvar(selectedrows, i) = xvars[i];
01108             if (fXvar(selectedrows, i) < fMinmax(0, i))
01109                fMinmax(0, i)=fXvar(selectedrows, i);
01110             if (fXvar(selectedrows, i) > fMinmax(1, i))
01111                fMinmax(1, i)=fXvar(selectedrows, i);
01112          }
01113          for (i=0; i<fNy; i++){
01114             fYvar(selectedrows, i) = xvars[i+fNx];
01115             //printf("y_in_loop(%d, %d)=%f, xvars[%d]=%f\n", selectedrows, i, fYvar(selectedrows, i), i+fNx, xvars[i+fNx]);
01116             if (fYvar(selectedrows, i) < fMinmax(0, i+fNx))
01117                fMinmax(0, i+fNx) = fYvar(selectedrows, i);
01118             if (fYvar(selectedrows, i) > fMinmax(1, i+fNx))
01119                fMinmax(1, i+fNx) = fYvar(selectedrows, i);
01120             for (Int_t j=0; j<fNSpecies; j++){
01121                fYpdf(selectedrows, j*fNy + i)=xvars[j*fNy + i+fNx+fNy];
01122                if (fYpdf(selectedrows, j*fNy+i) < fMinmax(0, j*fNy+i+fNx+fNy))
01123                   fMinmax(0, j*fNy+i+fNx+fNy) = fYpdf(selectedrows, j*fNy+i);
01124                if (fYpdf(selectedrows, j*fNy+i) > fMinmax(1, j*fNy+i+fNx+fNy))
01125                   fMinmax(1, j*fNy+i+fNx+fNy) = fYpdf(selectedrows, j*fNy+i);
01126             }
01127          }
01128       selectedrows++;   
01129       }
01130    }
01131    fNevents=selectedrows;
01132   // for (i=0; i<fNevents; i++){
01133     //  printf("event#%d\n", i);
01134       //for (Int_t iy=0; iy<fNy; iy++)
01135         // printf("y[%d]=%f\n", iy, fYvar(i, iy));
01136       //for (Int_t ispecies=0; ispecies<fNSpecies; ispecies++){
01137       //   for (Int_t iy=0; iy<fNy; iy++)
01138         //    printf("ypdf[sp. %d, y %d]=%f\n", ispecies, iy, fYpdf(i, ispecies*fNy+iy));
01139      // }
01140    //}
01141    delete [] xvars;
01142    delete [] var;
01143 }
01144 
01145 //____________________________________________________________________
01146 void Yields(Int_t &, Double_t *, Double_t &f, Double_t *x, Int_t /*iflag*/)
01147 {
01148 // FCN-function for Minuit
01149 
01150    Double_t lik;
01151    Int_t i, ispecies;
01152 
01153    TVirtualFitter *fitter = TVirtualFitter::GetFitter();
01154    TMatrixD *pdftot = (TMatrixD*)fitter->GetObjectFit();
01155    Int_t nev = pdftot->GetNrows();
01156    Int_t nes = pdftot->GetNcols();
01157    f=0;
01158    for (i=0; i<nev; i++){
01159       lik=0;
01160       for (ispecies=0; ispecies<nes; ispecies++)
01161          lik+=x[ispecies]*(*pdftot)(i, ispecies);
01162       if (lik<0) lik=1;
01163       f+=TMath::Log(lik);
01164    }
01165    //extended likelihood, equivalent to chi2
01166    Double_t ntot=0;
01167    for (i=0; i<nes; i++)
01168       ntot += x[i];
01169    f = -2*(f-ntot);
01170 }
01171