ROOT_528-00b_version: tutorials/quadp/portfolio.C Source File

00001 #include "Riostream.h"
00002 #include "TCanvas.h"
00003 #include "TFile.h"
00004 #include "TMath.h"
00005 #include "TTree.h"
00006 #include "TArrayF.h"
00007 #include "TH1.h"
00008 #include "TF1.h"
00009 #include "TLegend.h"
00010 #include "TSystem.h"
00011 
00012 #include "TMatrixD.h"
00013 #include "TMatrixDSym.h"
00014 #include "TVectorD.h"
00015 #include "TQpProbDens.h"
00016 #include "TGondzioSolver.h"
00017 
00018 //+ This macro shows in detail the use of the quadratic programming package quadp .
00019 // Running this macro :
00020 //     .x portfolio.C+
00021 // or  gSystem->Load("libQuadp"); .L portFolio.C+; portfolio()
00022 //
00023 // Let's first review what we exactly mean by "quadratic programming" :
00024 //
00025 // We want to minimize the following objective function :
00026 //
00027 //   c^T x + ( 1/2 ) x^T Q x    wrt. the vector x
00028 //
00029 //   c is a vector and Q a symmetric positive definite matrix
00030 //
00031 // You might wonder what is so special about this objective which is quadratic in
00032 // the unknowns, that can not be done by Minuit/Fumili . Well, we have in addition
00033 // the following boundary conditions on x:
00034 //
00035 //          A x =  b
00036 //  clo <=  C x <= cup
00037 //  xlo <=    x <= xup  ,  where A and C are arbitray matrices and the rest are vectors
00038 //
00039 // Not all these constraints have to be defined . Our example will only use xlo, A and b
00040 // Still, this could be handled by a general non-linear minimizer like Minuit by introducing
00041 // so-called "slack" variables . However, quadp is tailored to objective functions not more
00042 // complex than being quadratic . This allows usage of solving techniques which are even
00043 // stable for problems involving for instance 500 variables, 100 inequality conditions
00044 // and 50 equality conditions .
00045 //
00046 // Enough said about quadratic programming, let's return to our example .
00047 // Suppose, after a long day of doing physics, you have a look at your investments and
00048 // realize that an early retirement is not possible, given the returns of your stocks .
00049 // So what now ? ROOT to the rescue ......
00050 //
00051 // In 1990 Harry Markowitz was awarded the Noble prize for economics : " his work provided new tools
00052 // for weighing the risks and rewards of different investments and for valuing corporate stocks and bonds" .
00053 // In plain English, he developed the tools to balance greed and fear, we want the maximum return 
00054 // with the minimum amount of risk. Our stock portfolio should be at the "Efficient Frontier",
00055 // see http://www.riskglossary.com/articles/efficient_frontier.htm .
00056 // To quantify better the risk we are willing to take, we define a utility function U(x) . It describes
00057 // as a function of our total assets x, our "satisfaction" . A common choice is 1-exp(-k*x) (the reason for
00058 // the exponent will be clear later)  . The parameter k is the risk-aversion factor . For small values of k
00059 // the satisfaction is small for small values of x; by increasing x the satisfaction can still be increased
00060 // significantly . For large values of k, U(x) increases rapidly to 1, there is no increase in satisfaction
00061 // for additional dollars earned .
00062 //
00063 // In summary : small k ==> risk-loving investor
00064 //              large k ==> risk-averse investor
00065 //
00066 // Suppose we have for nrStocks the historical daily returns r = closing_price(n) - closing_price(n-1) .
00067 // Define a vector x of length of nrStocks, which contains the fraction of our money invested in
00068 // each stock . We can calculate the average daily return z of our portfolio and its variance using
00069 // the portfolio covariance Covar :
00070 //
00071 //  z = r^T x   and var = x^T Covar x
00072 //
00073 // Assuming that the daily returns have a Normal distribution, N(x), so will z with mean r^T x
00074 // and variance x^T Covar x
00075 //
00076 // The expected value of the utility function is : E(u(x)) = Int (1-exp(-k*x) N(x) dx
00077 //                                                         = 1-exp(-k (r^T x - 0.5 k x^T Covar x) )
00078 // 
00079 // Its value is maximized by maximizing  r^T x -0.5 k x^T Covar x
00080 // under the condition sum (x_i) = 1, meaning we want all our money invested and
00081 // x_i >= 0 , we can not "short" a stock
00082 //  
00083 // For 10 stocks we got the historical daily data for Sep-2000 to Jun-2004:
00084 //
00085 // GE   : General Electric Co
00086 // SUNW : Sun Microsystems Inc
00087 // QCOM : Qualcomm Inc
00088 // BRCM : Broadcom Corp
00089 // TYC  : Tyco International Ltd
00090 // IBM  : International Business Machines Corp
00091 // AMAT : Applied Materials Inc
00092 // C    : Citigroup Inc
00093 // PFE  : Pfizer Inc
00094 // HD   : Home Depot Inc
00095 //
00096 // We calculate the optimal portfolio for 2.0 and 10.0 .
00097 //
00098 // Food for thought :
00099 // - We assumed that the stock returns have a Normal distribution . Check this assumption by 
00100 //   histogramming the stock returns !
00101 // - We used for the expected return in the objective function, the flat average over a time
00102 //   period . Investment firms will put significant resources in improving the return predicton .
00103 // - If you want to trade significant number of shares, several other considerations have
00104 //   to be taken into account :
00105 //    +  If you are going to buy, you will drive the price up (so-called "slippage") .
00106 //       This can be taken into account by adding terms to the objective
00107 //       (Google for "slippage optimization")
00108 //    +  FTC regulations might have to be added to the inequality constraints 
00109 // - Investment firms do not want to be exposed to the "market" as defined by a broad
00110 //   index like the S&P and "hedge" this exposure away . A perfect hedge this can be added
00111 //    as an equality constrain, otherwise add an inequality constrain .
00112 //
00113 //Author: Eddy Offermann
00114 
00115 const Int_t nrStocks = 10;
00116 static const Char_t *stocks[] =
00117      {"GE","SUNW","QCOM","BRCM","TYC","IBM","AMAT","C","PFE","HD"};
00118 
00119 class TStockDaily  {
00120 public:
00121   Int_t fDate;
00122   Int_t fOpen;     // 100*open_price
00123   Int_t fHigh;     // 100*high_price
00124   Int_t fLow;      // 100*low_price
00125   Int_t fClose;    // 100*close_price
00126   Int_t fVol;
00127   Int_t fCloseAdj; // 100*close_price adjusted for splits and dividend
00128 
00129   TStockDaily() {
00130      fDate = fVol = fOpen = fHigh = fLow = fClose = fCloseAdj = 0; 
00131   }
00132   virtual ~TStockDaily() {}
00133 
00134   ClassDef(TStockDaily,1)
00135 };
00136 
00137 //---------------------------------------------------------------------------
00138 Double_t RiskProfile(Double_t *x, Double_t *par) {
00139   Double_t riskFactor = par[0];
00140   return 1-TMath::Exp(-riskFactor*x[0]);
00141 }
00142 
00143 //---------------------------------------------------------------------------
00144 TArrayF &StockReturn(TFile *f,const TString &name,Int_t sDay,Int_t eDay)
00145 {
00146   TTree *tDaily = (TTree*)f->Get(name);
00147   TStockDaily *data = 0;
00148   tDaily->SetBranchAddress("daily",&data);
00149   TBranch *b_closeAdj = tDaily->GetBranch("fCloseAdj");
00150   TBranch *b_date     = tDaily->GetBranch("fDate");
00151    
00152   //read only the "adjusted close" branch for all entries
00153   const Int_t nrEntries = (Int_t)tDaily->GetEntries();
00154   TArrayF closeAdj(nrEntries);
00155   for (Int_t i = 0; i < nrEntries; i++) {
00156     b_date->GetEntry(i); 
00157     b_closeAdj->GetEntry(i); 
00158     if (data->fDate >= sDay && data->fDate <= eDay)
00159 #ifdef __CINT__
00160       closeAdj.AddAt(data->fCloseAdj/100. , i );
00161 #else
00162       closeAdj[i] = data->fCloseAdj/100.;
00163 #endif
00164   }
00165 
00166   TArrayF *r = new TArrayF(nrEntries-1);
00167   for (Int_t i = 1; i < nrEntries; i++)
00168 //    (*r)[i-1] = closeAdj[i]-closeAdj[i-1];
00169 #ifdef __CINT__
00170     r->AddAt(closeAdj[i]/closeAdj[i-1],1);
00171 #else
00172     (*r)[i-1] = closeAdj[i]/closeAdj[i-1];
00173 #endif
00174 
00175   return *r;
00176 }
00177 
00178 #ifndef __MAKECINT__
00179 //---------------------------------------------------------------------------
00180 TVectorD OptimalInvest(Double_t riskFactor,TVectorD r,TMatrixDSym Covar)
00181 {
00182 // what the quadratic programming package will do:
00183 //
00184 //  minimize    c^T x + ( 1/2 ) x^T Q x       
00185 //  subject to                A x  = b  
00186 //                    clo <=  C x <= cup
00187 //                    xlo <=    x <= xup
00188 // what we want :
00189 //
00190 //  maximize    c^T x - k ( 1/2 ) x^T Q x
00191 //  subject to        sum_x x_i = 1
00192 //                    0 <= x_i
00193 
00194   // We have nrStocks weights to determine,
00195   // 1 equality- and 0 inequality- equations (the simple square boundary
00196   // condition (xlo <= x <= xup) does not count)
00197 
00198   const Int_t nrVar     = nrStocks;
00199   const Int_t nrEqual   = 1;
00200   const Int_t nrInEqual = 0;
00201 
00202   // flip the sign of the objective function because we want to maximize
00203   TVectorD    c = -1.*r;
00204   TMatrixDSym Q = riskFactor*Covar;
00205 
00206   // equality equation
00207   TMatrixD A(nrEqual,nrVar); A = 1;
00208   TVectorD b(nrEqual); b = 1;
00209 
00210   // inequality equation
00211   //
00212   // - although not applicable in the current situatio since nrInEqual = 0, one
00213   //   has to specify not only clo and cup but also an index vector iclo and icup,
00214   //   whose values are either 0 or 1 . If iclo[j] = 1, the lower boundary condition 
00215   //   is active on x[j], etc. ...
00216 
00217   TMatrixD C   (nrInEqual,nrVar);
00218   TVectorD clo (nrInEqual);
00219   TVectorD cup (nrInEqual);
00220   TVectorD iclo(nrInEqual);
00221   TVectorD icup(nrInEqual);
00222 
00223   // simple square boundary condition : 0 <= x_i, so only xlo is relevant .
00224   // Like for clo and cup above, we have to define an index vector ixlo and ixup .
00225   // Since each variable has the lower boundary, we can set the whole vector
00226   // ixlo = 1
00227 
00228   TVectorD xlo (nrVar); xlo  = 0;
00229   TVectorD xup (nrVar); xup  = 0;
00230   TVectorD ixlo(nrVar); ixlo = 1;
00231   TVectorD ixup(nrVar); ixup = 0;
00232 
00233   // setup the quadratic programming problem . Since a small number of variables are
00234   // involved and "Q" has everywhere entries, we chose the dense version "TQpProbDens" .
00235   // In case of a sparse formulation, simply replace all "Dens" by "Sparse" below and
00236   // use TMatrixDSparse instead of TMatrixDSym and TMatrixD
00237 
00238   TQpProbDens *qp = new TQpProbDens(nrVar,nrEqual,nrInEqual);
00239 
00240   // stuff all the matrices/vectors defined above in the proper places
00241 
00242   TQpDataDens *prob = (TQpDataDens *)qp->MakeData(c,Q,xlo,ixlo,xup,ixup,A,b,C,clo,iclo,cup,icup);
00243 
00244   // setup the nrStock variables, vars->fX will contain the final solution
00245 
00246   TQpVar      *vars  = qp->MakeVariables(prob);
00247   TQpResidual *resid = qp->MakeResiduals(prob);
00248 
00249   // Now we have to choose the method of solving, either TGondzioSolver or TMehrotraSolver
00250   // The Gondzio method is more sophisticated and therefore numerically more involved
00251   // If one want the Mehrotra method, simply replace "Gondzio" by "Mehrotra" .
00252 
00253   TGondzioSolver *s = new TGondzioSolver(qp,prob);
00254   const Int_t status = s->Solve(prob,vars,resid);
00255 
00256   const TVectorD weight = vars->fX;
00257 
00258   delete qp; delete prob; delete vars; delete resid; delete s;
00259   if (status != 0) {
00260     cout << "Could not solve this problem." <<endl;
00261     return TVectorD(nrStocks);
00262   }
00263 
00264   return weight;
00265 }
00266 #endif
00267 
00268 //---------------------------------------------------------------------------
00269 void portfolio()
00270 {
00271   const Int_t sDay = 20000809;
00272   const Int_t eDay = 20040602;
00273 
00274   const char *fname = "stock.root";
00275   TFile *f = 0;
00276   if (!gSystem->AccessPathName(fname)) {
00277      f = TFile::Open(fname);
00278   } else {
00279      printf("accessing %s file from http://root.cern.ch/files\n",fname);
00280      f = TFile::Open(Form("http://root.cern.ch/files/%s",fname));
00281   }
00282   if (!f) return;
00283 
00284   TArrayF *data = new TArrayF[nrStocks];
00285   for (Int_t i = 0; i < nrStocks; i++) {
00286     const TString symbol = stocks[i];
00287     data[i] = StockReturn(f,symbol,sDay,eDay);
00288   }
00289 
00290   const Int_t nrData = data[0].GetSize();
00291 
00292   TVectorD r(nrStocks);
00293   for (Int_t i = 0; i < nrStocks; i++)
00294     r[i] = data[i].GetSum()/nrData;
00295 
00296   TMatrixDSym Covar(nrStocks);
00297   for (Int_t i = 0; i < nrStocks; i++) {
00298     for (Int_t j = 0; j <= i; j++) {
00299       Double_t sum = 0.;
00300       for (Int_t k = 0; k < nrData; k++)
00301         sum += (data[i][k]-r[i])*(data[j][k]-r[j]);
00302       Covar(i,j) = Covar(j,i) = sum/nrData;
00303     }
00304   }
00305 
00306   const TVectorD weight1 = OptimalInvest(2.0,r,Covar);
00307   const TVectorD weight2 = OptimalInvest(10.,r,Covar);
00308 
00309   cout << "stock     daily  daily   w1     w2" <<endl;
00310   cout << "symb      return sdv              " <<endl;
00311   for (Int_t i = 0; i < nrStocks; i++)
00312     printf("%s\t: %.3f  %.3f  %.3f  %.3f\n",stocks[i],r[i],TMath::Sqrt(Covar[i][i]),weight1[i],weight2[i]);
00313 
00314   TCanvas *c1 = new TCanvas("c1","Portfolio Optimizations",10,10,800,900);
00315   c1->Divide(1,2);
00316 
00317   // utility function / risk profile
00318 
00319   c1->cd(1);
00320   gPad->SetGridx();
00321   gPad->SetGridy();
00322 
00323   TF1 *f1 = new TF1("f1",RiskProfile,0,2.5,1);
00324   f1->SetParameter(0,2.0);
00325   f1->SetLineColor(49);
00326   f1->Draw("AC");
00327   f1->GetHistogram()->SetXTitle("dollar");
00328   f1->GetHistogram()->SetYTitle("utility");
00329   f1->GetHistogram()->SetMinimum(0.0);
00330   f1->GetHistogram()->SetMaximum(1.0);
00331   TF1 *f2 = new TF1("f2",RiskProfile,0,2.5,1);
00332   f2->SetParameter(0,10.);
00333   f2->SetLineColor(50);
00334   f2->Draw("CSAME");
00335 
00336   TLegend *legend1 = new TLegend(0.50,0.65,0.70,0.82);
00337   legend1->AddEntry(f1,"1-exp(-2.0*x)","l");
00338   legend1->AddEntry(f2,"1-exp(-10.*x)","l");
00339   legend1->Draw();
00340 
00341   // vertical bar chart of portfolio distribution
00342 
00343   c1->cd(2);
00344   TH1F *h1 = new TH1F("h1","Portfolio Distribution",nrStocks,0,0);
00345   TH1F *h2 = new TH1F("h2","Portfolio Distribution",nrStocks,0,0);
00346   h1->SetStats(0);
00347   h1->SetFillColor(49);
00348   h2->SetFillColor(50);
00349   h1->SetBarWidth(0.45);
00350   h1->SetBarOffset(0.1);
00351   h2->SetBarWidth(0.4);
00352   h2->SetBarOffset(0.55);
00353   for (Int_t i = 0; i < nrStocks; i++) {
00354     h1->Fill(stocks[i],weight1[i]);
00355     h2->Fill(stocks[i],weight2[i]);
00356   }
00357 
00358   h1->Draw("BAR2");
00359   h2->Draw("BAR2SAME");
00360 
00361   TLegend *legend2 = new TLegend(0.50,0.65,0.70,0.82);
00362   legend2->AddEntry(h1,"high risk","f");
00363   legend2->AddEntry(h2,"low  risk","f");
00364   legend2->Draw();
00365 }