00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #include <stdlib.h>
00023
00024 #include "TCondor.h"
00025 #include "TList.h"
00026 #include "TSystem.h"
00027 #include "TObjString.h"
00028 #include "TRegexp.h"
00029 #include "TProofDebug.h"
00030 #include "Riostream.h"
00031 #include "TEnv.h"
00032 #include "TClass.h"
00033
00034 ClassImp(TCondorSlave)
00035 ClassImp(TCondor)
00036
00037
00038
00039 TCondor::TCondor(const char *pool) : fPool(pool), fState(kFree)
00040 {
00041
00042
00043
00044 fClaims = new TList;
00045
00046
00047
00048 TString condorHome = gEnv->GetValue("Proof.CondorHome", (char*)0);
00049 if (condorHome != "") {
00050 TString path = gSystem->Getenv("PATH");
00051 path = condorHome + "/bin:" + path;
00052 gSystem->Setenv("PATH",path);
00053 }
00054
00055 TString condorConf = gEnv->GetValue("Proof.CondorConfig", (char*)0);
00056 if (condorConf != "") {
00057 gSystem->Setenv("CONDOR_CONFIG",condorConf);
00058 }
00059
00060 char *loc = gSystem->Which(gSystem->Getenv("PATH"), "condor_cod",
00061 kExecutePermission);
00062
00063 if (loc) {
00064 fValid = kTRUE;
00065 delete [] loc;
00066 } else {
00067 fValid = kFALSE;
00068 }
00069 }
00070
00071
00072
00073 TCondor::~TCondor()
00074 {
00075
00076
00077 PDB(kCondor,1) Info("~TCondor","fState %d", fState );
00078
00079 if (fState != kFree) {
00080 Release();
00081 }
00082 delete fClaims;
00083 }
00084
00085
00086
00087 void TCondor::Print(Option_t * opt) const
00088 {
00089
00090
00091 cout << "OBJ: " << IsA()->GetName()
00092 << "\tPool: \"" << fPool << "\""
00093 << "\tState: " << fState << endl;
00094 fClaims->Print(opt);
00095 }
00096
00097
00098
00099 TCondorSlave *TCondor::ClaimVM(const char *vm, const char *cmd)
00100 {
00101
00102
00103
00104
00105 Int_t port = 0;
00106
00107 TString claimCmd = Form("condor_cod request -name %s -timeout 10 2>>%s/condor.proof.%d",
00108 vm, gSystem->TempDirectory(), gSystem->GetUid() );
00109
00110 PDB(kCondor,2) Info("ClaimVM","command: %s", claimCmd.Data());
00111 FILE *pipe = gSystem->OpenPipe(claimCmd, "r");
00112
00113 if (!pipe) {
00114 SysError("ClaimVM","cannot run command: %s", claimCmd.Data());
00115 return 0;
00116 }
00117
00118 TString claimId;
00119 TString line;
00120 while (line.Gets(pipe)) {
00121 PDB(kCondor,3) Info("ClaimVM","line = %s", line.Data());
00122
00123 if (line.BeginsWith("ClaimId = \"")) {
00124 line.Remove(0, line.Index("\"")+1);
00125 line.Chop();
00126 claimId = line;
00127 PDB(kCondor,1) Info("ClaimVM","claim = '%s'", claimId.Data());
00128 TRegexp r("[0-9]*$");
00129 TString num = line(r);
00130 port = 37000 + atoi(num.Data());
00131 PDB(kCondor,1) Info("ClaimVM","port = %d", port);
00132 }
00133 }
00134
00135 Int_t r = gSystem->ClosePipe(pipe);
00136 if (r) {
00137 Error("ClaimVM","command: %s returned %d", claimCmd.Data(), r);
00138 return 0;
00139 } else {
00140 PDB(kCondor,1) Info("ClaimVM","command: %s returned %d", claimCmd.Data(), r);
00141 }
00142
00143 TString jobad("jobad");
00144 FILE *jf = gSystem->TempFileName(jobad);
00145
00146 if (jf == 0) return 0;
00147
00148 TString str(cmd);
00149 str.ReplaceAll("$(Port)", Form("%d", port));
00150 fputs(str, jf);
00151
00152 fclose(jf);
00153
00154 TString activateCmd = Form("condor_cod activate -id '%s' -jobad %s",
00155 claimId.Data(), jobad.Data() );
00156
00157 PDB(kCondor,2) Info("ClaimVM","command: %s", activateCmd.Data());
00158 pipe = gSystem->OpenPipe(activateCmd, "r");
00159
00160 if (!pipe) {
00161 SysError("ClaimVM","cannot run command: %s", activateCmd.Data());
00162 return 0;
00163 }
00164
00165 while (line.Gets(pipe)) {
00166 PDB(kCondor,3) Info("ClaimVM","Activate: line = %s", line.Data());
00167 }
00168
00169 r = gSystem->ClosePipe(pipe);
00170 if (r) {
00171 Error("ClaimVM","command: %s returned %d", activateCmd.Data(), r);
00172 } else {
00173 PDB(kCondor,1) Info("ClaimVM","command: %s returned %d", activateCmd.Data(), r);
00174 }
00175
00176 gSystem->Unlink(jobad);
00177
00178
00179 TCondorSlave *claim = new TCondorSlave;
00180 claim->fClaimID = claimId;
00181 TString node(vm);
00182 node = node.Remove(0, node.Index("@")+1);
00183 claim->fHostname = node;
00184 claim->fPort = port;
00185 claim->fPerfIdx = 100;
00186 claim->fImage = node;
00187
00188 return claim;
00189 }
00190
00191
00192
00193 TList *TCondor::GetVirtualMachines() const
00194 {
00195
00196
00197
00198 TString poolopt = fPool ? "" : Form("-pool %s", fPool.Data());
00199 TString cmd = Form("condor_status %s -format \"%%s\\n\" Name", poolopt.Data());
00200
00201 PDB(kCondor,2) Info("GetVirtualMachines","command: %s", cmd.Data());
00202
00203 FILE *pipe = gSystem->OpenPipe(cmd, "r");
00204
00205 if (!pipe) {
00206 SysError("GetVirtualMachines","cannot run command: %s", cmd.Data());
00207 return 0;
00208 }
00209
00210 TString line;
00211 TList *l = new TList;
00212 while (line.Gets(pipe)) {
00213 PDB(kCondor,3) Info("GetVirtualMachines","line = %s", line.Data());
00214 if (line != "") l->Add(new TObjString(line));
00215 }
00216
00217 Int_t r = gSystem->ClosePipe(pipe);
00218 if (r) {
00219 delete l;
00220 Error("GetVirtualMachines","command: %s returned %d", cmd.Data(), r);
00221 return 0;
00222 } else {
00223 PDB(kCondor,1) Info("GetVirtualMachines","command: %s returned %d", cmd.Data(), r);
00224 }
00225
00226 return l;
00227 }
00228
00229
00230
00231 TList *TCondor::Claim(Int_t n, const char *cmd)
00232 {
00233
00234
00235
00236
00237 if (fState != kFree) {
00238 Error("Claim","not in state Free");
00239 return 0;
00240 }
00241
00242 TList *vms = GetVirtualMachines();
00243 TIter next(vms);
00244 TObjString *vm;
00245 for(Int_t i=0; i < n && (vm = (TObjString*) next()) != 0; i++ ) {
00246 TCondorSlave *claim = ClaimVM(vm->GetName(), cmd);
00247 if (claim != 0) {
00248 if ( !GetVmInfo(vm->GetName(), claim->fImage, claim->fPerfIdx) ) {
00249
00250 delete claim;
00251 } else {
00252 fClaims->Add(claim);
00253 fState = kActive;
00254 }
00255 }
00256 }
00257
00258 return fClaims;
00259 }
00260
00261
00262
00263 TCondorSlave *TCondor::Claim(const char *vmname, const char *cmd)
00264 {
00265
00266
00267
00268
00269 if (fState != kFree && fState != kActive) {
00270 Error("Claim","not in state Free or Active");
00271 return 0;
00272 }
00273
00274 TCondorSlave *claim = ClaimVM(vmname, cmd);
00275 if (claim != 0) {
00276 fClaims->Add(claim);
00277 fState = kActive;
00278 }
00279
00280 return claim;
00281 }
00282
00283
00284
00285 Bool_t TCondor::SetState(EState state)
00286 {
00287
00288
00289 PDB(kCondor,1) Info("SetState","state: %s (%lld)",
00290 state == kSuspended ? "kSuspended" : "kActive", Long64_t(gSystem->Now()));
00291 TIter next(fClaims);
00292 TCondorSlave *claim;
00293 while((claim = (TCondorSlave*) next()) != 0) {
00294 TString cmd = Form("condor_cod %s -id '%s'",
00295 state == kSuspended ? "suspend" : "resume",
00296 claim->fClaimID.Data());
00297
00298 PDB(kCondor,2) Info("SetState","command: %s", cmd.Data());
00299 FILE *pipe = gSystem->OpenPipe(cmd, "r");
00300
00301 if (!pipe) {
00302 SysError("SetState","cannot run command: %s", cmd.Data());
00303 return kFALSE;
00304 }
00305
00306 TString line;
00307 while (line.Gets(pipe)) {
00308 PDB(kCondor,3) Info("SetState","line = %s", line.Data());
00309 }
00310
00311 Int_t r = gSystem->ClosePipe(pipe);
00312 if (r) {
00313 Error("SetState","command: %s returned %d", cmd.Data(), r);
00314 return kFALSE;
00315 } else {
00316 PDB(kCondor,1) Info("SetState","command: %s returned %d", cmd.Data(), r);
00317 }
00318 }
00319
00320 fState = state;
00321 return kTRUE;
00322 }
00323
00324
00325
00326 Bool_t TCondor::Suspend()
00327 {
00328
00329
00330 if (fState != kActive) {
00331 Error("Suspend","not in state Active");
00332 return kFALSE;
00333 }
00334
00335 return SetState(kSuspended);
00336 }
00337
00338
00339
00340 Bool_t TCondor::Resume()
00341 {
00342
00343
00344 if (fState != kSuspended) {
00345 Error("Suspend","not in state Suspended");
00346 return kFALSE;
00347 }
00348
00349 return SetState(kActive);
00350 }
00351
00352
00353
00354 Bool_t TCondor::Release()
00355 {
00356
00357
00358 if (fState == kFree) {
00359 Error("Suspend","not in state Active or Suspended");
00360 return kFALSE;
00361 }
00362
00363 TCondorSlave *claim;
00364 while((claim = (TCondorSlave*) fClaims->First()) != 0) {
00365 TString cmd = Form("condor_cod release -id '%s'", claim->fClaimID.Data());
00366
00367 PDB(kCondor,2) Info("SetState","command: %s", cmd.Data());
00368 FILE *pipe = gSystem->OpenPipe(cmd, "r");
00369
00370 if (!pipe) {
00371 SysError("Release","cannot run command: %s", cmd.Data());
00372 return kFALSE;
00373 }
00374
00375 TString line;
00376 while (line.Gets(pipe)) {
00377 PDB(kCondor,3) Info("Release","line = %s", line.Data());
00378 }
00379
00380 Int_t r = gSystem->ClosePipe(pipe);
00381 if (r) {
00382 Error("Release","command: %s returned %d", cmd.Data(), r);
00383 return kFALSE;
00384 } else {
00385 PDB(kCondor,1) Info("Release","command: %s returned %d", cmd.Data(), r);
00386 }
00387
00388 fClaims->Remove(claim);
00389 delete claim;
00390 }
00391
00392 fState = kFree;
00393 return kTRUE;
00394 }
00395
00396
00397
00398 Bool_t TCondor::GetVmInfo(const char *vm, TString &image, Int_t &perfidx) const
00399 {
00400
00401
00402 TString cmd = Form("condor_status -format \"%%d:\" Mips -format \"%%s\\n\" FileSystemDomain "
00403 "-const 'Name==\"%s\"'", vm);
00404
00405 PDB(kCondor,2) Info("GetVmInfo","command: %s", cmd.Data());
00406 FILE *pipe = gSystem->OpenPipe(cmd, "r");
00407
00408 if (!pipe) {
00409 SysError("GetVmInfo","cannot run command: %s", cmd.Data());
00410 return kFALSE;
00411 }
00412
00413 TString line;
00414 while (line.Gets(pipe)) {
00415 PDB(kCondor,3) Info("GetVmInfo","line = %s", line.Data());
00416 if (line != "") {
00417 TString amips = line(TRegexp("^[0-9]*"));
00418 perfidx = atoi(amips);
00419 image = line(TRegexp("[^:]+$"));
00420 break;
00421 }
00422 }
00423
00424 Int_t r = gSystem->ClosePipe(pipe);
00425 if (r) {
00426 Error("GetVmInfo","command: %s returned %d", cmd.Data(), r);
00427 return kFALSE;
00428 } else {
00429 PDB(kCondor,1) Info("GetVmInfo","command: %s returned %d", cmd.Data(), r);
00430 }
00431
00432 return kTRUE;
00433 }
00434
00435
00436
00437 TString TCondor::GetImage(const char *host) const
00438 {
00439
00440
00441 TString cmd = Form("condor_status -direct %s -format \"Image:%%s\\n\" "
00442 "FileSystemDomain", host);
00443
00444 PDB(kCondor,2) Info("GetImage","command: %s", cmd.Data());
00445
00446 FILE *pipe = gSystem->OpenPipe(cmd, "r");
00447
00448 if (!pipe) {
00449 SysError("GetImage","cannot run command: %s", cmd.Data());
00450 return "";
00451 }
00452
00453 TString image;
00454 TString line;
00455 while (line.Gets(pipe)) {
00456 PDB(kCondor,3) Info("GetImage","line = %s", line.Data());
00457 if (line != "") {
00458 image = line(TRegexp("[^:]+$"));
00459 break;
00460 }
00461 }
00462
00463 Int_t r = gSystem->ClosePipe(pipe);
00464 if (r) {
00465 Error("GetImage","command: %s returned %d", cmd.Data(), r);
00466 return "";
00467 } else {
00468 PDB(kCondor,1) Info("GetImage","command: %s returned %d", cmd.Data(), r);
00469 }
00470
00471 return image;
00472 }
00473
00474
00475
00476 void TCondorSlave::Print(Option_t * ) const
00477 {
00478
00479
00480 cout << "OBJ: " << IsA()->GetName()
00481 << " " << fHostname << ":" << fPort
00482 << " Perf: " << fPerfIdx
00483 << " Image: " << fImage << endl;
00484 }