Containment Domains C++ API  0.1
Containment Domains C++ API v0.1
 All Classes Namespaces Files Functions Variables Enumerations Enumerator Groups Pages
cd.h
Go to the documentation of this file.
1 /*
2 Copyright 2014, The University of Texas at Austin
3 All rights reserved.
4 
5 THIS FILE IS PART OF THE CONTAINMENT DOMAINS RUNTIME LIBRARY
6 
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are
9 met:
10 
11 1. Redistributions of source code must retain the above copyright
12 notice, this list of conditions and the following disclaimer.
13 
14 2. Redistributions in binary form must reproduce the above copyright
15 notice, this list of conditions and the following disclaimer in the
16 documentation and/or other materials provided with the distribution.
17 
18 3. Neither the name of the copyright holder nor the names of its
19 contributors may be used to endorse or promote products derived from
20 this software without specific prior written permission.
21 
22 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25  FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
26  COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
28  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
30  CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
32  ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33  POSSIBILITY OF SUCH DAMAGE.
34 */
35 
46 #ifndef _CD_H
47 #define _CD_H
48 #include <vector>
49 
136 namespace cd {
137 
138  class CDEvent; // explained later
139  class CDHandle; //explained later
140 
/// A type to uniquely name a CD in the tree.
struct CDNameT {
  uint level;   ///< Level within the tree (root=0).
  uint number;  ///< Unique ID within level.
};
174 
189  enum CDModeT { kStrict=0,
191  };
192 
208  };
209 
216  kUnsure =0,
217  kReadOnly = 1,
220  };
221  // End group cd_defs
223 
/// Different types of PGAS memory behavior for relaxed CDs.
///
/// NOTE(review): the generated documentation index refers to an entry at
/// cd.h:242 that is not visible here — confirm that no enumerator between
/// kShared and kPrivatized has been lost.
enum PGASUsageT {
  kShared = 0,  ///< Definitely shared for actual communication.
  kPrivatized,
  kPrivate      ///< Entirely private to this CD.
};
248  // end PGAS_funcs
250 
/// Type for specifying system errors and failure names.
///
/// Every enumerator other than kOK occupies its own bit, so that several
/// names can be combined into (and tested against) a single error-name mask.
///
/// NOTE(review): kOK is also declared by SysErrLocT and CDErrT in this
/// header; as unscoped enums in one namespace those names collide.
enum SysErrNameT {
  kOK               = 0,           ///< No errors/failures.
  kSoftMem          = 0b00000001,
  kDegradedMem      = 0b00000010,
  kSoftComm         = 0b00000100,
  kDegradedComm     = 0b00001000,
  kSoftComp         = 0b00010000,
  kDegradedResource = 0b00100000,
  kHardResource     = 0b01000000,
  kFileSys          = 0b10000000
};
319 
325  uint DeclareErrName(const char* name_string
326  );
328 
335  CDErrT UndeclareErrName(uint error_name_id
336  );
337 
/// Type for specifying errors and failure location names.
///
/// Every enumerator other than kOK occupies its own bit, so that several
/// locations can be combined into (and tested against) a single
/// error-location mask.
///
/// NOTE(review): kOK is also declared by SysErrNameT and CDErrT in this
/// header; as unscoped enums in one namespace those names collide.
enum SysErrLocT {
  kOK           = 0,
  kIntraCore    = 0b00000001,  ///< Within a part of a core.
  kCore         = 0b00000010,  ///< A core.
  kProc         = 0b00000100,  ///< Processor.
  kNode         = 0b00001000,  ///< Same as processor?
  kModule       = 0b00010000,  ///< Module.
  kCabinet      = 0b00100000,  ///< A cabinet.
  kCabinetGroup = 0b01000000,  ///< Some grouping of cabinets.
  kSystem       = 0b10000000   ///< Entire system.
};
361 
368  uint DeclareErrLoc(const char* name_string
369  );
371 
378  CDErrT UndeclareErrLoc(uint error_name_id
379  )
380 
381 
/// Container for error-specific extra information; SysErrT stores one as
/// its error_info_ member. No state is declared at this level.
class SysErrInfo {
 public:
  SysErrInfo() {}
};
397 
404  public:
405  uint64_t get_pa_start();
406  uint64_t get_va_start();
407  uint64_t get_length();
/// Data value read (erroneous). Returned as char*, matching the type of the
/// data_ member declared alongside it.
char* get_data();
409  uint64_t get_syndrome_len();
/// Value of syndrome. Returned as char*, matching the type of the syndrome_
/// member declared alongside it.
char* get_syndrome();
411 
412  protected:
413  uint64_t pa_start_;
414  uint64_t va_start_;
415  uint64_t length_;
416  char* data_;
417  uint64_t syndrome_len_;
418  char* syndrome_;
419  };
420 
427  public:
428  std::vector<uint64_t> get_pa_starts();
429  std::vector<uint64_t> get_va_starts();
430  std::vector<uint64_t> get_lengths();
431 
432  protected:
433  std::vector<uint64_t> pa_starts_;
434  std::vector<uint64_t> va_starts_;
435  std::vector<uint64_t> lengths_;
436  };
437 
452  struct SysErrT {
455  SysErrInfo error_info_;
456  };
457 
458 
470  enum CDErrT { kOK=0,
473  };
474 
475  //* \todo Missing error injection API */
476 
/// Type for specifying preservation methods.
///
/// Each enumerator is a distinct bit; Preserve() accepts these through its
/// preserve_mask parameter (default kCopy).
enum PreserveMechanismT {
  kCopy  = 0b001,
  kRef   = 0b010,
  kRegen = 0b100
};
517 
526  class RegenObject {
527  public:
534  virtual CDErrT Regenerate(void* data_ptr,
535  uint64_t len
537  ) = 0;
538  };
539  // End preservation_funcs group, but more methods later in CDHandle
541 
542 
572  CDHandle* Init(bool collective=true
573  CDErrT* error=0
577  );
581  // End cd_init_funcs group
583 
599  CDHandle* GetRootCD();
600 
615 
635  CDErrT SetCurrentCD(const CDHandle* cd
636  );
640 
641  // End cd_accessor_funcs group
643 };
644 
645 
646 
647 
648 
662 public:
685  CDHandle* Create(char* name=0,
686  CDModeT type=kStrict,
690  uint error_name_mask=0,
691  uint error_loc_mask=0,
695  CDErrT* error=0
699  );
703 
720  CDHandle* Create(uint_t color,
721  uint_t num_tasks_in_color,
724  char* name=0,
733  CDModeT type=kStrict,
737  uint error_name_mask=0,
738  uint error_loc_mask=0,
742  CDErrT* error=0
746  );
750 
768  CDHandle* CreateAndBegin(uint_t color,
769  uint_t num_tasks_in_color,
772  char* name=0,
781  CDModeT type=kStrict,
785  uint error_name_mask=0,
786  uint error_loc_mask=0,
790  CDErrT* error=0
794  );
798 
813  CDErrT Destroy(bool collective=false
814  );
823 
842  CDErrT Begin(bool collective=true
843  );
851 
873  CDErrT Complete(bool collective=true,
874  bool update_preservations,
882  );
891 
898  CDNameT GetName();
899 
904  CDHandle* GetParent();
905  // End cd_hierarchy
907 
936  CDErrT Preserve(void* data_ptr,
937  uint64_t len,
941  uint_t preserve_mask=kCopy,
942  const char* my_name=0,
946  const char* ref_name=0,
949  uint_64t ref_offset=0,
953  const RegenObject* regen_object=0,
960  PreserveUseT data_usage=kUnsure
961  );
965 
997  CDEvent& cd_event,
998  void* data_ptr,
1000  uint64_t len,
1004  uint_t preserve_mask=kCopy,
1005  const char* my_name=0,
1009  const char* ref_name=0,
1012  uint_64t ref_offset=0,
1016  const RegenObject* regen_object=0,
1022  PreserveUseT data_usage=kUnsure
1023  );
1027  // End preservation_funcs
1029 
1051  CDErrT CDAssert(bool test_true,
1052  const SysErrT* error_to_report=0
1057  );
1058 
1074  CDErrT CDAssertFail(bool test_true,
1075  const SysErrT* error_to_report=0
1080  );
1081 
1097  CDErrT CDAssertNotify(bool test_true,
1098  const SysErrT* error_to_report=0
1103  );
1104 
1105 
1116  std::vector<SysErrT> Detect(CDErrT* err_ret_val=0
1117  );
1123 
1132  CDErrT RegisterDetection(uint system_name_mask,
1133  uint system_loc_mask,
1138  );
1143 
1159  CDErrT RegisterRecovery(uint error_name_mask,
1160  uint error_loc_mask,
1164  RecoverObject* recover_object=0
1168  );
1172 
1173  // End detection_recovery group
1180 
1203  float GetErrorProbability(SysErrT error_type,
1204  uint error_num,
1207  );
1209 
1231  float RequireErrorProbability(SysErrT error_type,
1232  uint error_num,
1235  float probability,
1237  bool fail_over=true
1241  );
1246 
1247 
1248 
1249  // End cd_error_probability group
1251 
1293  CDErrT SetPGASUsage(void* data_ptr,
1294  uint64_t len,
1298  PGASUsageT region_type=kShared
1299  );
1304 
1325  CDErrT SetPGASOwnerWrites(void* data_ptr
1330  uint64_t len,
1331  bool owner_writes=true
1332  );
1336  // End PGAS_funcs
1338 
1362  CDErrT CDProfileStartPhase(bool collective=true,
1363  char* phase_name=0
1366  );
1367 
1370 protected:
1372 
1376 };
1379 
1380 
1393 public:
1406  CDErrT Wait(void);
1407 
1414  bool Test(void);
1415 
1418 protected:
1419  cd_internal::CDEvent event_;
1420 
1423 };
1424 
1430 public:
1442  bool virtual InternalCanRecover(uint error_name_mask,
1444  unit error_location_mask
1447  );
1448 
1449 
1463  void virtual InternalReexecute();
1464 
1470  void virtual InternalEscalate(uint error_name_mask,
1472  unit error_location_mask,
1474  std::vector<SysErrT> errors
1477  uint error_name_mask,
1478  ); // End internal_recovery group
1480 };
1481 
1500 public:
1508  virtual void Recover(CDInternalPtr* cd_instance,
1511  uint error_name_mask,
1513  unit error_location_mask,
1515  std::vector<SysErrT> errors
1518  ) {
1519  if (cd_instance->InternalCanRecover(error_name_mask, error_location_mask)) {
1520  cd_instance->InternalReexecute();
1521  }
1522  else {
1523  cd_instance->InternalEscalate(error_name_mask, error_loc_mask, errors);
1524  }
1525  };
1526 };
1527 
1528 
1529 
1530 
1531 #endif
1532 
1533 
1534 
1543 /*
1544 Open issues:
1545 
1546 
1547 
1548 
1549  */
Same as processor?
Definition: cd.h:355
std::vector< uint64_t > get_pa_starts()
Starting physical addresses.
float GetErrorProbability(SysErrT error_type, uint error_num,)
Ask the CD framework to estimate error/fault rate.
char * syndrome_
Value of syndrome.
Definition: cd.h:418
CDErrT
Type for specifying error return codes from an API call – signifies some failure of the API call its...
Definition: cd.h:470
A type to uniquely name a CD in the tree.
Definition: cd.h:170
First execution.
Definition: cd.h:206
virtual bool InternalCanRecover(uint error_name_mask, unit error_location_mask)
Method to test if this CD can recover from an error/location mask.
char[] get_data()
Data value read (erroneous)
virtual CDErrT Regenerate(void *data_ptr, uint64_t len)=0
Pure virtual interface function for regenerating data as restoration type.
bool Test(void)
Non-blocking call to test whether the event completed.
cd_internal::CDEvent event_
Definition: cd.h:1419
virtual void InternalEscalate(uint error_name_mask, uint error_location_mask, std::vector< SysErrT > errors)
Escalate error/failure to parent.
A cabinet.
Definition: cd.h:357
std::vector< uint64_t > get_lengths()
Lengths of affected regions.
SysErrInfo error_info_
Error-specific extra information.
Definition: cd.h:455
bool destroy_cd_object_hint_
Definition: cd.h:1375
Definition: cd.h:510
Definition: cd.h:499
CDErrT SetPGASUsage(void *data_ptr, uint64_t len, PGASUsageT region_type=kShared)
Declare how a region of memory behaves within this CD (for Relaxed CDs)
CDModeT
Type for specifying whether a CD is strict or relaxed.
Definition: cd.h:189
PGASUsageT
Different types of PGAS memory behavior for relaxed CDs.
Definition: cd.h:240
Interface to degraded memory error information.
Definition: cd.h:426
A strict CD.
Definition: cd.h:189
Some channel loss.
Definition: cd.h:309
PreserveUseT
Type to indicate whether preserved data is from read-only or potentially read/write application data...
Definition: cd.h:215
CDErrT CDProfileStartPhase(bool collective=true, char *phase_name=0)
Notify the CD Profiler that the application is entering a different execution phase.
A relaxed CD.
Definition: cd.h:190
Data to be preserved is read-only within this CD.
Definition: cd.h:218
CDErrT RegisterRecovery(uint error_name_mask, uint error_loc_mask, RecoverObject *recover_object=0)
Register that this CD can recover from certain errors/failures.
Interface to soft memory error information.
Definition: cd.h:403
functionality
Definition: cd.h:312
Recovery method that can be inherited and specialized by user.
Definition: cd.h:1499
uint number
Unique ID within level.
Definition: cd.h:172
(control/reachability failure).
Definition: cd.h:314
CDHandle * GetCurrentCD()
Accessor function to current active CD.
CDErrT RegisterDetection(uint system_name_mask, uint system_loc_mask,)
Declare that this CD can detect certain errors/failures by user-defined detectors.
char[] get_syndrome()
Value of syndrome.
virtual void InternalReexecute()
Reexecute-style default recovery.
uint64_t length_
Length of affected access.
Definition: cd.h:415
Definitely shared for actual communication.
Definition: cd.h:241
Entirely private to this CD.
Definition: cd.h:246
CDErrT Wait(void)
Blocking call waiting on the event to complete.
communication during this CD.
Definition: cd.h:244
CDHandle * GetRootCD()
Accessor function to root CD of the application.
CDErrT Destroy(bool collective=false)
Destroys a CD.
char * data_
Data value read (erroneous)
Definition: cd.h:416
CDErrT Complete(bool collective=true, bool update_preservations,)
Completes a CD.
CDErrT CDAssertNotify(bool test_true, const SysErrT *error_to_report=0)
User-provided detection function for failing a CD.
SysErrLocT error_location_
Location of error.
Definition: cd.h:454
uint64_t syndrome_len_
Length of syndrome.
Definition: cd.h:417
std::vector< SysErrT > Detect(CDErrT *err_ret_val=0)
Check whether any errors occurred while the CD executed.
CDErrT SetCurrentCD(const CDHandle *cd)
Accessor function for setting the current active CD.
(info includes message info)
Definition: cd.h:307
Within a part of a core.
Definition: cd.h:352
Processor.
Definition: cd.h:354
A core.
Definition: cd.h:353
Type for specifying errors and failure.
Definition: cd.h:452
CDInternalPtr cd_instance_
Definition: cd.h:1371
CDHandle * Create(char *name=0, CDModeT type=kStrict, uint error_name_mask=0, uint error_loc_mask=0, CDErrT *error=0)
Single-task non-collective Create.
uint level
Level within the tree (root=0)
Definition: cd.h:171
SysErrLocT
Type for specifying errors and failure location names.
Definition: cd.h:351
uint64_t get_pa_start()
Starting physical address.
virtual void Recover(CDInternalPtr *cd_instance, uint error_name_mask, unit error_location_mask, std::vector< SysErrT > errors)
Recover method to be specialized by inheriting and overloading.
Definition: cd.h:1508
Interface for specifying regeneration functions for preserve/restore.
Definition: cd.h:526
PreserveMechanismT
Type for specifying preservation methods.
Definition: cd.h:499
uint DeclareErrName(const char *name_string)
Create a new error/failure type name.
CDExecutionModeT
Type for specifying whether the current CD is executing for the first time or is currently reexecutin...
Definition: cd.h:206
Entire system.
Definition: cd.h:359
includes affected PC and perhaps bounds on the error?)
Definition: cd.h:310
uint64_t va_start_
Starting virtual address.
Definition: cd.h:414
Definition: cd.h:304
Data to be preserved will be modified by this CD.
Definition: cd.h:219
An object that provides a handle to a specific CD instance.
Definition: cd.h:661
Definition: cd.h:502
Reexecution.
Definition: cd.h:207
CDErrT UndeclareErrLoc(uint error_name_id)
Free a name that was created with DeclareErrLoc()
Definition: cd.h:378
uint64_t get_va_start()
Starting virtual address.
CDHandle * CreateAndBegin(uint_t color, uint_t num_tasks_in_color, char *name=0, CDModeT type=kStrict, uint error_name_mask=0, uint error_loc_mask=0, CDErrT *error=0)
Collective Create+Begin.
Definition: cd.h:316
CD, essentially equivalent to kShared for CDs.
Definition: cd.h:242
Definition: cd.h:301
CDErrT Preserve(void *data_ptr, uint64_t len, uint_t preserve_mask=kCopy, const char *my_name=0, const char *ref_name=0, uint_64t ref_offset=0, const RegenObject *regen_object=0, PreserveUseT data_usage=kUnsure)
Preserve data to be restored when recovering (typically reexecuting the CD from right after its Begin...
Module.
Definition: cd.h:356
CDHandle * GetParent()
Get CDHandle to this CD's parent.
std::vector< uint64_t > va_starts_
Starting virtual addresses.
Definition: cd.h:434
CDErrT CDAssert(bool test_true, const SysErrT *error_to_report=0)
User-provided detection function for failing a CD.
std::vector< uint64_t > get_va_starts()
Starting virtual addresses.
std::vector< uint64_t > pa_starts_
Starting physical addresses.
Definition: cd.h:433
Init called more than once.
Definition: cd.h:471
CDErrT CDAssertFail(bool test_true, const SysErrT *error_to_report=0)
User-provided detection function for failing a CD.
CDErrT UndeclareErrName(uint error_name_id)
Free a name that was created with DeclareErrName()
A class that represents the interface to the internal implementation of an actual CD...
Definition: cd.h:1429
CDNameT GetName()
Get the name/location of this CD.
float RequireErrorProbability(SysErrT error_type, uint error_num, float probability, bool fail_over=true)
Request the CD framework to reach a certain error/failure probability.
SysErrNameT
Type for specifying system errors and failure names.
Definition: cd.h:300
uint64_t pa_start_
Starting physical address.
Definition: cd.h:413
CDErrT Begin(bool collective=true)
Begins a CD.
Call did not execute as expected.
Definition: cd.h:472
No errors/failures.
Definition: cd.h:300
CDHandle * Init(bool collective=true, CDErrT *error=0)
Initialize the CD runtime.
uint DeclareErrLoc(const char *name_string)
Create a new error/failure type name.
An object that provides an event identifier to a non-blocking CD runtime call.
Definition: cd.h:1392
Some grouping of cabinets.
Definition: cd.h:358
std::vector< uint64_t > lengths_
Lengths of affected regions.
Definition: cd.h:435
by the CD (treated as Read/Write for now, but may be optimized later)
Definition: cd.h:216
SysErrNameT error_name_
Name of error.
Definition: cd.h:453
uint64_t get_length()
Length of affected access.
CDErrT SetPGASOwnerWrites(void *data_ptr, uint64_t len, bool owner_writes=true)
Simplify optimization of discarding relaxed CD log entries.
uint64_t get_syndrome_len()
Length of syndrome.