-
Notifications
You must be signed in to change notification settings - Fork 27
Applications integrated with FTI
⬆️ Top
Classical molecular dynamics proxy application.
https://github.com/exmatex/CoMD
Integrating FTI into CoMD required adding only ~30 lines of code across 2 files. All occurrences of MPI_COMM_WORLD
were changed to FTI_COMM_WORLD,
except in the initialization call FTI_Init("config.fti", MPI_COMM_WORLD);
File: src-mpi/CoMD.c
102: int i = 1;
103: FTI_Protect(i++, sim->boxes->nAtoms, sim->boxes->nTotalBoxes, FTI_INTG);
104:
105: FTIT_type RealTInfo;
106: FTI_InitType(&RealTInfo, sizeof(real_t));
107: FTIT_type Real3Info;
108: FTI_InitType(&Real3Info, sizeof(real3));
109: int maxTotalAtoms = MAXATOMS * (sim->boxes->nTotalBoxes);
110:
111: FTI_Protect(i++, sim->atoms->gid, maxTotalAtoms, FTI_INTG);
112: FTI_Protect(i++, sim->atoms->iSpecies, maxTotalAtoms, FTI_INTG);
113: FTI_Protect(i++, sim->atoms->r, maxTotalAtoms, Real3Info);
114: FTI_Protect(i++, sim->atoms->p, maxTotalAtoms, Real3Info);
115: FTI_Protect(i++, sim->atoms->f, maxTotalAtoms, Real3Info);
116:
117: int iStep = 0;
118: FTI_Protect(i++, &iStep, 1, FTI_INTG);
119:
120: if (FTI_Status() != 0) {
121: int res = FTI_Recover();
122: if (res != 0) {
123: printf("\tRecovery failed! FTI_Recover returned %d.\n", res);
124: }
125: }
---
139: profileStart(loopTimer);
140: for (; iStep<nSteps;)
141: {
142: startTimer(commReduceTimer);
143: sumAtoms(sim);
144: stopTimer(commReduceTimer);
145:
146: printThings(sim, iStep, getElapsedTime(timestepTimer));
147:
148: startTimer(timestepTimer);
149: timestep(sim, printRate, sim->dt);
150: stopTimer(timestepTimer);
151:
152: iStep += printRate;
153: int res = FTI_Checkpoint(iStep, 1);
154: if (res != FTI_DONE) {
155: printf("\tCheckpoint failed! FTI_Checkpoint returned %d.\n", res);
156: }
157: }
158: profileStop(loopTimer);
File: src-mpi/parallel.c
64: void initParallel(int* argc, char*** argv)
65: {
66: #ifdef DO_MPI
67: MPI_Init(argc, argv);
68: FTI_Init("config.fti", MPI_COMM_WORLD);
69: MPI_Comm_rank(FTI_COMM_WORLD, &myRank);
70: MPI_Comm_size(FTI_COMM_WORLD, &nRanks);
71: #endif
72: }
73:
74: void destroyParallel()
75: {
76: #ifdef DO_MPI
77: FTI_Finalize();
78: MPI_Finalize();
79: #endif
80: }
===============================================================================
Poznan Supercomputing and Networking Center
eagle.man.poznan.pl
===============================================================================
-------------------------------------------------------------------------------
Start of calculations [pon, 9 paź 2017, 12:57:49 CEST]
-------------------------------------------------------------------------------
Support: [email protected]
-------------------------------------------------------------------------------
Mon Oct 9 12:57:53 2017: Starting Initialization
Mini-Application Name : CoMD-mpi
Mini-Application Version : 1.1
Platform:
hostname: e0026
kernel name: 'Linux'
kernel release: '3.10.105-1.el6.elrepo.x86_64'
processor: 'x86_64'
Build:
CC: '/opt/exp_soft/local/generic/openmpi/1.10.2-1_gcc482/bin/mpicc'
compiler version: 'gcc (GCC) 4.8.2 20140120 (Red Hat 4.8.2-14)'
CFLAGS: '-std=c99 -DDOUBLE -DDO_MPI -g -O5 -I/home/users/ksiero1/fti/include/ '
LDFLAGS: '-L/home/users/ksiero1/fti/lib/ -lm -lcrypto'
using MPI: true
Threading: none
Double Precision: true
Run Date/Time: 2017-10-09, 12:57:53
Command Line Parameters:
doeam: 0
potDir: pots
potName: Cu_u6.eam
potType: funcfl
nx: 800
ny: 800
nz: 800
xproc: 8
yproc: 8
zproc: 8
Lattice constant: -1 Angstroms
nSteps: 100
printRate: 10
Time step: 1 fs
Initial Temperature: 600 K
Initial Delta: 0 Angstroms
Simulation data:
Total atoms : 2048000000
Min global bounds : [ 0.0000000000, 0.0000000000, 0.0000000000 ]
Max global bounds : [ 2892.0000000000, 2892.0000000000, 2892.0000000000 ]
Decomposition data:
Processors : 8, 8, 8
Local boxes : 62, 62, 62 = 238328
Box size : [ 5.8306451613, 5.8306451613, 5.8306451613 ]
Box factor : [ 1.0074548875, 1.0074548875, 1.0074548875 ]
Max Link Cell Occupancy: 32 of 64
Potential data:
Potential type : Lennard-Jones
Species name : Cu
Atomic number : 29
Mass : 63.55 amu
Lattice Type : FCC
Lattice spacing : 3.615 Angstroms
Cutoff : 5.7875 Angstroms
Epsilon : 0.167 eV
Sigma : 2.315 Angstroms
Memory data:
Intrinsic atom footprint = 88 B/atom
Total atom footprint = -157.000 MB (335.69 MB/node)
Link cell atom footprint = 1280.082 MB/node
Link cell atom footprint = 1408.000 MB/node (including halo cell data
Initial energy : -1.166063303598, atom count : 2048000000
Mon Oct 9 12:58:06 2017: Initialization Finished
Mon Oct 9 12:58:06 2017: Starting simulation
# Performance
# Loop Time(fs) Total Energy Potential Energy Kinetic Energy Temperature (us/atom) # Atoms
0 0.00 -1.166063303598 -1.243619295198 0.077555991600 600.0000 0.0000 2048000000
10 10.00 -1.166059649733 -1.233151964368 0.067092314635 519.0494 2.3384 2048000000
20 20.00 -1.166048425247 -1.208164731096 0.042116305849 325.8263 2.4122 2048000000
30 30.00 -1.166037572103 -1.186566075400 0.020528503297 158.8156 2.4182 2048000000
40 40.00 -1.166042088520 -1.183621872290 0.017579783770 136.0033 2.4197 2048000000
50 50.00 -1.166051685771 -1.193725983586 0.027674297815 214.0979 2.4213 2048000000
60 60.00 -1.166054644001 -1.202677534791 0.036622890790 283.3274 2.4201 2048000000
70 70.00 -1.166052134038 -1.204922829363 0.038870695326 300.7172 2.4207 2048000000
80 80.00 -1.166048793793 -1.203643980438 0.037595186645 290.8494 2.4198 2048000000
90 90.00 -1.166048002607 -1.203830919192 0.037782916585 292.3017 2.4193 2048000000
100 100.00 -1.166049790544 -1.206871500823 0.040821710279 315.8109 2.4176 2048000000
Mon Oct 9 13:14:11 2017: Ending simulation
Simulation Validation:
Initial energy : -1.166063303598
Final energy : -1.166049790544
eFinal/eInitial : 0.999988
Final atom count : 2048000000, no atoms lost
Timings for Rank 0
Timer # Calls Avg/Call (s) Total (s) % Loop
___________________________________________________________________
total 1 977.2662 977.2662 101.34
loop 1 964.3040 964.3040 100.00
timestep 10 96.4285 964.2854 100.00
position 100 0.1001 10.0087 1.04
velocity 200 0.1025 20.5055 2.13
redistribute 101 1.2731 128.5869 13.33
atomHalo 101 0.9613 97.0902 10.07
force 101 7.8922 797.1134 82.66
commHalo 303 0.3041 92.1548 9.56
commReduce 39 0.4388 17.1143 1.77
Timing Statistics Across 512 Ranks:
Timer Rank: Min(s) Rank: Max(s) Avg(s) Stdev(s)
_____________________________________________________________________________
total 51: 977.2630 140: 977.2672 977.2650 0.0012
loop 67: 964.3023 104: 964.3043 964.3033 0.0008
timestep 51: 964.2738 463: 964.2999 964.2841 0.0061
position 49: 4.5466 373: 16.3177 11.2964 3.3782
velocity 3: 7.8438 329: 29.0049 20.1119 6.3239
redistribute 51: 53.6754 481: 168.3860 127.0497 17.6334
atomHalo 51: 24.1044 481: 142.1157 94.3223 21.2231
force 323: 775.9471 51: 905.1509 795.6917 9.6434
commHalo 51: 19.3264 481: 137.4904 89.3663 21.3808
commReduce 51: 2.8272 339: 27.7147 19.1658 4.1010
---------------------------------------------------
Average atom update rate: 2.41 us/atom/task
---------------------------------------------------
---------------------------------------------------
Average all atom update rate: 0.00 us/atom
---------------------------------------------------
---------------------------------------------------
Average atom rate: 212.39 atoms/us
---------------------------------------------------
Mon Oct 9 13:14:11 2017: CoMD Ending
-------------------------------------------------------------------------------
End of calculations [pon, 9 paź 2017, 13:14:11 CEST].
-------------------------------------------------------------------------------
===============================================================================
Poznan Supercomputing and Networking Center
eagle.man.poznan.pl
===============================================================================
-------------------------------------------------------------------------------
Start of calculations [pon, 9 paź 2017, 12:14:02 CEST]
-------------------------------------------------------------------------------
Support: [email protected]
-------------------------------------------------------------------------------
[ FTI Information ] : Reading FTI configuration file (/home/users/ksiero1/CoMD/bin/config.fti)...
[ FTI Information ] : The execution ID is: 2017-10-09_12-14-07
[ FTI Information ] : Selected Ckpt I/O is POSIX
[ FTI Information ] : FTI has been initialized.
Mon Oct 9 12:14:08 2017: Starting Initialization
Mini-Application Name : CoMD-mpi
Mini-Application Version : 1.1
Platform:
hostname: e0026
kernel name: 'Linux'
kernel release: '3.10.105-1.el6.elrepo.x86_64'
processor: 'x86_64'
Build:
CC: '/opt/exp_soft/local/generic/openmpi/1.10.2-1_gcc482/bin/mpicc'
compiler version: 'gcc (GCC) 4.8.2 20140120 (Red Hat 4.8.2-14)'
CFLAGS: '-std=c99 -DDOUBLE -DDO_MPI -g -O5 -I/home/users/ksiero1/fti/include/ '
LDFLAGS: '-L/home/users/ksiero1/fti/lib/ -lm -lcrypto'
using MPI: true
Threading: none
Double Precision: true
Run Date/Time: 2017-10-09, 12:14:08
Command Line Parameters:
doeam: 0
potDir: pots
potName: Cu_u6.eam
potType: funcfl
nx: 800
ny: 800
nz: 800
xproc: 8
yproc: 8
zproc: 8
Lattice constant: -1 Angstroms
nSteps: 100
printRate: 10
Time step: 1 fs
Initial Temperature: 600 K
Initial Delta: 0 Angstroms
[ FTI Information ] : Variable ID 1 to protect. Current ckpt. size per rank is 1.00MB.
[ FTI Information ] : Variable ID 2 to protect. Current ckpt. size per rank is 65.00MB.
[ FTI Information ] : Variable ID 3 to protect. Current ckpt. size per rank is 129.00MB.
[ FTI Information ] : Variable ID 4 to protect. Current ckpt. size per rank is 513.00MB.
[ FTI Information ] : Variable ID 5 to protect. Current ckpt. size per rank is 897.00MB.
[ FTI Information ] : Variable ID 6 to protect. Current ckpt. size per rank is 1281.00MB.
[ FTI Information ] : Variable ID 7 to protect. Current ckpt. size per rank is 1281.00MB.
Simulation data:
Total atoms : 2048000000
Min global bounds : [ 0.0000000000, 0.0000000000, 0.0000000000 ]
Max global bounds : [ 2892.0000000000, 2892.0000000000, 2892.0000000000 ]
Decomposition data:
Processors : 8, 8, 8
Local boxes : 62, 62, 62 = 238328
Box size : [ 5.8306451613, 5.8306451613, 5.8306451613 ]
Box factor : [ 1.0074548875, 1.0074548875, 1.0074548875 ]
Max Link Cell Occupancy: 32 of 64
Potential data:
Potential type : Lennard-Jones
Species name : Cu
Atomic number : 29
Mass : 63.55 amu
Lattice Type : FCC
Lattice spacing : 3.615 Angstroms
Cutoff : 5.7875 Angstroms
Epsilon : 0.167 eV
Sigma : 2.315 Angstroms
Memory data:
Intrinsic atom footprint = 88 B/atom
Total atom footprint = -157.000 MB (335.69 MB/node)
Link cell atom footprint = 1280.082 MB/node
Link cell atom footprint = 1408.000 MB/node (including halo cell data
Initial energy : -1.166063303598, atom count : 2048000000
Mon Oct 9 12:14:21 2017: Initialization Finished
Mon Oct 9 12:14:21 2017: Starting simulation
# Performance
# Loop Time(fs) Total Energy Potential Energy Kinetic Energy Temperature (us/atom) # Atoms
0 0.00 -1.166063303598 -1.243619295198 0.077555991600 600.0000 0.0000 2048000000
[ FTI Information ] : Post-checkpoint took 1.77 sec. (Pt:1.75s, Cl:0.02s)
[ FTI Information ] : Ckpt. ID 10 (L1) (1281.00 MB/proc) taken in 35.45 sec. (Wt:0.00s, Wr:33.68s, Ps:1.77s)
10 10.00 -1.166059649733 -1.233151964368 0.067092314635 519.0494 2.4020 2048000000
[ FTI Information ] : Post-checkpoint took 0.53 sec. (Pt:0.39s, Cl:0.14s)
[ FTI Information ] : Ckpt. ID 20 (L1) (1281.00 MB/proc) taken in 38.83 sec. (Wt:0.00s, Wr:38.30s, Ps:0.53s)
20 20.00 -1.166048425247 -1.208164731096 0.042116305849 325.8263 2.4509 2048000000
[ FTI Information ] : Post-checkpoint took 3.89 sec. (Pt:2.76s, Cl:1.13s)
[ FTI Information ] : Ckpt. ID 30 (L1) (1281.00 MB/proc) taken in 38.64 sec. (Wt:0.00s, Wr:34.75s, Ps:3.89s)
30 30.00 -1.166037572103 -1.186566075400 0.020528503297 158.8156 2.5215 2048000000
[ FTI Information ] : Post-checkpoint took 3.57 sec. (Pt:2.95s, Cl:0.62s)
[ FTI Information ] : Ckpt. ID 40 (L1) (1281.00 MB/proc) taken in 40.21 sec. (Wt:0.00s, Wr:36.63s, Ps:3.57s)
40 40.00 -1.166042088520 -1.183621872290 0.017579783770 136.0033 2.2746 2048000000
[ FTI Information ] : Post-checkpoint took 3.71 sec. (Pt:3.30s, Cl:0.41s)
[ FTI Information ] : Ckpt. ID 50 (L1) (1281.00 MB/proc) taken in 38.81 sec. (Wt:0.00s, Wr:35.10s, Ps:3.71s)
50 50.00 -1.166051685771 -1.193725983586 0.027674297815 214.0979 2.2883 2048000000
[ FTI Information ] : Post-checkpoint took 2.26 sec. (Pt:1.76s, Cl:0.50s)
[ FTI Information ] : Ckpt. ID 60 (L1) (1281.00 MB/proc) taken in 38.06 sec. (Wt:0.00s, Wr:35.80s, Ps:2.26s)
60 60.00 -1.166054644001 -1.202677534791 0.036622890790 283.3274 2.3185 2048000000
[ FTI Information ] : Post-checkpoint took 1.11 sec. (Pt:0.23s, Cl:0.87s)
[ FTI Information ] : Ckpt. ID 70 (L1) (1281.00 MB/proc) taken in 43.70 sec. (Wt:0.00s, Wr:42.59s, Ps:1.11s)
--------------------------------------------------------------------------
mpirun has exited due to process rank 416 with PID 0 on
node e0769 exiting improperly. There are three reasons this could occur:
1. this process did not call "init" before exiting, but others in
the job did. This can cause a job to hang indefinitely while it waits
for all processes to call "init". By rule, if one process calls "init",
then ALL processes must call "init" prior to termination.
2. this process called "init", but exited without calling "finalize".
By rule, all processes that call "init" MUST call "finalize" prior to
exiting or it will be considered an "abnormal termination"
3. this process called "MPI_Abort" or "orte_abort" and the mca parameter
orte_create_session_dirs is set to false. In this case, the run-time cannot
detect that the abort call was an abnormal termination. Hence, the only
error message you will receive is this one.
This may have caused other processes in the application to be
terminated by signals sent by mpirun (as reported here).
You can avoid this message by specifying -quiet on the mpirun command line.
--------------------------------------------------------------------------
[ FTI Information ] : Reading FTI configuration file (/home/users/ksiero1/CoMD/bin/config.fti)...
[ FTI Information ] : This is a restart. The execution ID is: 2017-10-09_12-14-07
[ FTI Information ] : Selected Ckpt I/O is POSIX
[ FTI Information ] : Recovering successfully from level 1.
[ FTI Information ] : FTI has been initialized.
Mon Oct 9 12:30:09 2017: Starting Initialization
Mini-Application Name : CoMD-mpi
Mini-Application Version : 1.1
Platform:
hostname: e0026
kernel name: 'Linux'
kernel release: '3.10.105-1.el6.elrepo.x86_64'
processor: 'x86_64'
Build:
CC: '/opt/exp_soft/local/generic/openmpi/1.10.2-1_gcc482/bin/mpicc'
compiler version: 'gcc (GCC) 4.8.2 20140120 (Red Hat 4.8.2-14)'
CFLAGS: '-std=c99 -DDOUBLE -DDO_MPI -g -O5 -I/home/users/ksiero1/fti/include/ '
LDFLAGS: '-L/home/users/ksiero1/fti/lib/ -lm -lcrypto'
using MPI: true
Threading: none
Double Precision: true
Run Date/Time: 2017-10-09, 12:30:09
Command Line Parameters:
doeam: 0
potDir: pots
potName: Cu_u6.eam
potType: funcfl
nx: 800
ny: 800
nz: 800
xproc: 8
yproc: 8
zproc: 8
Lattice constant: -1 Angstroms
nSteps: 100
printRate: 10
Time step: 1 fs
Initial Temperature: 600 K
Initial Delta: 0 Angstroms
[ FTI Information ] : Variable ID 1 to protect. Current ckpt. size per rank is 1.00MB.
[ FTI Information ] : Variable ID 2 to protect. Current ckpt. size per rank is 65.00MB.
[ FTI Information ] : Variable ID 3 to protect. Current ckpt. size per rank is 129.00MB.
[ FTI Information ] : Variable ID 4 to protect. Current ckpt. size per rank is 513.00MB.
[ FTI Information ] : Variable ID 5 to protect. Current ckpt. size per rank is 897.00MB.
[ FTI Information ] : Variable ID 6 to protect. Current ckpt. size per rank is 1281.00MB.
[ FTI Information ] : Variable ID 7 to protect. Current ckpt. size per rank is 1281.00MB.
Simulation data:
Total atoms : 2048000000
Min global bounds : [ 0.0000000000, 0.0000000000, 0.0000000000 ]
Max global bounds : [ 2892.0000000000, 2892.0000000000, 2892.0000000000 ]
Decomposition data:
Processors : 8, 8, 8
Local boxes : 62, 62, 62 = 238328
Box size : [ 5.8306451613, 5.8306451613, 5.8306451613 ]
Box factor : [ 1.0074548875, 1.0074548875, 1.0074548875 ]
Max Link Cell Occupancy: 32 of 64
Potential data:
Potential type : Lennard-Jones
Species name : Cu
Atomic number : 29
Mass : 63.55 amu
Lattice Type : FCC
Lattice spacing : 3.615 Angstroms
Cutoff : 5.7875 Angstroms
Epsilon : 0.167 eV
Sigma : 2.315 Angstroms
Memory data:
Intrinsic atom footprint = 88 B/atom
Total atom footprint = -157.000 MB (335.69 MB/node)
Link cell atom footprint = 1280.082 MB/node
Link cell atom footprint = 1408.000 MB/node (including halo cell data
Initial energy : -1.166063303598, atom count : 2048000000
Mon Oct 9 12:30:22 2017: Initialization Finished
Mon Oct 9 12:30:22 2017: Starting simulation
# Performance
# Loop Time(fs) Total Energy Potential Energy Kinetic Energy Temperature (us/atom) # Atoms
70 70.00 -1.166063303598 -1.243619295198 0.077555991600 600.0000 0.0000 2048000000
[ FTI Information ] : Post-checkpoint took 0.47 sec. (Pt:0.28s, Cl:0.19s)
[ FTI Information ] : Ckpt. ID 80 (L1) (1281.00 MB/proc) taken in 42.96 sec. (Wt:0.00s, Wr:42.49s, Ps:0.47s)
80 80.00 -1.166048793793 -1.203643980438 0.037595186645 290.8494 2.2586 2048000000
[ FTI Information ] : Post-checkpoint took 0.78 sec. (Pt:0.22s, Cl:0.56s)
[ FTI Information ] : Ckpt. ID 90 (L1) (1281.00 MB/proc) taken in 34.50 sec. (Wt:0.00s, Wr:33.72s, Ps:0.78s)
90 90.00 -1.166048002607 -1.203830919192 0.037782916585 292.3017 2.3377 2048000000
[ FTI Information ] : Post-checkpoint took 0.68 sec. (Pt:0.16s, Cl:0.51s)
[ FTI Information ] : Ckpt. ID 100 (L1) (1281.00 MB/proc) taken in 33.82 sec. (Wt:0.00s, Wr:33.15s, Ps:0.68s)
100 100.00 -1.166049790544 -1.206871500823 0.040821710279 315.8109 2.3146 2048000000
Mon Oct 9 12:36:51 2017: Ending simulation
Simulation Validation:
Initial energy : -1.166063303598
Final energy : -1.166049790544
eFinal/eInitial : 0.999988
Final atom count : 2048000000, no atoms lost
Timings for Rank 0
Timer # Calls Avg/Call (s) Total (s) % Loop
___________________________________________________________________
total 1 401.7819 401.7819 103.47
loop 1 388.3197 388.3197 100.00
timestep 3 92.1447 276.4340 71.19
position 30 0.1194 3.5818 0.92
velocity 60 0.1042 6.2535 1.61
redistribute 31 0.8148 25.2584 6.50
atomHalo 31 0.4576 14.1847 3.65
force 31 8.0059 248.1816 63.91
commHalo 93 0.1349 12.5416 3.23
commReduce 18 0.1819 3.2744 0.84
Timing Statistics Across 512 Ranks:
Timer Rank: Min(s) Rank: Max(s) Avg(s) Stdev(s)
_____________________________________________________________________________
total 37: 401.7036 42: 401.9202 401.7762 0.0357
loop 34: 388.3196 370: 388.4222 388.3436 0.0197
timestep 79: 276.2655 235: 276.5081 276.4445 0.0336
position 147: 1.3705 221: 6.0085 3.6971 0.9796
velocity 147: 2.3765 206: 9.8759 6.5194 1.7920
redistribute 206: 18.4708 417: 37.1847 25.1751 4.2695
atomHalo 415: 5.8228 417: 29.1367 13.9548 5.2873
force 481: 241.1729 10: 261.1229 247.7969 2.5561
commHalo 415: 4.1255 417: 27.6639 12.3329 5.3435
commReduce 415: 1.4558 193: 6.4059 3.3989 0.9957
---------------------------------------------------
Average atom update rate: 2.30 us/atom/task
---------------------------------------------------
---------------------------------------------------
Average all atom update rate: 0.00 us/atom
---------------------------------------------------
---------------------------------------------------
Average atom rate: 222.25 atoms/us
---------------------------------------------------
Mon Oct 9 12:36:52 2017: CoMD Ending
-------------------------------------------------------------------------------
End of calculations [pon, 9 paź 2017, 12:36:53 CEST].
-------------------------------------------------------------------------------
⬆️ Top
Linear algebra algorithms and workloads for a quantum molecular dynamics (QMD) electronic structure code.
https://github.com/exmatex/CoSP2
Integrating FTI into CoSP2 required adding only ~30 lines of code across 2 files. All occurrences of MPI_COMM_WORLD
were changed to FTI_COMM_WORLD,
except in the initialization call FTI_Init("config.fti", MPI_COMM_WORLD);
File: src-mpi/sp2Loop.c
Function: sp2Loop()
56: FTIT_type RealTInfo;
57: FTI_InitType(&RealTInfo, sizeof(real_t));
58: int i = 1;
59: FTI_Protect(i++, &iter, 1, FTI_INTG);
60: FTI_Protect(i++, xmatrix->iia, xmatrix->hsize, FTI_INTG);
61: FTI_Protect(i++, xmatrix->jjcontig, xmatrix->hsize * xmatrix->msize , FTI_INTG);
62: FTI_Protect(i++, xmatrix->valcontig, xmatrix->hsize * xmatrix->msize, RealTInfo);
63:
64: if (FTI_Status() != 0) {
65: int res = FTI_Recover();
66: if (res != 0) {
67: printf("\tRecovery failed! FTI_Recover returned %d.\n", res);
68: }
69: }
70:
...
153: if (iter % 10 == 0) {
154: int res = FTI_Checkpoint(iter, 1);
155: if (res != FTI_DONE) {
156: printf("\tCheckpoint failed! FTI_Checkpoint returned %d.\n", res);
157: }
158: }
File: src-mpi/parallel.c
70: void initParallel(int* argc, char*** argv)
71: {
72: #ifdef DO_MPI
73: MPI_Init(argc, argv);
74: FTI_Init("config.fti", MPI_COMM_WORLD);
75: MPI_Comm_rank(FTI_COMM_WORLD, &myRank);
76: MPI_Comm_size(FTI_COMM_WORLD, &nRanks);
77:
78: requestList = (MPI_Request*) malloc(nRanks*sizeof(MPI_Request));
79: rUsed = (int*) malloc(nRanks*sizeof(int));
80: for (int i = 0; i < nRanks; i++) { rUsed[i] = 0; }
81: #endif
82: }
83:
84: void destroyParallel()
85: {
86: #ifdef DO_MPI
87: free(requestList);
88: FTI_Finalize();
89: MPI_Finalize();
90: #endif
91: }
===============================================================================
Poznan Supercomputing and Networking Center
eagle.man.poznan.pl
===============================================================================
-------------------------------------------------------------------------------
Support: [email protected]
-------------------------------------------------------------------------------
CoSP2: SP2 Loop
Parameters:
msparse = 80 hDim = 98304 debug = 1
hmatName =
eps = 1e-05 hEps = 1e-16
idemTol = 1e-14
hDim = 98304 M = 80
Adjusted M = 96
Generated H Matrix nnz = 672042 avg nnz/row = 6
rank = 3 local row min = 18432 row max = 24576 row extent = 6144
Generated H Matrix nnz = 672042 avg nnz/row = 6
rank = 1 local row min = 6144 row max = 12288 row extent = 6144
Generated H Matrix nnz = 672042 avg nnz/row = 6
rank = 11 local row min = 67584 row max = 73728 row extent = 6144
Generated H Matrix nnz = 672042 avg nnz/row = 6
rank = 13 local row min = 79872 row max = 86016 row extent = 6144
Generated H Matrix nnz = 672042 avg nnz/row = 6
rank = 8 local row min = 49152 row max = 55296 row extent = 6144
Generated H Matrix nnz = 672042 avg nnz/row = 6
rank = 7 local row min = 43008 row max = 49152 row extent = 6144
Generated H Matrix nnz = 672042 avg nnz/row = 6
rank = 6 local row min = 36864 row max = 43008 row extent = 6144
Generated H Matrix nnz = 672042 avg nnz/row = 6
rank = 4 local row min = 24576 row max = 30720 row extent = 6144
Generated H Matrix nnz = 672042 avg nnz/row = 6
rank = 9 local row min = 55296 row max = 61440 row extent = 6144
Generated H Matrix nnz = 672042 avg nnz/row = 6
rank = 10 local row min = 61440 row max = 67584 row extent = 6144
Generated H Matrix nnz = 672042 avg nnz/row = 6
rank = 14 local row min = 86016 row max = 92160 row extent = 6144
Generated H Matrix nnz = 672042 avg nnz/row = 6
rank = 12 local row min = 73728 row max = 79872 row extent = 6144
Generated H Matrix nnz = 672042 avg nnz/row = 6
rank = 2 local row min = 12288 row max = 18432 row extent = 6144
Generated H Matrix nnz = 672042 avg nnz/row = 6
total procs = 16 total rows = 98304 total cols = 96
global row min = 0 row max = 98304 row extent = 98304
rank = 0 local row min = 0 row max = 6144 row extent = 6144
Sparsity:
Initial sparsity = 672042, fraction = 6.258879e-04, Avg per row = 6.836365
Max per row = 7
I = 4, count = 2, fraction = 0.000020
I = 5, count = 621, fraction = 0.006317
I = 6, count = 14838, fraction = 0.150940
I = 7, count = 82843, fraction = 0.842723
Generated H Matrix nnz = 672042 avg nnz/row = 6
rank = 15 local row min = 92160 row max = 98304 row extent = 6144
Generated H Matrix nnz = 672042 avg nnz/row = 6
rank = 5 local row min = 30720 row max = 36864 row extent = 6144
Gershgorin:
New eMax, eMin = 1.745500e+00, -7.356212e-01
bufferSize = 9437184
Initial sparsity normalized = 672042, fraction = 6.258879e-04, avg = 6.83636, max = 7
SP2Loop:
iter = 0 trX = 4.935743e+04 trX2 = 2.720037e+04
iter = 1 trX = 2.720037e+04 trX2 = 9.994787e+03
iter = 2 trX = 4.440595e+04 trX2 = 2.485384e+04
iter = 3 trX = 6.395806e+04 trX2 = 4.735425e+04
iter = 4 trX = 4.735425e+04 trX2 = 3.149323e+04
iter = 5 trX = 6.321528e+04 trX2 = 5.026180e+04
iter = 6 trX = 5.026180e+04 trX2 = 3.881328e+04
iter = 7 trX = 3.881328e+04 trX2 = 2.922713e+04
iter = 8 trX = 4.839943e+04 trX2 = 4.062611e+04
iter = 9 trX = 5.617275e+04 trX2 = 4.981154e+04
iter = 10 trX = 4.981154e+04 trX2 = 4.464542e+04
iter = 11 trX = 4.464542e+04 trX2 = 4.032639e+04
iter = 12 trX = 4.896445e+04 trX2 = 4.554145e+04
iter = 13 trX = 5.238745e+04 trX2 = 4.956883e+04
iter = 14 trX = 4.956883e+04 trX2 = 4.731790e+04
iter = 15 trX = 4.731790e+04 trX2 = 4.544718e+04
iter = 16 trX = 4.918861e+04 trX2 = 4.771064e+04
iter = 17 trX = 4.771064e+04 trX2 = 4.649398e+04
iter = 18 trX = 4.892731e+04 trX2 = 4.795556e+04
iter = 19 trX = 4.989906e+04 trX2 = 4.910173e+04
iter = 20 trX = 4.910173e+04 trX2 = 4.855031e+04
iter = 21 trX = 4.965316e+04 trX2 = 5.060054e+04
iter = 22 trX = 4.870578e+04 trX2 = -9.750371e+05
iter = 23 trX = 1.072449e+06 trX2 = -5.136388e+12
iter = 24 trX = -5.136388e+12 trX2 = 7.295617e+24
Results:
X2 Sparsity CCN = 2906510, fraction = 2.706898e-03 avg = 29.5665, max = 89
D Sparsity AAN = 2906464, fraction = 2.706856e-03 avg = 29.5661, max = 89
Number of iterations = 25
Counters for Rank 0
Counter Calls Avg/Call(MB) Total(MB)
_________________________________________________________________
reduce 29 0.0000 0.0004
send 39 2.2910 89.3504
recv 39 2.2772 88.8095
Counter Statistics Across 16 Ranks:
Counter Rank: Min(MB) Rank: Max(MB) Avg(MB) Stdev(MB)
_______________________________________________________________________________________
reduce 0: 0.0004 0: 0.0004 0.0004 0.0000
send 15: 87.4100 7: 138.5495 129.9097 15.7564
recv 15: 88.4093 6: 137.0340 129.9097 15.6236
Timings for Rank 0
Timer # Calls Avg/Call (s) Total (s) % Loop
___________________________________________________________________
total 1 3.4711 3.4711 100.00
loop 1 3.4711 3.4711 100.00
pre 1 0.5444 0.5444 15.68
sp2Loop 1 2.7193 2.7193 78.34
norm 1 0.0417 0.0417 1.20
x2 25 0.0473 1.1820 34.05
xadd 13 0.0454 0.5899 16.99
xset 12 0.0383 0.4591 13.23
exchange 50 0.0032 0.1576 4.54
reduceComm 29 0.0070 0.2034 5.86
Timing Statistics Across 16 Ranks:
Timer Rank: Min(s) Rank: Max(s) Avg(s) Stdev(s)
_____________________________________________________________________________
total 1: 3.4591 15: 3.5566 3.5160 0.0296
loop 1: 3.4591 15: 3.5566 3.5160 0.0296
pre 3: 0.4203 5: 0.5927 0.5180 0.0440
sp2Loop 15: 2.7191 12: 2.7256 2.7229 0.0019
norm 3: 0.0082 7: 0.0450 0.0376 0.0112
x2 1: 0.2678 15: 1.1916 1.0701 0.3027
xadd 1: 0.0548 0: 0.5899 0.5167 0.1744
xset 1: 0.0408 15: 0.4638 0.4071 0.1383
exchange 0: 0.1576 1: 1.1532 0.3589 0.2991
reduceComm 5: 0.0513 3: 1.4170 0.3006 0.4217
-------------------------------------------------------------------------------
End of calculations [pon, 16 paź 2017, 12:17:13 CEST].
-------------------------------------------------------------------------------
-------------------------------------------------------------------------------
Support: [email protected]
-------------------------------------------------------------------------------
[ FTI Information ] : Reading FTI configuration file (/home/users/ksiero1/CoSP2/bin/config.fti)...
[ FTI Information ] : The execution ID is: 2017-10-16_12-02-59
[ FTI Information ] : FTI has been initialized.
CoSP2: SP2 Loop
Parameters:
msparse = 80 hDim = 98304 debug = 1
hmatName =
eps = 1e-05 hEps = 1e-16
idemTol = 1e-14
hDim = 98304 M = 80
Adjusted M = 96
Generated H Matrix nnz = 672042 avg nnz/row = 6
rank = 1 local row min = 6144 row max = 12288 row extent = 6144
Generated H Matrix nnz = 672042 avg nnz/row = 6
rank = 3 local row min = 18432 row max = 24576 row extent = 6144
Generated H Matrix nnz = 672042 avg nnz/row = 6
rank = 10 local row min = 61440 row max = 67584 row extent = 6144
Generated H Matrix nnz = 672042 avg nnz/row = 6
total procs = 16 total rows = 98304 total cols = 96
global row min = 0 row max = 98304 row extent = 98304
rank = 0 local row min = 0 row max = 6144 row extent = 6144
Generated H Matrix nnz = 672042 avg nnz/row = 6
rank = 5 local row min = 30720 row max = 36864 row extent = 6144
Sparsity:
Initial sparsity = 672042, fraction = 6.258879e-04, Avg per row = 6.836365
Generated H Matrix nnz = 672042 avg nnz/row = 6
rank = 6 local row min = 36864 row max = 43008 row extent = 6144
Max per row = 7
I = 4, count = 2, fraction = 0.000020
I = 5, count = 621, fraction = 0.006317
I = 6, count = 14838, fraction = 0.150940
I = 7, count = 82843, fraction = 0.842723
Generated H Matrix nnz = 672042 avg nnz/row = 6
rank = 13 local row min = 79872 row max = 86016 row extent = 6144
Generated H Matrix nnz = 672042 avg nnz/row = 6
rank = 7 local row min = 43008 row max = 49152 row extent = 6144
Generated H Matrix nnz = 672042 avg nnz/row = 6
rank = 12 local row min = 73728 row max = 79872 row extent = 6144
Generated H Matrix nnz = 672042 avg nnz/row = 6
rank = 15 local row min = 92160 row max = 98304 row extent = 6144
Generated H Matrix nnz = 672042 avg nnz/row = 6
rank = 8 local row min = 49152 row max = 55296 row extent = 6144
Generated H Matrix nnz = 672042 avg nnz/row = 6
rank = 11 local row min = 67584 row max = 73728 row extent = 6144
Generated H Matrix nnz = 672042 avg nnz/row = 6
rank = 14 local row min = 86016 row max = 92160 row extent = 6144
Generated H Matrix nnz = 672042 avg nnz/row = 6
rank = 4 local row min = 24576 row max = 30720 row extent = 6144
Generated H Matrix nnz = 672042 avg nnz/row = 6
rank = 9 local row min = 55296 row max = 61440 row extent = 6144
Generated H Matrix nnz = 672042 avg nnz/row = 6
rank = 2 local row min = 12288 row max = 18432 row extent = 6144
Gershgorin:
New eMax, eMin = 1.745500e+00, -7.356212e-01
bufferSize = 9437184
Initial sparsity normalized = 672042, fraction = 6.258879e-04, avg = 6.83636, max = 7
[ FTI Information ] : Variable ID 1 to protect. Current ckpt. size per rank is 0.00MB.
[ FTI Information ] : Variable ID 2 to protect. Current ckpt. size per rank is 0.38MB.
[ FTI Information ] : Variable ID 3 to protect. Current ckpt. size per rank is 36.38MB.
[ FTI Information ] : Variable ID 4 to protect. Current ckpt. size per rank is 108.38MB.
[ FTI Information ] : Variable ID 5 to protect. Current ckpt. size per rank is 108.75MB.
[ FTI Information ] : Variable ID 6 to protect. Current ckpt. size per rank is 144.75MB.
[ FTI Information ] : Variable ID 7 to protect. Current ckpt. size per rank is 216.75MB.
SP2Loop:
iter = 0 trX = 4.935743e+04 trX2 = 2.720037e+04
iter = 1 trX = 2.720037e+04 trX2 = 9.994787e+03
iter = 2 trX = 4.440595e+04 trX2 = 2.485384e+04
iter = 3 trX = 6.395806e+04 trX2 = 4.735425e+04
iter = 4 trX = 4.735425e+04 trX2 = 3.149323e+04
iter = 5 trX = 6.321528e+04 trX2 = 5.026180e+04
iter = 6 trX = 5.026180e+04 trX2 = 3.881328e+04
iter = 7 trX = 3.881328e+04 trX2 = 2.922713e+04
iter = 8 trX = 4.839943e+04 trX2 = 4.062611e+04
iter = 9 trX = 5.617275e+04 trX2 = 4.981154e+04
[ FTI Information ] : Post-checkpoint took 0.00 sec. (Pt:0.00s, Cl:0.00s)
[ FTI Information ] : Ckpt. ID 10 (L1) (216.75 MB/proc) taken in 9.00 sec. (Wt:0.00s, Wr:9.00s, Ps:0.00s)
iter = 10 trX = 4.981154e+04 trX2 = 4.464542e+04
iter = 11 trX = 4.464542e+04 trX2 = 4.032639e+04
iter = 12 trX = 4.896445e+04 trX2 = 4.554145e+04
iter = 13 trX = 5.238745e+04 trX2 = 4.956883e+04
iter = 14 trX = 4.956883e+04 trX2 = 4.731790e+04
--------------------------------------------------------------------------
mpirun has exited due to process rank 3 with PID 12638 on
node e0700 exiting improperly. There are three reasons this could occur:
1. this process did not call "init" before exiting, but others in
the job did. This can cause a job to hang indefinitely while it waits
for all processes to call "init". By rule, if one process calls "init",
then ALL processes must call "init" prior to termination.
2. this process called "init", but exited without calling "finalize".
By rule, all processes that call "init" MUST call "finalize" prior to
exiting or it will be considered an "abnormal termination"
3. this process called "MPI_Abort" or "orte_abort" and the mca parameter
orte_create_session_dirs is set to false. In this case, the run-time cannot
detect that the abort call was an abnormal termination. Hence, the only
error message you will receive is this one.
This may have caused other processes in the application to be
terminated by signals sent by mpirun (as reported here).
You can avoid this message by specifying -quiet on the mpirun command line.
--------------------------------------------------------------------------
[ FTI Information ] : Reading FTI configuration file (/home/users/ksiero1/CoSP2/bin/config.fti)...
[ FTI Information ] : This is a restart. The execution ID is: 2017-10-16_12-02-59
[ FTI Information ] : Recovering successfully from level 1.
[ FTI Information ] : FTI has been initialized.
CoSP2: SP2 Loop
Parameters:
msparse = 80 hDim = 98304 debug = 1
hmatName =
eps = 1e-05 hEps = 1e-16
idemTol = 1e-14
hDim = 98304 M = 80
Adjusted M = 96
Generated H Matrix nnz = 672042 avg nnz/row = 6
rank = 1 local row min = 6144 row max = 12288 row extent = 6144
Generated H Matrix nnz = 672042 avg nnz/row = 6
rank = 3 local row min = 18432 row max = 24576 row extent = 6144
Generated H Matrix nnz = 672042 avg nnz/row = 6
rank = 15 local row min = 92160 row max = 98304 row extent = 6144
Generated H Matrix nnz = 672042 avg nnz/row = 6
rank = 4 local row min = 24576 row max = 30720 row extent = 6144
Generated H Matrix nnz = 672042 avg nnz/row = 6
rank = 9 local row min = 55296 row max = 61440 row extent = 6144
Generated H Matrix nnz = 672042 avg nnz/row = 6
rank = 13 local row min = 79872 row max = 86016 row extent = 6144
Generated H Matrix nnz = 672042 avg nnz/row = 6
rank = 5 local row min = 30720 row max = 36864 row extent = 6144
Generated H Matrix nnz = 672042 avg nnz/row = 6
rank = 11 local row min = 67584 row max = 73728 row extent = 6144
Generated H Matrix nnz = 672042 avg nnz/row = 6
rank = 12 local row min = 73728 row max = 79872 row extent = 6144
Generated H Matrix nnz = 672042 avg nnz/row = 6
rank = 8 local row min = 49152 row max = 55296 row extent = 6144
Generated H Matrix nnz = 672042 avg nnz/row = 6
rank = 10 local row min = 61440 row max = 67584 row extent = 6144
Generated H Matrix nnz = 672042 avg nnz/row = 6
rank = 6 local row min = 36864 row max = 43008 row extent = 6144
Generated H Matrix nnz = 672042 avg nnz/row = 6
rank = 7 local row min = 43008 row max = 49152 row extent = 6144
Generated H Matrix nnz = 672042 avg nnz/row = 6
rank = 14 local row min = 86016 row max = 92160 row extent = 6144
Generated H Matrix nnz = 672042 avg nnz/row = 6
total procs = 16 total rows = 98304 total cols = 96
global row min = 0 row max = 98304 row extent = 98304
rank = 0 local row min = 0 row max = 6144 row extent = 6144
Sparsity:
Initial sparsity = 672042, fraction = 6.258879e-04, Avg per row = 6.836365
Max per row = 7
I = 4, count = 2, fraction = 0.000020
I = 5, count = 621, fraction = 0.006317
I = 6, count = 14838, fraction = 0.150940
I = 7, count = 82843, fraction = 0.842723
Generated H Matrix nnz = 672042 avg nnz/row = 6
rank = 2 local row min = 12288 row max = 18432 row extent = 6144
Gershgorin:
New eMax, eMin = 1.745500e+00, -7.356212e-01
bufferSize = 9437184
Initial sparsity normalized = 672042, fraction = 6.258879e-04, avg = 6.83636, max = 7
[ FTI Information ] : Variable ID 1 to protect. Current ckpt. size per rank is 0.00MB.
[ FTI Information ] : Variable ID 2 to protect. Current ckpt. size per rank is 0.38MB.
[ FTI Information ] : Variable ID 3 to protect. Current ckpt. size per rank is 36.38MB.
[ FTI Information ] : Variable ID 4 to protect. Current ckpt. size per rank is 108.38MB.
[ FTI Information ] : Variable ID 5 to protect. Current ckpt. size per rank is 108.75MB.
[ FTI Information ] : Variable ID 6 to protect. Current ckpt. size per rank is 144.75MB.
[ FTI Information ] : Variable ID 7 to protect. Current ckpt. size per rank is 216.75MB.
SP2Loop:
iter = 10 trX = 4.981154e+04 trX2 = 4.464542e+04
iter = 11 trX = 4.464542e+04 trX2 = 4.032639e+04
iter = 12 trX = 4.896445e+04 trX2 = 4.554145e+04
iter = 13 trX = 5.238745e+04 trX2 = 4.956883e+04
iter = 14 trX = 4.956883e+04 trX2 = 4.731790e+04
iter = 15 trX = 4.731790e+04 trX2 = 4.544718e+04
iter = 16 trX = 4.918861e+04 trX2 = 4.771064e+04
iter = 17 trX = 4.771064e+04 trX2 = 4.649398e+04
iter = 18 trX = 4.892731e+04 trX2 = 4.795556e+04
iter = 19 trX = 4.989906e+04 trX2 = 4.910173e+04
[ FTI Information ] : Post-checkpoint took 0.02 sec. (Pt:0.00s, Cl:0.02s)
[ FTI Information ] : Ckpt. ID 20 (L1) (216.75 MB/proc) taken in 1.18 sec. (Wt:0.00s, Wr:1.16s, Ps:0.02s)
iter = 20 trX = 4.910173e+04 trX2 = 4.855031e+04
iter = 21 trX = 4.965316e+04 trX2 = 5.060054e+04
iter = 22 trX = 4.870578e+04 trX2 = -9.750371e+05
iter = 23 trX = 1.072449e+06 trX2 = -5.136388e+12
iter = 24 trX = -5.136388e+12 trX2 = 7.295617e+24
Results:
X2 Sparsity CCN = 2906510, fraction = 2.706898e-03 avg = 29.5665, max = 89
D Sparsity AAN = 2906464, fraction = 2.706856e-03 avg = 29.5661, max = 89
Number of iterations = 25
Counters for Rank 0
Counter Calls Avg/Call(MB) Total(MB)
_________________________________________________________________
reduce 19 0.0000 0.0003
send 29 2.6508 76.8721
recv 29 2.6315 76.3141
Counter Statistics Across 16 Ranks:
Counter Rank: Min(MB) Rank: Max(MB) Avg(MB) Stdev(MB)
_______________________________________________________________________________________
reduce 0: 0.0003 0: 0.0003 0.0003 0.0000
send 15: 74.9711 7: 113.5838 106.5789 11.6620
recv 15: 75.9751 6: 112.0425 106.5789 11.5183
Timings for Rank 0
Timer # Calls Avg/Call (s) Total (s) % Loop
___________________________________________________________________
total 1 4.4862 4.4862 100.00
loop 1 4.4862 4.4862 100.00
pre 1 0.5449 0.5449 12.15
sp2Loop 1 3.7464 3.7464 83.51
norm 1 0.0439 0.0439 0.98
x2 15 0.0423 0.6340 14.13
xadd 8 0.1030 0.8236 18.36
xset 7 0.0369 0.2582 5.76
exchange 30 0.0033 0.0982 2.19
reduceComm 19 0.0210 0.3999 8.91
Timing Statistics Across 16 Ranks:
Timer Rank: Min(s) Rank: Max(s) Avg(s) Stdev(s)
_____________________________________________________________________________
total 1: 4.4668 10: 4.5643 4.5171 0.0276
loop 1: 4.4668 10: 4.5643 4.5171 0.0276
pre 1: 0.4197 2: 0.5793 0.5138 0.0399
sp2Loop 9: 3.7438 8: 3.7513 3.7490 0.0019
norm 1: 0.0081 6: 0.0463 0.0350 0.0112
x2 3: 0.1789 13: 0.7067 0.5970 0.1598
xadd 1: 0.0360 10: 0.8244 0.6514 0.2632
xset 3: 0.0240 7: 0.2859 0.2339 0.0798
exchange 0: 0.0982 3: 1.1791 0.4241 0.3105
reduceComm 7: 0.1875 1: 1.2948 0.4136 0.3341
[ FTI Information ] : FTI has been finalized.
-------------------------------------------------------------------------------
End of calculations [pon, 16 paź 2017, 12:03:18 CEST].
-------------------------------------------------------------------------------
Livermore Unstructured Lagrangian Explicit Shock Hydrodynamics (LULESH)
https://codesign.llnl.gov/lulesh.php
To convert the C++ `Domain` object into a char buffer that FTI can protect, Boost serialization was used. Three files were modified to port FTI: `lulesh.cc`, `lulesh.h` and `lulesh-comm.cc`. The modifications to the first two files are shown here. The modifications to the third file were merely replacements of `MPI_COMM_WORLD` by `FTI_COMM_WORLD` and are not listed here.
diff --git a/LULESH/lulesh.cc b/FTI_LULESH/lulesh.cc
index a141611..d5572f8 100644
--- a/LULESH/lulesh.cc
+++ b/FTI_LULESH/lulesh.cc
@@ -162,6 +162,22 @@ Additional BSD Notice
#include "lulesh.h"
+//********************
+// Boost Serialization
+//********************
+#include <boost/archive/text_oarchive.hpp>
+#include <boost/archive/text_iarchive.hpp>
+
+#include <sstream>
+// --- File version ---
+#include <fstream>
+std::stringstream locDom_ser;
+
+//*************************
+// FTI Checkpoint - Restart
+//*************************
+#include <fti.h>
+#define ITER_CKPT 500
/*********************************/
/* Data structure implementation */
@@ -213,7 +229,7 @@ void TimeIncrement(Domain& domain)
#if USE_MPI
MPI_Allreduce(&gnewdt, &newdt, 1,
((sizeof(Real_t) == 4) ? MPI_FLOAT : MPI_DOUBLE),
- MPI_MIN, MPI_COMM_WORLD) ;
+ MPI_MIN, FTI_COMM_WORLD) ;
#else
newdt = gnewdt;
#endif
@@ -1061,7 +1077,7 @@ void CalcHourglassControlForElems(Domain& domain,
/* Do a check for negative volumes */
if ( domain.v(i) <= Real_t(0.0) ) {
#if USE_MPI
- MPI_Abort(MPI_COMM_WORLD, VolumeError) ;
+ MPI_Abort(FTI_COMM_WORLD, VolumeError) ;
#else
exit(VolumeError);
#endif
@@ -1111,7 +1127,7 @@ void CalcVolumeForceForElems(Domain& domain)
for ( Index_t k=0 ; k<numElem ; ++k ) {
if (determ[k] <= Real_t(0.0)) {
#if USE_MPI
- MPI_Abort(MPI_COMM_WORLD, VolumeError) ;
+ MPI_Abort(FTI_COMM_WORLD, VolumeError) ;
#else
exit(VolumeError);
#endif
@@ -1626,7 +1642,7 @@ void CalcLagrangeElements(Domain& domain, Real_t* vnew)
if (vnew[k] <= Real_t(0.0))
{
#if USE_MPI
- MPI_Abort(MPI_COMM_WORLD, VolumeError) ;
+ MPI_Abort(FTI_COMM_WORLD, VolumeError) ;
#else
exit(VolumeError);
#endif
@@ -2030,7 +2046,7 @@ void CalcQForElems(Domain& domain, Real_t vnew[])
if(idx >= 0) {
#if USE_MPI
- MPI_Abort(MPI_COMM_WORLD, QStopError) ;
+ MPI_Abort(FTI_COMM_WORLD, QStopError) ;
#else
exit(QStopError);
#endif
@@ -2399,7 +2415,7 @@ void ApplyMaterialPropertiesForElems(Domain& domain, Real_t vnew[])
}
if (vc <= 0.) {
#if USE_MPI
- MPI_Abort(MPI_COMM_WORLD, VolumeError) ;
+ MPI_Abort(FTI_COMM_WORLD, VolumeError) ;
#else
exit(VolumeError);
#endif
@@ -2683,6 +2699,19 @@ void LagrangeLeapFrog(Domain& domain)
#endif
}
+//Serialization
+void save (Domain *dom_saved){
+ boost::archive::text_oarchive oa(locDom_ser);
+ oa << dom_saved;
+}
+
+//Deserialization
+Domain* load (){
+ Domain *dom_loaded;
+ boost::archive::text_iarchive ia(locDom_ser);
+ ia >> dom_loaded;
+ return dom_loaded;
+}
/******************************************/
@@ -2697,8 +2726,10 @@ int main(int argc, char *argv[])
Domain_member fieldData ;
MPI_Init(&argc, &argv) ;
- MPI_Comm_size(MPI_COMM_WORLD, &numRanks) ;
- MPI_Comm_rank(MPI_COMM_WORLD, &myRank) ;
+ char config_fti[] = "config.fti";
+ FTI_Init(config_fti, MPI_COMM_WORLD);
+ MPI_Comm_size(FTI_COMM_WORLD, &numRanks) ;
+ MPI_Comm_rank(FTI_COMM_WORLD, &myRank) ;
#else
numRanks = 1;
myRank = 0;
@@ -2755,7 +2786,7 @@ int main(int argc, char *argv[])
CommSBN(*locDom, 1, &fieldData) ;
// End initialization
- MPI_Barrier(MPI_COMM_WORLD);
+ MPI_Barrier(FTI_COMM_WORLD);
#endif
// BEGIN timestep to solution */
@@ -2766,10 +2797,68 @@ int main(int argc, char *argv[])
gettimeofday(&start, NULL) ;
#endif
//debug to see region sizes
-// for(Int_t i = 0; i < locDom->numReg(); i++)
-// std::cout << "region" << i + 1<< "size" << locDom->regElemSize(i) <<std::endl;
- while((locDom->time() < locDom->stoptime()) && (locDom->cycle() < opts.its)) {
+ // for(Int_t i = 0; i < locDom->numReg(); i++)
+ // std::cout << "region" << i + 1<< "size" << locDom->regElemSize(i) <<std::endl;
+
+
+//---------------------------------------------------------------------------------------------------------------------//
+
+ //First serialization to get a buffer size
+ save(locDom);
+ //Cast std::stringstream -> char*
+ int buffer_size = 0;
+ char* buffer_locDom_ser;
+ std::string tmp = locDom_ser.str();
+ buffer_size = tmp.size();
+ buffer_size += 1000000; //Add this to handle the dynamic change size of the buffer
+ buffer_locDom_ser = new char [buffer_size];
+ strcpy(buffer_locDom_ser, tmp.c_str());
+
+ //Checkpoint informations
+ int id = 1;
+ int level = 1;
+ int res;
+
+ FTI_Protect(0, &id, 1, FTI_INTG);
+ FTI_Protect(1, &level, 1, FTI_INTG);
+ FTI_Protect(2, buffer_locDom_ser, buffer_size, FTI_CHAR);
+
+
+ //Restart
+ if(FTI_Status() != 0){
+ if(!myRank)
+ std::cout << "---- Restart ----\n";
+
+ res = FTI_Recover();
+
+ //Update checkpoint information
+ if (res != 0) {
+ exit(1);
+ }
+ else { // Update ckpt. id & level
+ level = (level+1)%5;
+ id++;
+ }
+
+ //Cast char* to stringstream
+ locDom_ser.str(""); //reset the stringstream
+ locDom_ser.str(buffer_locDom_ser);
+
+ //Deserialization
+ Domain *tmp;
+ tmp = load();
+
+ //Set the used by simulation object
+ delete locDom;
+ locDom = NULL;
+ locDom = tmp;
+ }
+
+//---------------------------------------------------------------------------------------------------------------------//
+ if (!myRank)
+ std::cout << "-- Start of the main loop --\n";
+ while((locDom->time() < locDom->stoptime()) && (locDom->cycle() < opts.its)) {
TimeIncrement(*locDom) ;
LagrangeLeapFrog(*locDom) ;
@@ -2777,6 +2866,26 @@ int main(int argc, char *argv[])
printf("cycle = %d, time = %e, dt=%e\n",
locDom->cycle(), double(locDom->time()), double(locDom->deltatime()) ) ;
}
+
+ //Checkpoint at ITER_CKPT
+ if((locDom->cycle()%ITER_CKPT) == 0 && locDom->cycle() != opts.its){
+
+ //Serialization of locDom in std::stringstream
+ locDom_ser.str("");
+ save(locDom);
+
+ //Cast std::stringstream -> char*
+ std::string tmp = locDom_ser.str();
+ buffer_locDom_ser[0] = '\0'; //reset the buffer
+ strcpy(buffer_locDom_ser, tmp.c_str());
+
+ res = FTI_Checkpoint(id, level);
+ // sleep(3); //for the tests
+ if(res != 0){
+ id++;
+ level= (level%4)+1;
+ }
+ }
}
// Use reduced max elapsed time
@@ -2791,7 +2900,7 @@ int main(int argc, char *argv[])
double elapsed_timeG;
#if USE_MPI
MPI_Reduce(&elapsed_time, &elapsed_timeG, 1, MPI_DOUBLE,
- MPI_MAX, 0, MPI_COMM_WORLD);
+ MPI_MAX, 0, FTI_COMM_WORLD);
#else
elapsed_timeG = elapsed_time;
#endif
@@ -2806,6 +2915,7 @@ int main(int argc, char *argv[])
}
#if USE_MPI
+ FTI_Finalize();
MPI_Finalize() ;
#endif
diff --git a/LULESH/lulesh.h b/FTI_LULESH/lulesh.h
index b6afd5c..1ca6a59 100644
--- a/LULESH/lulesh.h
+++ b/FTI_LULESH/lulesh.h
@@ -24,6 +24,16 @@
#include <math.h>
#include <vector>
+//********************
+// Boost Serialization
+//********************
+#include <boost/serialization/vector.hpp>
+#include <iostream>
+#include <fstream>
+#if _OPENMP
+#include <omp.h>
+#endif
+
//**************************************************
// Allow flexibility for arithmetic representations
//**************************************************
@@ -133,6 +143,27 @@ class Domain {
Index_t rowLoc, Index_t planeLoc,
Index_t nx, Int_t tp, Int_t nr, Int_t balance, Int_t cost);
+ Domain () :
+ m_e_cut(Real_t(1.0e-7)),
+ m_p_cut(Real_t(1.0e-7)),
+ m_q_cut(Real_t(1.0e-7)),
+ m_v_cut(Real_t(1.0e-10)),
+ m_u_cut(Real_t(1.0e-7)),
+ m_hgcoef(Real_t(3.0)),
+ m_ss4o3(Real_t(4.0)/Real_t(3.0)),
+ m_qstop(Real_t(1.0e+12)),
+ m_monoq_max_slope(Real_t(1.0)),
+ m_monoq_limiter_mult(Real_t(2.0)),
+ m_qlc_monoq(Real_t(0.5)),
+ m_qqc_monoq(Real_t(2.0)/Real_t(3.0)),
+ m_qqc(Real_t(2.0)),
+ m_eosvmax(Real_t(1.0e+9)),
+ m_eosvmin(Real_t(1.0e-9)),
+ m_pmin(Real_t(0.)),
+ m_emin(Real_t(-1.0e+15)),
+ m_dvovmax(Real_t(0.1)),
+ m_refdens(Real_t(1.0)) {};
+
//
// ALLOCATION
//
@@ -423,6 +454,243 @@ class Domain {
void SetupElementConnectivities(Int_t edgeElems);
void SetupBoundaryConditions(Int_t edgeElems);
+ friend class boost::serialization::access;
+ template <typename Archive>
+ void serialize(Archive &ar, const unsigned int version){
+
+
+ //Check de/serialization
+ // if(Archive::is_loading::value){
+ // std::cout << "-------------------------\n";
+ // std::cout << "Start of deserialization.\n";
+ // std::cout << "-------------------------\n";
+ // }
+ // else {
+ // std::cout << "-------------------------\n";
+ // std::cout << "Start of serialization.\n";
+ // std::cout << "-------------------------\n";
+ // }
+
+ ar & m_x ; /* coordinates */
+ ar & m_y;
+ ar & m_z;
+
+ ar & m_xd ; /* velocities */
+ ar & m_yd ;
+ ar & m_zd ;
+
+ ar & m_xdd ; /* accelerations */
+ ar & m_ydd ;
+ ar & m_zdd ;
+
+ ar & m_fx ; /* forces */
+ ar & m_fy ;
+ ar & m_fz ;
+
+ ar & m_nodalMass ; /* mass */
+
+ ar & m_symmX ; /* symmetry plane nodesets */
+ ar & m_symmY ;
+ ar & m_symmZ ;
+
+ // Element-centered
+
+ ar & m_numRanks ;
+ ar & m_colLoc ;
+ ar & m_rowLoc ;
+ ar & m_planeLoc ;
+ ar & m_tp ;
+
+ ar & m_sizeX ;
+ ar & m_sizeY ;
+ ar & m_sizeZ ;
+ ar & m_numElem ;
+ ar & m_numNode ;
+
+ ar & m_maxPlaneSize ;
+ ar & m_maxEdgeSize ;
+
+ // Region information
+ ar & m_numReg ;
+ ar & m_cost; //imbalance cost
+
+ if(Archive::is_loading::value){
+ m_regElemSize = new Index_t[m_numReg];
+ }
+ ar & boost::serialization::make_array <Index_t> (m_regElemSize, m_numReg); // Size of region sets
+
+ if(Archive::is_loading::value){
+ m_regNumList = new Index_t[m_numElem];
+ }
+ ar & boost::serialization::make_array <Index_t> (m_regNumList, m_numElem); // Region number per domain element
+
+ if(Archive::is_loading::value){
+ m_regElemlist = new Index_t*[m_numReg];
+ for (int i = 0; i < m_numReg; i++){
+ m_regElemlist[i] = new Index_t[m_regElemSize[i]];
+ }
+ }
+
+ for (int i = 0; i < m_numReg; i++){
+ ar & boost::serialization::make_array <Index_t> (m_regElemlist[i], m_regElemSize[i]);
+ }
+
+ ar & m_nodelist ; /* elemToNode connectivity */
+
+ ar & m_lxim ; /* element connectivity across each face */
+ ar & m_lxip ;
+ ar & m_letam ;
+ ar & m_letap ;
+ ar & m_lzetam ;
+ ar & m_lzetap ;
+
+ ar & m_elemBC ; /* symmetry/free-surface flags for each elem face */
+
+ ar & m_dxx ; /* principal strains -- temporary */
+ ar & m_dyy ;
+ ar & m_dzz ;
+
+ ar & m_delv_xi ; /* velocity gradient -- temporary */
+ ar & m_delv_eta ;
+ ar & m_delv_zeta ;
+
+ ar & m_delx_xi ; /* coordinate gradient -- temporary */
+ ar & m_delx_eta ;
+ ar & m_delx_zeta ;
+
+ ar & m_e ; /* energy */
+
+ ar & m_p ; /* pressure */
+ ar & m_q ; /* q */
+ ar & m_ql ; /* linear term for q */
+ ar & m_qq ; /* quadratic term for q */
+
+ ar & m_v ; /* relative volume */
+ ar & m_volo ; /* reference volume */
+ ar & m_vnew ; /* new relative volume -- temporary */
+ ar & m_delv ; /* m_vnew - m_v */
+ ar & m_vdov ; /* volume derivative over volume */
+
+ ar & m_arealg ; /* characteristic length of an element */
+
+ ar & m_ss ; /* "sound speed" */
+
+ ar & m_elemMass ; /* mass */
+
+ // Cutoffs (treat as constants)
+ ar & const_cast<Real_t &>(m_e_cut);
+ ar & const_cast<Real_t &>(m_p_cut);
+ ar & const_cast<Real_t &>(m_q_cut);
+ ar & const_cast<Real_t &>(m_v_cut);
+ ar & const_cast<Real_t &>(m_u_cut);
+
+ // Other constants (usually setable, but hardcoded in this proxy app)
+ ar & const_cast<Real_t &>(m_hgcoef);
+ ar & const_cast<Real_t &>(m_ss4o3);
+ ar & const_cast<Real_t &>(m_qstop);
+ ar & const_cast<Real_t &>(m_monoq_max_slope);
+ ar & const_cast<Real_t &>(m_monoq_limiter_mult);
+ ar & const_cast<Real_t &>(m_qlc_monoq);
+ ar & const_cast<Real_t &>(m_qqc_monoq);
+ ar & const_cast<Real_t &>(m_qqc);
+ ar & const_cast<Real_t &>(m_eosvmax);
+ ar & const_cast<Real_t &>(m_eosvmin);
+ ar & const_cast<Real_t &>(m_pmin);
+ ar & const_cast<Real_t &>(m_emin);
+ ar & const_cast<Real_t &>(m_dvovmax);
+ ar & const_cast<Real_t &>(m_refdens);
+
+ // Variables to keep track of timestep, simulation time, and cycle
+ ar & m_dtcourant ; // courant constraint
+ ar & m_dthydro ; // volume change constraint
+ ar & m_cycle ; // iteration count for simulation
+ ar & m_dtfixed ; // fixed time increment
+ ar & m_time ; // current time
+ ar & m_deltatime ; // variable time increment
+ ar & m_deltatimemultlb ;
+ ar & m_deltatimemultub ;
+ ar & m_dtmax ; // maximum allowable time increment
+ ar & m_stoptime ; // end time for simulation
+
+ // OMP hack
+ #if _OPENMP
+ Index_t numthreads = omp_get_max_threads();
+ #else
+ Index_t numthreads = 1;
+ #endif
+
+ if (numthreads > 1) {
+ if(Archive::is_loading::value){
+ m_nodeElemStart = new Index_t[m_numNode+1];
+ }
+ ar & boost::serialization::make_array <Index_t> (m_nodeElemStart, m_numNode+1);
+
+ if(Archive::is_loading::value){
+ m_nodeElemCornerList = new Index_t[m_nodeElemStart[m_numNode]];
+ }
+ ar & boost::serialization::make_array <Index_t> (m_nodeElemCornerList, m_nodeElemStart[m_numNode]);
+ } else {
+ m_nodeElemStart = NULL;
+ m_nodeElemCornerList = NULL;
+ }
+
+ // Used in setup
+ ar & m_rowMin;
+ ar & m_rowMax;
+ ar & m_colMin;
+ ar & m_colMax;
+ ar & m_planeMin;
+ ar & m_planeMax;
+
+ #if USE_MPI
+ // account for face communication
+ Index_t comBufSize =
+ (m_rowMin + m_rowMax + m_colMin + m_colMax + m_planeMin + m_planeMax) *
+ m_maxPlaneSize * MAX_FIELDS_PER_MPI_COMM ;
+
+ // account for edge communication
+ comBufSize +=
+ ((m_rowMin & m_colMin) + (m_rowMin & m_planeMin) + (m_colMin & m_planeMin) +
+ (m_rowMax & m_colMax) + (m_rowMax & m_planeMax) + (m_colMax & m_planeMax) +
+ (m_rowMax & m_colMin) + (m_rowMin & m_planeMax) + (m_colMin & m_planeMax) +
+ (m_rowMin & m_colMax) + (m_rowMax & m_planeMin) + (m_colMax & m_planeMin)) *
+ m_maxEdgeSize * MAX_FIELDS_PER_MPI_COMM ;
+
+ // account for corner communication
+ // factor of 16 is so each buffer has its own cache line
+ comBufSize += ((m_rowMin & m_colMin & m_planeMin) +
+ (m_rowMin & m_colMin & m_planeMax) +
+ (m_rowMin & m_colMax & m_planeMin) +
+ (m_rowMin & m_colMax & m_planeMax) +
+ (m_rowMax & m_colMin & m_planeMin) +
+ (m_rowMax & m_colMin & m_planeMax) +
+ (m_rowMax & m_colMax & m_planeMin) +
+ (m_rowMax & m_colMax & m_planeMax)) * CACHE_COHERENCE_PAD_REAL ;
+
+
+ // Communication Work space
+ if(Archive::is_loading::value){
+ commDataSend = new Real_t[comBufSize];
+ commDataRecv = new Real_t[comBufSize];
+ }
+ ar & boost::serialization::make_array <Real_t> (commDataRecv,comBufSize);
+ ar & boost::serialization::make_array <Real_t> (commDataSend,comBufSize);
+
+ #endif
+
+ //Check de/serialization
+ // if(Archive::is_loading::value){
+ // std::cout << "-------------------------\n";
+ // std::cout << "Deserialization finished.\n";
+ // std::cout << "-------------------------\n";
+ // }
+ // else {
+ // std::cout << "-------------------------\n";
+ // std::cout << "Serialization finished.\n";
+ // std::cout << "-------------------------\n";
+ // }
+ }
+
//
// IMPLEMENTATION
//