Chaste  Release::3.4
Hdf5DataWriter.cpp
1 /*
2 
3 Copyright (c) 2005-2016, University of Oxford.
4 All rights reserved.
5 
6 University of Oxford means the Chancellor, Masters and Scholars of the
7 University of Oxford, having an administrative office at Wellington
8 Square, Oxford OX1 2JD, UK.
9 
10 This file is part of Chaste.
11 
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14  * Redistributions of source code must retain the above copyright notice,
15  this list of conditions and the following disclaimer.
16  * Redistributions in binary form must reproduce the above copyright notice,
17  this list of conditions and the following disclaimer in the documentation
18  and/or other materials provided with the distribution.
19  * Neither the name of the University of Oxford nor the names of its
20  contributors may be used to endorse or promote products derived from this
21  software without specific prior written permission.
22 
23 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
24 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
27 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
29 GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32 OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 
34 */
35 
36 /*
37  * Implementation file for Hdf5DataWriter class.
38  *
39  */
40 #include <set>
41 #include <cstring> //For strcmp etc. Needed in gcc-4.4
42 #include <boost/scoped_array.hpp>
43 
44 #include "Hdf5DataWriter.hpp"
45 
46 #include "Exception.hpp"
47 #include "OutputFileHandler.hpp"
48 #include "PetscTools.hpp"
49 #include "Version.hpp"
50 #include "MathsCustomFunctions.hpp"
51 
52 Hdf5DataWriter::Hdf5DataWriter(DistributedVectorFactory& rVectorFactory,
53  const std::string& rDirectory,
54  const std::string& rBaseName,
55  bool cleanDirectory,
56  bool extendData,
57  std::string datasetName)
58  : AbstractHdf5Access(rDirectory, rBaseName, datasetName),
59  mrVectorFactory(rVectorFactory),
60  mCleanDirectory(cleanDirectory),
61  mUseExistingFile(extendData),
62  mIsInDefineMode(true),
63  mIsFixedDimensionSet(false),
64  mEstimatedUnlimitedLength(1u),
65  mFileFixedDimensionSize(0u),
66  mDataFixedDimensionSize(0u),
67  mLo(mrVectorFactory.GetLow()),
68  mHi(mrVectorFactory.GetHigh()),
69  mNumberOwned(0u),
70  mOffset(0u),
71  mNeedExtend(false),
72  mUseMatrixForIncompleteData(false),
73  mCurrentTimeStep(0),
74  mSinglePermutation(NULL),
75  mDoublePermutation(NULL),
76  mSingleIncompleteOutputMatrix(NULL),
77  mDoubleIncompleteOutputMatrix(NULL),
78  mUseOptimalChunkSizeAlgorithm(true),
79  mNumberOfChunks(0u)
80 {
81  mChunkSize[0] = 0;
82  mChunkSize[1] = 0;
83  mChunkSize[2] = 0;
84  mFixedChunkSize[0] = 0;
85  mFixedChunkSize[1] = 0;
86  mFixedChunkSize[2] = 0;
87 
88  if (mUseExistingFile && mCleanDirectory)
89  {
90  EXCEPTION("You are asking to delete a file and then extend it, change arguments to constructor.");
91  }
92 
93  if (!mUseExistingFile && mDatasetName != "Data")
94  {
95  // User is trying to add a new dataset, but they are not extending an existing file
96  EXCEPTION("Adding new data only makes sense when extending an existing file");
97  }
98 
99  if (mUseExistingFile)
100  {
101  // Variables should already be defined if we are extending.
102  mIsInDefineMode = false;
103 
104  // If the file exists already, open it - this call will check it exists.
105  OpenFile();
106 
107  // If the dataset we are interested in doesn't exist then close the file
108  // We will go on to define variables and open the file as usual (except for it pre-existing).
109  if (!DoesDatasetExist(mDatasetName))
110  {
111  //std::cout << "Dataset: " << mDatasetName << " doesn't exist in the file.\n";
112  H5Fclose(mFileId);
113  mIsInDefineMode = true;
114  }
115  // If dataset does exist then leave file open and try to extend it.
116  else
117  {
118  // Where to find the file
119  assert(mCleanDirectory==false);
120 
121  mVariablesDatasetId = H5Dopen(mFileId, mDatasetName.c_str());
122  hid_t variables_dataspace = H5Dget_space(mVariablesDatasetId);
123  //unsigned variables_dataset_rank = H5Sget_simple_extent_ndims(variables_dataspace);
124  hsize_t dataset_max_sizes[DATASET_DIMS];
125  H5Sget_simple_extent_dims(variables_dataspace, mDatasetDims, dataset_max_sizes); // put dims into mDatasetDims
126  H5Sclose(variables_dataspace);
127 
128  // Check that an unlimited dimension is defined
129  if (dataset_max_sizes[0] != H5S_UNLIMITED)
130  {
131  H5Dclose(mVariablesDatasetId);
132  H5Fclose(mFileId);
133  EXCEPTION("Tried to open a datafile for extending which doesn't have an unlimited dimension.");
134  }
135  mIsUnlimitedDimensionSet = true;
136 
137  // Sanity check other dimension sizes
138  for (unsigned i=1; i<DATASET_DIMS; i++) // Zero is excluded since it is unlimited
139  {
140  assert(mDatasetDims[i] == dataset_max_sizes[i]);
141  }
142  mFileFixedDimensionSize = mDatasetDims[1];
143  mIsFixedDimensionSet = true;
144  mVariables.reserve(mDatasetDims[2]);
145 
146  // Figure out what the variables are
147  hid_t attribute_id = H5Aopen_name(mVariablesDatasetId, "Variable Details");
148  hid_t attribute_type = H5Aget_type(attribute_id);
149  hid_t attribute_space = H5Aget_space(attribute_id);
150  hsize_t attr_dataspace_dim;
151  H5Sget_simple_extent_dims(attribute_space, &attr_dataspace_dim, NULL);
152  unsigned num_columns = H5Sget_simple_extent_npoints(attribute_space);
153  assert(num_columns == mDatasetDims[2]); // I think...
154 
155  char* string_array = (char *)malloc(sizeof(char)*MAX_STRING_SIZE*(int)num_columns);
156  H5Aread(attribute_id, attribute_type, string_array);
157 
158  // Loop over columns/variables
159  for (unsigned index=0; index<num_columns; index++)
160  {
161  // Read the string from the raw vector
162  std::string column_name_unit(&string_array[MAX_STRING_SIZE*index]);
163 
164  // Find location of unit name
165  size_t name_length = column_name_unit.find('(');
166  size_t unit_length = column_name_unit.find(')') - name_length - 1;
167 
168  // Create the variable
169  DataWriterVariable var;
170  var.mVariableName = column_name_unit.substr(0, name_length);
171  var.mVariableUnits = column_name_unit.substr(name_length+1, unit_length);
172  mVariables.push_back(var);
173  }
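/*
 * Worked example of the parsing above (illustrative only): for a stored
 * column string "V(mV)", find('(') gives name_length = 1 and
 * find(')') - name_length - 1 gives unit_length = 2, so the variable is
 * recovered as mVariableName = "V" with mVariableUnits = "mV".
 */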
174 
175  // Free memory, release ids
176  free(string_array);
177  H5Tclose(attribute_type);
178  H5Sclose(attribute_space);
179  H5Aclose(attribute_id);
180 
181  // Now deal with time
182  SetUnlimitedDatasetId();
183 
184  // How many time steps have been written so far?
185  hid_t timestep_dataspace = H5Dget_space(mUnlimitedDatasetId);
186  hsize_t num_timesteps;
187  H5Sget_simple_extent_dims(timestep_dataspace, &num_timesteps, NULL);
188  H5Sclose(timestep_dataspace);
189  mCurrentTimeStep = (long)num_timesteps - 1;
190 
191  // Incomplete data?
192  attribute_id = H5Aopen_name(mVariablesDatasetId, "IsDataComplete");
193  if (attribute_id < 0)
194  {
195  #define COVERAGE_IGNORE
196  // Old format, before we added the attribute.
197  EXCEPTION("Extending old-format files isn't supported.");
198  #undef COVERAGE_IGNORE
199  }
200  else
201  {
202  attribute_type = H5Aget_type(attribute_id);
203  attribute_space = H5Aget_space(attribute_id);
204  unsigned is_data_complete;
205  H5Aread(attribute_id, H5T_NATIVE_UINT, &is_data_complete);
206  mIsDataComplete = (is_data_complete == 1) ? true : false;
207  H5Tclose(attribute_type);
208  H5Sclose(attribute_space);
209  H5Aclose(attribute_id);
210  }
211  if (mIsDataComplete)
212  {
213  mNumberOwned = mrVectorFactory.GetLocalOwnership();
214  mOffset = mLo;
215  mDataFixedDimensionSize = mFileFixedDimensionSize;
216  }
217  else
218  {
219  // Read which nodes appear in the file (mIncompleteNodeIndices)
220  attribute_id = H5Aopen_name(mVariablesDatasetId, "NodeMap");
221  attribute_type = H5Aget_type(attribute_id);
222  attribute_space = H5Aget_space(attribute_id);
223 
224  // Get the dataset/dataspace dimensions
225  unsigned num_node_indices = H5Sget_simple_extent_npoints(attribute_space);
226 
227  // Read data from hyperslab in the file into the hyperslab in memory
228  mIncompleteNodeIndices.clear();
229  mIncompleteNodeIndices.resize(num_node_indices);
230  H5Aread(attribute_id, H5T_NATIVE_UINT, &mIncompleteNodeIndices[0]);
231 
232  // Release ids
233  H5Tclose(attribute_type);
234  H5Sclose(attribute_space);
235  H5Aclose(attribute_id);
236 
237  // Set up what data we can
243  mDataFixedDimensionSize = UINT_MAX;
244  H5Dclose(mVariablesDatasetId);
245  H5Dclose(mUnlimitedDatasetId);
246  H5Fclose(mFileId);
247  EXCEPTION("Unable to extend an incomplete data file at present.");
248  }
249 
250  // Done
251  AdvanceAlongUnlimitedDimension();
252  }
253  }
254 }
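/*
 * A minimal usage sketch of this writer (illustrative only; `factory`,
 * `num_nodes` and `solution` are assumed to be a suitable
 * DistributedVectorFactory, node count and PETSc Vec):
 *
 *     Hdf5DataWriter writer(factory, "TestHdf5DataWriter", "my_file");
 *     int v_id = writer.DefineVariable("V", "mV");
 *     writer.DefineFixedDimension(num_nodes);
 *     writer.DefineUnlimitedDimension("Time", "ms");
 *     writer.EndDefineMode();
 *
 *     writer.PutVector(v_id, solution);
 *     writer.PutUnlimitedVariable(0.0);
 *     writer.AdvanceAlongUnlimitedDimension();
 *     writer.Close();
 */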
255 
256 Hdf5DataWriter::~Hdf5DataWriter()
257 {
258  Close();
259 
260  if (mSinglePermutation)
261  {
262  PetscTools::Destroy(mSinglePermutation);
263  }
264  if (mDoublePermutation)
265  {
266  PetscTools::Destroy(mDoublePermutation);
267  }
268  if (mSingleIncompleteOutputMatrix)
269  {
270  PetscTools::Destroy(mSingleIncompleteOutputMatrix);
271  }
272  if (mDoubleIncompleteOutputMatrix)
273  {
274  PetscTools::Destroy(mDoubleIncompleteOutputMatrix);
275  }
276 }
277 
278 void Hdf5DataWriter::OpenFile()
279 {
280  OutputFileHandler output_file_handler(mDirectory, mCleanDirectory);
281  std::string file_name = mDirectory.GetAbsolutePath() + mBaseName + ".h5";
282 
283  if (mUseExistingFile)
284  {
285  FileFinder h5_file(file_name, RelativeTo::Absolute);
286  if (!h5_file.Exists())
287  {
288  EXCEPTION("Hdf5DataWriter could not open " + file_name + " , as it does not exist.");
289  }
290  }
291 
292  // Set up a property list saying how we'll open the file
293  hid_t fapl = H5Pcreate(H5P_FILE_ACCESS);
294  H5Pset_fapl_mpio(fapl, PETSC_COMM_WORLD, MPI_INFO_NULL);
295 
296  // Set size of each dimension in main dataset.
297  mDatasetDims[0] = mEstimatedUnlimitedLength; // While developing we got a non-documented "only the first dimension can be extendible" error.
298  mDatasetDims[1] = mFileFixedDimensionSize; // or should this be mDataFixedDimensionSize?
299  mDatasetDims[2] = mVariables.size();
300 
301  // Open the file and free the property list
302  std::string attempting_to;
303  if (mUseExistingFile)
304  {
305  mFileId = H5Fopen(file_name.c_str(), H5F_ACC_RDWR, fapl);
306  attempting_to = "open";
307  }
308  else
309  {
310  // Do chunk size calculation now as it lets us optimise the size of the B tree (via H5Pset_istore_k), see:
311  // http://hdf-forum.184993.n3.nabble.com/size-of-quot-write-operation-quot-with-pHDF5-td2636129.html#a2647633
312  SetChunkSize();
313  hid_t fcpl = H5Pcreate(H5P_FILE_CREATE);
314  if (mNumberOfChunks>64) // Default parameter is 32, so don't go lower than that
315  {
316  H5Pset_istore_k(fcpl, (mNumberOfChunks+1)/2);
317  }
318  /*
319  * Align objects to disk block size. Useful for striped filesystem e.g. Lustre
320  * Ideally this should match the chunk size (see "target_size_in_bytes" below).
321  */
322  // H5Pset_alignment(fapl, 0, 1024*1024); // Align to 1 M blocks
323 
324  /*
325  * The stripe size can be set on the directory just before creation of the H5
326  * file, and set back to normal just after, so that the H5 file inherits these
327  * settings, by uncommenting the lines below. Adjust the command for your
328  * specific filesystem!
329  */
330  /*
331  std::string command;
332  // Turn on striping for directory, which the newly created HDF5 file will inherit.
333  if ( PetscTools::AmMaster() )
334  {
335  // Increase stripe count
336  command = "lfs setstripe --size 1M --count -1 "; // -1 means use all OSTs
337  command.append(mDirectory.GetAbsolutePath());
338  std::cout << command << std::endl;
339  system(command.c_str());
340  }
341  */
342 
343  // Create file
344  mFileId = H5Fcreate(file_name.c_str(), H5F_ACC_TRUNC, fcpl, fapl);
345 
346  /*
347  // Turn off striping so other output files stay unstriped.
348  if ( PetscTools::AmMaster() )
349  {
350  // Use one stripe
351  command = "lfs setstripe --size 1M --count 1 ";
352  command.append(mDirectory.GetAbsolutePath());
353  std::cout << command << std::endl;
354  system(command.c_str());
355  }
356  PetscTools::Barrier(); // Don't let other processes run away
357  */
358 
359  attempting_to = "create";
360  H5Pclose(fcpl);
361  }
362 
363  H5Pclose(fapl);
364 
365  if (mFileId < 0)
366  {
368  EXCEPTION("Hdf5DataWriter could not " << attempting_to << " " << file_name <<
369  " , H5F" << attempting_to << " error code = " << mFileId);
370  }
371 }
372 
373 void Hdf5DataWriter::DefineFixedDimension(long dimensionSize)
374 {
375  if (!mIsInDefineMode)
376  {
377  EXCEPTION("Cannot define variables when not in Define mode");
378  }
379  if (dimensionSize < 1)
380  {
381  EXCEPTION("Fixed dimension must be at least 1 long");
382  }
383  if (mIsFixedDimensionSet)
384  {
385  EXCEPTION("Fixed dimension already set");
386  }
387 
388  // Work out the ownership details
389  mLo = mrVectorFactory.GetLow();
390  mHi = mrVectorFactory.GetHigh();
391  mNumberOwned = mrVectorFactory.GetLocalOwnership();
392  mOffset = mLo;
393  mFileFixedDimensionSize = dimensionSize;
394  mDataFixedDimensionSize = dimensionSize;
395  mIsFixedDimensionSet = true;
396 }
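/*
 * Worked example of the ownership details above, assuming the usual
 * contiguous row distribution provided by DistributedVectorFactory: with
 * dimensionSize = 10 split over two processes as [0,5) and [5,10),
 * process 0 ends up with mLo = 0, mHi = 5, mNumberOwned = 5, mOffset = 0,
 * while process 1 gets mLo = 5, mHi = 10, mNumberOwned = 5, mOffset = 5.
 */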
397 
398 void Hdf5DataWriter::DefineFixedDimension(const std::vector<unsigned>& rNodesToOuput, long vecSize)
399 {
400  unsigned vector_size = rNodesToOuput.size();
401 
402  for (unsigned index=0; index < vector_size-1; index++)
403  {
404  if (rNodesToOuput[index] >= rNodesToOuput[index+1])
405  {
406  EXCEPTION("Input should be monotonic increasing");
407  }
408  }
409 
410  if ((int) rNodesToOuput.back() >= vecSize)
411  {
412  EXCEPTION("Vector size doesn't match nodes to output");
413  }
414 
415  DefineFixedDimension(vecSize);
416 
417  mFileFixedDimensionSize = vector_size;
418  mIsDataComplete = false;
419  mIncompleteNodeIndices = rNodesToOuput;
420  ComputeIncompleteOffset();
421 }
422 
423 void Hdf5DataWriter::DefineFixedDimensionUsingMatrix(const std::vector<unsigned>& rNodesToOuput, long vecSize)
424 {
425  unsigned vector_size = rNodesToOuput.size();
426 
427  for (unsigned index=0; index < vector_size-1; index++)
428  {
429  if (rNodesToOuput[index] >= rNodesToOuput[index+1])
430  {
431  EXCEPTION("Input should be monotonic increasing");
432  }
433  }
434 
435  if ((int) rNodesToOuput.back() >= vecSize)
436  {
437  EXCEPTION("Vector size doesn't match nodes to output");
438  }
439 
440  DefineFixedDimension(vecSize);
441 
442  mFileFixedDimensionSize = vector_size;
443  mIsDataComplete = false;
444  mIncompleteNodeIndices = rNodesToOuput;
445  mUseMatrixForIncompleteData = true;
446  ComputeIncompleteOffset();
447 
448  // Make sure we've not done it already
449  assert(mSingleIncompleteOutputMatrix == NULL);
450  assert(mDoubleIncompleteOutputMatrix == NULL);
451  PetscTools::SetupMat(mSingleIncompleteOutputMatrix, mFileFixedDimensionSize, mDataFixedDimensionSize, 2, mNumberOwned, mHi - mLo);
452  PetscTools::SetupMat(mDoubleIncompleteOutputMatrix, 2*mFileFixedDimensionSize, 2*mDataFixedDimensionSize, 4, 2*mNumberOwned, 2*(mHi - mLo));
453 
454  // Only do local rows
455  for (unsigned row_index = mOffset; row_index < mOffset + mNumberOwned; row_index++)
456  {
457  // Put zero on the diagonal
458  MatSetValue(mSingleIncompleteOutputMatrix, row_index, row_index, 0.0, INSERT_VALUES);
459 
460  // Put one at (i,j)
461  MatSetValue(mSingleIncompleteOutputMatrix, row_index, rNodesToOuput[row_index], 1.0, INSERT_VALUES);
462 
463  unsigned bi_index = 2*row_index;
464  unsigned perm_index = 2*rNodesToOuput[row_index];
465 
466  // Put zeroes on the diagonal
467  MatSetValue(mDoubleIncompleteOutputMatrix, bi_index, bi_index, 0.0, INSERT_VALUES);
468  MatSetValue(mDoubleIncompleteOutputMatrix, bi_index+1, bi_index+1, 0.0, INSERT_VALUES);
469 
470  // Put ones at (i,j)
471  MatSetValue(mDoubleIncompleteOutputMatrix, bi_index, perm_index, 1.0, INSERT_VALUES);
472  MatSetValue(mDoubleIncompleteOutputMatrix, bi_index+1, perm_index+1, 1.0, INSERT_VALUES);
473  }
474  MatAssemblyBegin(mSingleIncompleteOutputMatrix, MAT_FINAL_ASSEMBLY);
475  MatAssemblyBegin(mDoubleIncompleteOutputMatrix, MAT_FINAL_ASSEMBLY);
476  MatAssemblyEnd(mSingleIncompleteOutputMatrix, MAT_FINAL_ASSEMBLY);
477  MatAssemblyEnd(mDoubleIncompleteOutputMatrix, MAT_FINAL_ASSEMBLY);
478 
479 // MatView(mSingleIncompleteOutputMatrix, PETSC_VIEWER_STDOUT_WORLD);
480 }
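/*
 * Illustration of the single-variable output matrix assembled above, for a
 * hypothetical case with vecSize = 4 and rNodesToOuput = {1, 3} on one
 * process (so mOffset = 0, mNumberOwned = 2): row i carries a one in column
 * rNodesToOuput[i],
 *
 *     [ 0 1 0 0 ]
 *     [ 0 0 0 1 ]
 *
 * so the MatMult in PutVector picks entries 1 and 3 out of the complete
 * vector. The "double" matrix does the same for interleaved pairs of values.
 */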
481 
482 void Hdf5DataWriter::ComputeIncompleteOffset()
483 {
484  mOffset = 0;
485  mNumberOwned = 0;
486  for (unsigned i=0; i<mIncompleteNodeIndices.size(); i++)
487  {
488  if (mIncompleteNodeIndices[i] < mLo)
489  {
490  mOffset++;
491  }
492  else if (mIncompleteNodeIndices[i] < mHi)
493  {
494  mNumberOwned++;
495  }
496  }
497 }
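/*
 * Worked example: with mIncompleteNodeIndices = {2, 5, 9} on a process that
 * owns global rows [4, 8) (mLo = 4, mHi = 8), index 2 lies below mLo so
 * mOffset = 1, and only index 5 lies in [mLo, mHi) so mNumberOwned = 1,
 * i.e. this process writes one entry, starting at position 1 of the
 * incomplete (file) ordering.
 */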
498 
499 int Hdf5DataWriter::DefineVariable(const std::string& rVariableName,
500  const std::string& rVariableUnits)
501 {
502  if (!mIsInDefineMode)
503  {
504  EXCEPTION("Cannot define variables when not in Define mode");
505  }
506 
507  CheckVariableName(rVariableName);
508  CheckUnitsName(rVariableUnits);
509 
510  // Check for the variable being already defined
511  for (unsigned index=0; index<mVariables.size(); index++)
512  {
513  if (mVariables[index].mVariableName == rVariableName)
514  {
515  EXCEPTION("Variable name already exists");
516  }
517  }
518 
519  DataWriterVariable new_variable;
520  new_variable.mVariableName = rVariableName;
521  new_variable.mVariableUnits = rVariableUnits;
522  int variable_id;
523 
524  // Add the variable to the variable vector
525  mVariables.push_back(new_variable);
526 
527  // Use the index of the variable vector as the variable ID.
528  // This is ok since there is no way to remove variables.
529  variable_id = mVariables.size() - 1;
530 
531  return variable_id;
532 }
533 
534 void Hdf5DataWriter::CheckVariableName(const std::string& rName)
535 {
536  if (rName.length() == 0)
537  {
538  EXCEPTION("Variable name not allowed: may not be blank.");
539  }
540  CheckUnitsName(rName);
541 }
542 
543 void Hdf5DataWriter::CheckUnitsName(const std::string& rName)
544 {
545  for (unsigned i=0; i<rName.length(); i++)
546  {
547  if (!isalnum(rName[i]) && !(rName[i]=='_'))
548  {
549  std::string error = "Variable name/units '" + rName + "' not allowed: may only contain alphanumeric characters or '_'.";
550  EXCEPTION(error);
551  }
552  }
553 }
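/*
 * For example, names such as "V", "membrane_voltage" or "mV" pass this
 * check, whereas "membrane voltage" or "mV/ms" would throw, since a space
 * and '/' are neither alphanumeric nor '_'.
 */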
554 
555 bool Hdf5DataWriter::IsInDefineMode()
556 {
557  return mIsInDefineMode;
558 }
559 
560 void Hdf5DataWriter::EndDefineMode()
561 {
562  // Check that at least one variable has been defined
563  if (mVariables.size() < 1)
564  {
565  EXCEPTION("Cannot end define mode. No variables have been defined.");
566  }
567 
568  // Check that a fixed dimension has been defined
569  if (mIsFixedDimensionSet == false)
570  {
571  EXCEPTION("Cannot end define mode. One fixed dimension should be defined.");
572  }
573 
574  OpenFile();
575 
576  mIsInDefineMode = false;
577 
578  /*
579  * Create "Data" dataset
580  */
581 
582  // Set max dims of dataset
583  hsize_t dataset_max_dims[DATASET_DIMS];
584  if (mIsUnlimitedDimensionSet)
585  {
586  dataset_max_dims[0] = H5S_UNLIMITED;
587  }
588  else
589  {
590  dataset_max_dims[0] = 1;
591  }
592  dataset_max_dims[1] = mDatasetDims[1];
593  dataset_max_dims[2] = mDatasetDims[2];
594 
595  // If we didn't already do the chunk calculation (e.g. we're adding a dataset to an existing H5 file)
596  if (mNumberOfChunks==0)
597  {
598  SetChunkSize();
599  }
600 
601  // Create chunked dataset and clean up
602  hid_t cparms = H5Pcreate (H5P_DATASET_CREATE);
603  H5Pset_chunk( cparms, DATASET_DIMS, mChunkSize);
604  hid_t filespace = H5Screate_simple(DATASET_DIMS, mDatasetDims, dataset_max_dims);
605  mVariablesDatasetId = H5Dcreate(mFileId, mDatasetName.c_str(), H5T_NATIVE_DOUBLE, filespace, cparms);
606  SetMainDatasetRawChunkCache(); // Set large cache (even though parallel drivers don't currently use it!)
607  H5Sclose(filespace);
608  H5Pclose(cparms);
609 
610  // Create dataspace for the name, unit attribute
611  const unsigned MAX_STRING_SIZE = 100;
612  hsize_t columns[1] = {mVariables.size()};
613  hid_t colspace = H5Screate_simple(1, columns, NULL);
614 
615  // Create attribute for variable names
616  char* col_data = (char*) malloc(mVariables.size() * sizeof(char) * MAX_STRING_SIZE);
617 
618  char* col_data_offset = col_data;
619  for (unsigned var=0; var<mVariables.size(); var++)
620  {
621  std::string full_name = mVariables[var].mVariableName + "(" + mVariables[var].mVariableUnits + ")";
622  strcpy (col_data_offset, full_name.c_str());
623  col_data_offset += sizeof(char) * MAX_STRING_SIZE;
624  }
625 
626  // Create the type 'string'
627  hid_t string_type = H5Tcopy(H5T_C_S1);
628  H5Tset_size(string_type, MAX_STRING_SIZE );
629  hid_t attr = H5Acreate(mVariablesDatasetId, "Variable Details", string_type, colspace, H5P_DEFAULT);
630 
631  // Write to the attribute
632  H5Awrite(attr, string_type, col_data);
633 
634  // Close dataspace & attribute
635  free(col_data);
636  H5Sclose(colspace);
637  H5Aclose(attr);
638 
639  // Create "boolean" attribute recording whether or not the data are complete
640  columns[0] = 1;
641  colspace = H5Screate_simple(1, columns, NULL);
642  attr = H5Acreate(mVariablesDatasetId, "IsDataComplete", H5T_NATIVE_UINT, colspace, H5P_DEFAULT);
643 
644  // Write to the attribute - note that the native boolean is not predictable
645  unsigned is_data_complete = mIsDataComplete ? 1 : 0;
646  H5Awrite(attr, H5T_NATIVE_UINT, &is_data_complete);
647 
648  H5Sclose(colspace);
649  H5Aclose(attr);
650 
651  if (!mIsDataComplete)
652  {
653  // We need to write a map
654  // Create "unsigned" attribute with the map
655  columns[0] = mFileFixedDimensionSize;
656  colspace = H5Screate_simple(1, columns, NULL);
657  attr = H5Acreate(mVariablesDatasetId, "NodeMap", H5T_NATIVE_UINT, colspace, H5P_DEFAULT);
658 
659  // Write to the attribute
660  H5Awrite(attr, H5T_NATIVE_UINT, &mIncompleteNodeIndices[0]);
661 
662  H5Sclose(colspace);
663  H5Aclose(attr);
664  }
665 
666  /*
667  * Create "Time" dataset
668  */
669  if (mIsUnlimitedDimensionSet)
670  {
671  hsize_t time_dataset_dims[1] = {mEstimatedUnlimitedLength};
672  hsize_t time_dataset_max_dims[1] = {H5S_UNLIMITED};
673 
674  /*
675  * Modify dataset creation properties to enable chunking. Set the chunk size in the "Time"
676  * dataset to 128 doubles, i.e. 1 KiB. See #2336.
677  */
678  hsize_t time_chunk_dims[1] = {128u};
679  hid_t time_cparms = H5Pcreate (H5P_DATASET_CREATE);
680  H5Pset_chunk( time_cparms, 1, time_chunk_dims);
681 
682  hid_t time_filespace = H5Screate_simple(1, time_dataset_dims, time_dataset_max_dims);
683 
684  // Create the unlimited dimension dataset
685 
686  // * Files post r18257 (inc. Release 3.2 onwards) use "<DatasetName>_Unlimited" for "<DatasetName>"'s
687  // unlimited variable,
688  // - a new attribute "Name" has been added to the Unlimited Dataset to allow it to assign
689  // any name to the unlimited variable. Which can then be easily read by Hdf5DataReader.
690 
691  mUnlimitedDatasetId = H5Dcreate(mFileId, (mDatasetName + "_Unlimited").c_str(), H5T_NATIVE_DOUBLE, time_filespace, time_cparms);
692 
693  // Create the dataspace for the attribute
694  hsize_t one = 1;
695  hid_t one_column_space = H5Screate_simple(1, &one, NULL);
696 
697  // Create an attribute for the time unit
698  hid_t unit_attr = H5Acreate(mUnlimitedDatasetId, "Unit", string_type, one_column_space, H5P_DEFAULT);
699 
700  // Create an attribute for the time name
701  hid_t name_attr = H5Acreate(mUnlimitedDatasetId, "Name", string_type, one_column_space, H5P_DEFAULT);
702 
703  // Copy the unit to a string MAX_STRING_SIZE long and write it
704  char unit_string[MAX_STRING_SIZE];
705  strcpy(unit_string, mUnlimitedDimensionUnit.c_str());
706  H5Awrite(unit_attr, string_type, unit_string);
707 
708  // Copy the name to a string MAX_STRING_SIZE long and write it
709  char name_string[MAX_STRING_SIZE];
710  strcpy(name_string, mUnlimitedDimensionName.c_str());
711  H5Awrite(name_attr, string_type, name_string);
712 
713  // Close the filespace
714  H5Pclose(time_cparms);
715  H5Sclose(one_column_space);
716  H5Aclose(unit_attr);
717  H5Aclose(name_attr);
718  H5Sclose(time_filespace);
719  }
720 
721  /*
722  * Create the provenance attribute
723  */
724 
725  // Create a longer type for 'string'
726  const unsigned MAX_PROVENANCE_STRING_SIZE = 1023;
727  hid_t long_string_type = H5Tcopy(H5T_C_S1);
728  H5Tset_size(long_string_type, MAX_PROVENANCE_STRING_SIZE );
729  hsize_t prov_columns[1] = {1};
730  hid_t provenance_space = H5Screate_simple(1, prov_columns, NULL);
731  char* provenance_data = (char*) malloc(sizeof(char) * MAX_PROVENANCE_STRING_SIZE);
732  assert( ChasteBuildInfo::GetProvenanceString().length() < MAX_PROVENANCE_STRING_SIZE);
733 
734  strcpy(provenance_data, ChasteBuildInfo::GetProvenanceString().c_str());
735  hid_t prov_attr = H5Acreate(mVariablesDatasetId, "Chaste Provenance", long_string_type, provenance_space, H5P_DEFAULT);
736 
737  // Write to the attribute
738  H5Awrite(prov_attr, long_string_type, provenance_data);
739 
740  // Close dataspace & attribute
741  free(provenance_data);
742  H5Sclose(provenance_space);
743  H5Aclose(prov_attr);
744 }
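/*
 * Sketch of the file layout produced above (h5dump-style, illustrative
 * only), for a complete-data writer with two variables and the default
 * dataset name "Data":
 *
 *     /Data             chunked, extendible: [timesteps] x [nodes] x [2]
 *         attr "Variable Details"   e.g. { "V(mV)", "Phi_e(mV)" }
 *         attr "IsDataComplete"     1
 *         attr "Chaste Provenance"  build/version string
 *     /Data_Unlimited   1D, extendible: [timesteps]
 *         attr "Name"   e.g. "Time"
 *         attr "Unit"   e.g. "ms"
 *
 * A "NodeMap" attribute is only added when the data are incomplete.
 */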
745 
746 void Hdf5DataWriter::PutVector(int variableID, Vec petscVector)
747 {
748  if (mIsInDefineMode)
749  {
750  EXCEPTION("Cannot write data while in define mode.");
751  }
752 
753  int vector_size;
754  VecGetSize(petscVector, &vector_size);
755 
756  if ((unsigned) vector_size != mDataFixedDimensionSize)
757  {
758  EXCEPTION("Vector size doesn't match fixed dimension");
759  }
760 
761  // Make sure that everything is actually extended to the correct dimension.
762  PossiblyExtend();
763 
764  Vec output_petsc_vector;
765 
766  // Decide what to write
767  if (mSinglePermutation == NULL)
768  {
769  // No permutation - just write
770  output_petsc_vector = petscVector;
771  }
772  else
773  {
774  assert(mIsDataComplete);
775  // Make a vector with the same pattern (doesn't copy the data)
776  VecDuplicate(petscVector, &output_petsc_vector);
777 
778  // Apply the permutation matrix
779  MatMult(mSinglePermutation, petscVector, output_petsc_vector);
780  }
781 
782  // Define a dataset in memory for this process
783  hid_t memspace=0;
784  if (mNumberOwned != 0)
785  {
786  hsize_t v_size[1] = {mNumberOwned};
787  memspace = H5Screate_simple(1, v_size, NULL);
788  }
789 
790  // Select hyperslab in the file
791  hsize_t count[DATASET_DIMS] = {1, mNumberOwned, 1};
792  hsize_t offset_dims[DATASET_DIMS] = {mCurrentTimeStep, mOffset, (unsigned)(variableID)};
793  hid_t file_dataspace = H5Dget_space(mVariablesDatasetId);
794 
795  // Create property list for collective dataset
796  hid_t property_list_id = H5Pcreate(H5P_DATASET_XFER);
797  H5Pset_dxpl_mpio(property_list_id, H5FD_MPIO_COLLECTIVE);
798 
799  H5Sselect_hyperslab(file_dataspace, H5S_SELECT_SET, offset_dims, NULL, count, NULL);
800 
801  double* p_petsc_vector;
802  VecGetArray(output_petsc_vector, &p_petsc_vector);
803 
804  if (mIsDataComplete)
805  {
806  H5Dwrite(mVariablesDatasetId, H5T_NATIVE_DOUBLE, memspace, file_dataspace, property_list_id, p_petsc_vector);
807  }
808  else
809  {
810  if (mUseMatrixForIncompleteData)
811  {
812  // Make a vector of the required size
813  output_petsc_vector = PetscTools::CreateVec(mFileFixedDimensionSize, mNumberOwned);
814 
815  // Fill the vector by multiplying complete data by incomplete output matrix
816  MatMult(mSingleIncompleteOutputMatrix, petscVector, output_petsc_vector);
817 
818  double* p_petsc_vector_incomplete;
819  VecGetArray(output_petsc_vector, &p_petsc_vector_incomplete);
820  H5Dwrite(mVariablesDatasetId, H5T_NATIVE_DOUBLE, memspace, file_dataspace, property_list_id, p_petsc_vector_incomplete);
821  }
822  else
823  {
824  // Make a local copy of the data you own
825  boost::scoped_array<double> local_data(new double[mNumberOwned]);
826  for (unsigned i=0; i<mNumberOwned; i++)
827  {
828  local_data[i] = p_petsc_vector[ mIncompleteNodeIndices[mOffset+i]-mLo ];
829 
830  }
831  H5Dwrite(mVariablesDatasetId, H5T_NATIVE_DOUBLE, memspace, file_dataspace, property_list_id, local_data.get());
832  }
833  }
834 
835  VecRestoreArray(output_petsc_vector, &p_petsc_vector);
836 
837  H5Pclose(property_list_id);
838  H5Sclose(file_dataspace);
839  if (mNumberOwned !=0)
840  {
841  H5Sclose(memspace);
842  }
843 
844  if (petscVector != output_petsc_vector)
845  {
846  // Free local vector
847  PetscTools::Destroy(output_petsc_vector);
848  }
849 }
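/*
 * A sketch of the per-time-step write pattern this method is used in
 * (illustrative only; `writer`, `v_id`, `solution`, `num_steps` and `dt`
 * are assumed):
 *
 *     for (unsigned step=0; step<num_steps; step++)
 *     {
 *         // ... update solution ...
 *         writer.PutVector(v_id, solution);
 *         writer.PutUnlimitedVariable(step*dt);
 *         writer.AdvanceAlongUnlimitedDimension();
 *     }
 */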
850 
851 void Hdf5DataWriter::PutStripedVector(std::vector<int> variableIDs, Vec petscVector)
852 {
853  if (mIsInDefineMode)
854  {
855  EXCEPTION("Cannot write data while in define mode.");
856  }
857 
858  if (variableIDs.size() <= 1)
859  {
860  EXCEPTION("The PutStripedVector method requires at least two variables ID. If only one is needed, use PutVector method instead");
861  }
862 
863  const unsigned NUM_STRIPES=variableIDs.size();
864 
865  int firstVariableID=variableIDs[0];
866 
867  // Currently the method only works with consecutive columns, can be extended if needed
868  for (unsigned i = 1; i < variableIDs.size(); i++)
869  {
870  if (variableIDs[i]-variableIDs[i-1] != 1)
871  {
872  EXCEPTION("Columns should be consecutive. Try reordering them.");
873  }
874  }
875 
876  int vector_size;
877  VecGetSize(petscVector, &vector_size);
878 
879  if ((unsigned) vector_size != NUM_STRIPES*mDataFixedDimensionSize)
880  {
881  EXCEPTION("Vector size doesn't match fixed dimension");
882  }
883 
884  // Make sure that everything is actually extended to the correct dimension
885  PossiblyExtend();
886 
887  Vec output_petsc_vector;
888 
889  // Decide what to write
890  if (mDoublePermutation == NULL)
891  {
892  // No permutation - just write
893  output_petsc_vector = petscVector;
894  }
895  else
896  {
897  assert(mIsDataComplete);
898  // Make a vector with the same pattern (doesn't copy the data)
899  VecDuplicate(petscVector, &output_petsc_vector);
900 
901  // Apply the permutation matrix
902  MatMult(mDoublePermutation, petscVector, output_petsc_vector);
903  }
904  // Define a dataset in memory for this process
905  hid_t memspace=0;
906  if (mNumberOwned !=0)
907  {
908  hsize_t v_size[1] = {mNumberOwned*NUM_STRIPES};
909  memspace = H5Screate_simple(1, v_size, NULL);
910  }
911 
912  // Select hyperslab in the file
913  hsize_t start[DATASET_DIMS] = {mCurrentTimeStep, mOffset, (unsigned)(firstVariableID)};
914  hsize_t stride[DATASET_DIMS] = {1, 1, 1};//we are imposing contiguous variables, hence the stride is 1 (3rd component)
915  hsize_t block_size[DATASET_DIMS] = {1, mNumberOwned, 1};
916  hsize_t number_blocks[DATASET_DIMS] = {1, 1, NUM_STRIPES};
917 
918  hid_t hyperslab_space = H5Dget_space(mVariablesDatasetId);
919  H5Sselect_hyperslab(hyperslab_space, H5S_SELECT_SET, start, stride, number_blocks, block_size);
920 
921  // Create property list for collective dataset write, and write! Finally.
922  hid_t property_list_id = H5Pcreate(H5P_DATASET_XFER);
923  H5Pset_dxpl_mpio(property_list_id, H5FD_MPIO_COLLECTIVE);
924 
925  double* p_petsc_vector;
926  VecGetArray(output_petsc_vector, &p_petsc_vector);
927 
928  if (mIsDataComplete)
929  {
930  H5Dwrite(mVariablesDatasetId, H5T_NATIVE_DOUBLE, memspace, hyperslab_space, property_list_id, p_petsc_vector);
931  }
932  else
933  {
934  if (variableIDs.size() < 3) // incomplete data and striped vector is supported only for NUM_STRIPES=2...for the moment
935  {
936  if (mUseMatrixForIncompleteData)
937  {
938  // Make a vector of the required size
939  output_petsc_vector = PetscTools::CreateVec(2*mFileFixedDimensionSize, 2*mNumberOwned);
940 
941  // Fill the vector by multiplying complete data by incomplete output matrix
942  MatMult(mDoubleIncompleteOutputMatrix, petscVector, output_petsc_vector);
943 
944  double* p_petsc_vector_incomplete;
945  VecGetArray(output_petsc_vector, &p_petsc_vector_incomplete);
946 
947  H5Dwrite(mVariablesDatasetId, H5T_NATIVE_DOUBLE, memspace, hyperslab_space, property_list_id, p_petsc_vector_incomplete);
948  }
949  else
950  {
951  // Make a local copy of the data you own
952  boost::scoped_array<double> local_data(new double[mNumberOwned*NUM_STRIPES]);
953  for (unsigned i=0; i<mNumberOwned; i++)
954  {
955  unsigned local_node_number = mIncompleteNodeIndices[mOffset+i] - mLo;
956  local_data[NUM_STRIPES*i] = p_petsc_vector[ local_node_number*NUM_STRIPES ];
957  local_data[NUM_STRIPES*i+1] = p_petsc_vector[ local_node_number*NUM_STRIPES + 1];
958  }
959 
960  H5Dwrite(mVariablesDatasetId, H5T_NATIVE_DOUBLE, memspace, hyperslab_space, property_list_id, local_data.get());
961  }
962  }
963  else
964  {
965  EXCEPTION("The PutStripedVector functionality for incomplete data is supported for only 2 stripes");
966  }
967  }
968 
969  VecRestoreArray(output_petsc_vector, &p_petsc_vector);
970 
971  H5Sclose(hyperslab_space);
972  if (mNumberOwned != 0)
973  {
974  H5Sclose(memspace);
975  }
976  H5Pclose(property_list_id);
977 
978  if (petscVector != output_petsc_vector)
979  {
980  // Free local vector
981  PetscTools::Destroy(output_petsc_vector);
982  }
983 }
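/*
 * Illustration of the striping above, assuming NUM_STRIPES = 2 with
 * consecutive variable IDs for, say, "V" and "Phi_e": petscVector holds the
 * interleaved values [V_0, Phi_e_0, V_1, Phi_e_1, ...], and the block/stride
 * hyperslab writes each (V_i, Phi_e_i) pair into the two variable columns of
 * row i of the dataset at the current time step.
 */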
984 
985 void Hdf5DataWriter::PutUnlimitedVariable(double value)
986 {
987  if (mIsInDefineMode)
988  {
989  EXCEPTION("Cannot write data while in define mode.");
990  }
991 
992  if (!mIsUnlimitedDimensionSet)
993  {
994  EXCEPTION("PutUnlimitedVariable() called but no unlimited dimension has been set");
995  }
996 
997  // Make sure that everything is actually extended to the correct dimension.
998  PossiblyExtend();
999 
1000  // This data is only written by the master
1001  if (!PetscTools::AmMaster())
1002  {
1003  return;
1004  }
1005 
1006  hsize_t size[1] = {1};
1007  hid_t memspace = H5Screate_simple(1, size, NULL);
1008 
1009  // Select hyperslab in the file.
1010  hsize_t count[1] = {1};
1011  hsize_t offset[1] = {mCurrentTimeStep};
1012  hid_t hyperslab_space = H5Dget_space(mUnlimitedDatasetId);
1013  H5Sselect_hyperslab(hyperslab_space, H5S_SELECT_SET, offset, NULL, count, NULL);
1014 
1015  H5Dwrite(mUnlimitedDatasetId, H5T_NATIVE_DOUBLE, memspace, hyperslab_space, H5P_DEFAULT, &value);
1016 
1017  H5Sclose(hyperslab_space);
1018  H5Sclose(memspace);
1019 }
1020 
1021 void Hdf5DataWriter::Close()
1022 {
1023  if (mIsInDefineMode)
1024  {
1025  return; // Nothing to do...
1026  }
1027 
1028  H5Dclose(mVariablesDatasetId);
1029  if (mIsUnlimitedDimensionSet)
1030  {
1031  H5Dclose(mUnlimitedDatasetId);
1032  }
1033  H5Fclose(mFileId);
1034 
1035  // Cope with being called twice (e.g. if a user calls Close then the destructor)
1036  mIsInDefineMode = true;
1037 }
1038 
1039 void Hdf5DataWriter::DefineUnlimitedDimension(const std::string& rVariableName,
1040  const std::string& rVariableUnits,
1041  unsigned estimatedLength)
1042 {
1043  if (mIsUnlimitedDimensionSet)
1044  {
1045  EXCEPTION("Unlimited dimension already set. Cannot be defined twice");
1046  }
1047 
1048  if (!mIsInDefineMode)
1049  {
1050  EXCEPTION("Cannot define variables when not in Define mode");
1051  }
1052 
1053  this->mIsUnlimitedDimensionSet = true;
1054  this->mUnlimitedDimensionName = rVariableName;
1055  this->mUnlimitedDimensionUnit = rVariableUnits;
1056  mEstimatedUnlimitedLength = estimatedLength;
1057 }
1058 
1059 void Hdf5DataWriter::AdvanceAlongUnlimitedDimension()
1060 {
1061  if (!mIsUnlimitedDimensionSet)
1062  {
1063  EXCEPTION("Trying to advance along an unlimited dimension without having defined any");
1064  }
1065 
1066  mCurrentTimeStep++;
1067 
1068  /*
1069  * Extend the dataset (only reached when adding to an existing dataset,
1070  * or if mEstimatedUnlimitedLength hasn't been set and has defaulted to 1).
1071  */
1072  if ( mCurrentTimeStep >= (long unsigned) mEstimatedUnlimitedLength )
1073  {
1074  mDatasetDims[0]++;
1075  mNeedExtend = true;
1076  }
1077 }
1078 
1079 void Hdf5DataWriter::PossiblyExtend()
1080 {
1081  if (mNeedExtend)
1082  {
1083 #if H5_VERS_MAJOR>=1 && H5_VERS_MINOR>=8 // HDF5 1.8+
1084  H5Dset_extent( mVariablesDatasetId, mDatasetDims );
1085  H5Dset_extent( mUnlimitedDatasetId, mDatasetDims );
1086 #else // Deprecated
1087  H5Dextend( mVariablesDatasetId, mDatasetDims );
1088  H5Dextend( mUnlimitedDatasetId, mDatasetDims );
1089 #endif
1090  }
1091  mNeedExtend = false;
1092 }
1093 
1094 void Hdf5DataWriter::EmptyDataset()
1095 {
1096  // Set internal counter to 0
1097  mCurrentTimeStep = 0;
1098  // Set dataset to 1 x nodes x vars
1099  mDatasetDims[0] = 1;
1100  mNeedExtend = 1;
1101  PossiblyExtend(); // Abusing the notation here, this is probably a contraction.
1102 }
1103 
1104 int Hdf5DataWriter::GetVariableByName(const std::string& rVariableName)
1105 {
1106  int id = -1;
1107 
1108  // Check for the variable name in the existing variables
1109  for (unsigned index=0; index<mVariables.size(); index++)
1110  {
1111  if (mVariables[index].mVariableName == rVariableName)
1112  {
1113  id = index;
1114  break;
1115  }
1116  }
1117  if (id == -1)
1118  {
1119  EXCEPTION("Variable does not exist in hdf5 definitions.");
1120  }
1121  return id;
1122 }
1123 
1124 bool Hdf5DataWriter::ApplyPermutation(const std::vector<unsigned>& rPermutation, bool unsafeExtendingMode)
1125 {
1126  if (unsafeExtendingMode == false && !mIsInDefineMode)
1127  {
1128  EXCEPTION("Cannot define permutation when not in Define mode");
1129  }
1130 
1131  if (rPermutation.empty())
1132  {
1133  return false;
1134  }
1135 
1136  if (rPermutation.size() != mFileFixedDimensionSize ||
1137  rPermutation.size() != mDataFixedDimensionSize)
1138  {
1139  EXCEPTION("Permutation doesn't match the expected problem size");
1140  }
1141 
1142  // Permutation checker
1143  std::set<unsigned> permutation_pigeon_hole;
1144 
1145  // Fill up the pigeon holes
1146  bool identity_map = true;
1147  for (unsigned i=0; i<mDataFixedDimensionSize; i++)
1148  {
1149  permutation_pigeon_hole.insert(rPermutation[i]);
1150  if (identity_map && i != rPermutation[i])
1151  {
1152  identity_map = false;
1153  }
1154  }
1155  if (identity_map)
1156  {
1157  // Do nothing
1158  return false;
1159  }
1160 
1161  /*
1162  * The pigeon-hole principle states that each index appears exactly once
1163  * so if any don't appear then either one appears twice or something out of
1164  * scope has appeared.
1165  */
1166  for (unsigned i=0; i<mDataFixedDimensionSize; i++)
1167  {
1168  if (permutation_pigeon_hole.count(i) != 1u)
1169  {
1170  EXCEPTION("Permutation vector doesn't contain a valid permutation");
1171  }
1172  }
1173  // Make sure we've not done it already
1174  assert(mSinglePermutation == NULL);
1175  assert(mDoublePermutation == NULL);
1176  PetscTools::SetupMat(mSinglePermutation, mDataFixedDimensionSize, mDataFixedDimensionSize, 2, mHi - mLo, mHi - mLo);
1177  PetscTools::SetupMat(mDoublePermutation, 2*mDataFixedDimensionSize, 2*mDataFixedDimensionSize, 4, 2*(mHi - mLo), 2*(mHi - mLo));
1178 #if (PETSC_VERSION_MAJOR == 3) //PETSc 3.x.x
1179  MatSetOption(mSinglePermutation, MAT_IGNORE_OFF_PROC_ENTRIES, PETSC_TRUE);
1180  MatSetOption(mDoublePermutation, MAT_IGNORE_OFF_PROC_ENTRIES, PETSC_TRUE);
1181 #else
1182  MatSetOption(mSinglePermutation, MAT_IGNORE_OFF_PROC_ENTRIES);
1183  MatSetOption(mDoublePermutation, MAT_IGNORE_OFF_PROC_ENTRIES);
1184 #endif
1185  // Only do local rows
1186  for (unsigned row_index=mLo; row_index<mHi; row_index++)
1187  {
1188  // Put zero on the diagonal
1189  MatSetValue(mSinglePermutation, row_index, row_index, 0.0, INSERT_VALUES);
1190 
1191  // Put one at (i,j)
1192  MatSetValue(mSinglePermutation, row_index, rPermutation[row_index], 1.0, INSERT_VALUES);
1193 
1194  unsigned bi_index = 2*row_index;
1195  unsigned perm_index = 2*rPermutation[row_index];
1196 
1197  // Put zeroes on the diagonal
1198  MatSetValue(mDoublePermutation, bi_index, bi_index, 0.0, INSERT_VALUES);
1199  MatSetValue(mDoublePermutation, bi_index+1, bi_index+1, 0.0, INSERT_VALUES);
1200 
1201  // Put ones at (i,j)
1202  MatSetValue(mDoublePermutation, bi_index, perm_index, 1.0, INSERT_VALUES);
1203  MatSetValue(mDoublePermutation, bi_index+1, perm_index+1, 1.0, INSERT_VALUES);
1204  }
1205  MatAssemblyBegin(mSinglePermutation, MAT_FINAL_ASSEMBLY);
1206  MatAssemblyBegin(mDoublePermutation, MAT_FINAL_ASSEMBLY);
1207  MatAssemblyEnd(mSinglePermutation, MAT_FINAL_ASSEMBLY);
1208  MatAssemblyEnd(mDoublePermutation, MAT_FINAL_ASSEMBLY);
1209  return true;
1210 }
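/*
 * Illustration of the permutation matrix assembled above, for a hypothetical
 * three-node permutation rPermutation = {2, 0, 1}: each row i carries a one
 * in column rPermutation[i],
 *
 *     [ 0 0 1 ]
 *     [ 1 0 0 ]
 *     [ 0 1 0 ]
 *
 * so MatMult maps input entry rPermutation[i] to output entry i. The double
 * version does the same for interleaved two-variable (striped) vectors.
 */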
1211 
1212 void Hdf5DataWriter::SetFixedChunkSize(const unsigned& rTimestepsPerChunk,
1213  const unsigned& rNodesPerChunk,
1214  const unsigned& rVariablesPerChunk)
1215 {
1216  assert(mIsInDefineMode);
1217 
1218  mUseOptimalChunkSizeAlgorithm = false;
1219  mFixedChunkSize[0] = rTimestepsPerChunk;
1220  mFixedChunkSize[1] = rNodesPerChunk;
1221  mFixedChunkSize[2] = rVariablesPerChunk;
1222 }
1223 
1224 hsize_t Hdf5DataWriter::CalculateNumberOfChunks()
1225 {
1226  // Number of chunks for istore_k optimisation
1227  hsize_t num_chunks = 1;
1228  for (unsigned i=0; i<DATASET_DIMS; ++i)
1229  {
1230  num_chunks *= CeilDivide(mDatasetDims[i], mChunkSize[i]);
1231  }
1232  return num_chunks;
1233 }
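/*
 * Worked example: with mDatasetDims = {1000, 20000, 2} and
 * mChunkSize = {20, 400, 2}, CeilDivide gives 50 * 50 * 1 = 2500 chunks.
 */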
1234 
1235 void Hdf5DataWriter::CalculateChunkDims( unsigned targetSize, unsigned* pChunkSizeInBytes, bool* pAllOneChunk )
1236 {
1237  bool all_one_chunk = true;
1238  unsigned chunk_size_in_bytes = 8u; // 8 bytes/double
1239  unsigned divisors[DATASET_DIMS];
1240  // Loop over dataset dimensions, dividing each dimension into the integer number of chunks that results
1241  // in the number of entries closest to the targetSize. This means the chunks will span the dataset with
1242  // as little waste as possible.
1243  for (unsigned i=0; i<DATASET_DIMS; ++i)
1244  {
1245  // What do I divide the dataset by to get targetSize entries per chunk?
1246  divisors[i] = CeilDivide(mDatasetDims[i], targetSize);
1247  // If I divide my dataset into divisors pieces, how big is each chunk?
1248  mChunkSize[i] = CeilDivide(mDatasetDims[i], divisors[i]);
1249  // If my chunks are this big, how many bytes is that?
1250  chunk_size_in_bytes *= mChunkSize[i];
1251  // Check if all divisors==1, which means we have one big chunk.
1252  all_one_chunk = all_one_chunk && divisors[i]==1u;
1253  }
1254  // Update pointers
1255  *pChunkSizeInBytes = chunk_size_in_bytes;
1256  *pAllOneChunk = all_one_chunk;
1257 }
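/*
 * Worked example, for targetSize = 100 with mDatasetDims = {500, 20000, 3}:
 * divisors = {5, 200, 1}, so mChunkSize = {100, 100, 3} and
 * chunk_size_in_bytes = 8 * 100 * 100 * 3 = 240000 (about 234 KiB);
 * all_one_chunk is false because not every divisor is 1.
 */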
1258 
1259 void Hdf5DataWriter::SetChunkSize()
1260 {
1261  /*
1262  * The size in each dimension is increased in step until the size of
1263  * the chunk exceeds a limit, or we end up with one big chunk.
1264  *
1265  * Also make sure we don't have too many chunks. Over 75 K makes the
1266  * H5Pset_istore_k optimisation above very detrimental to performance
1267  * according to "Notes from 31 July 2013" at:
1268  * http://confluence.diamond.ac.uk/display/Europroj/Ulrik+Pederson+-+Excalibur+Notes
1269  */
1270  const unsigned recommended_max_number_chunks = 75000;
1271  if (mUseOptimalChunkSizeAlgorithm)
1272  {
1273  /*
1274  * The line below sets the chunk size to (just under) 128 K chunks, which
1275  * seems to be a good compromise.
1276  *
1277  * For large problems performance usually improves with increased chunk size.
1278  * A sweet spot seems to be 1 M chunks.
1279  *
1280  * On a striped filesystem, for best performance, try to match the chunk size
1281  * and alignment (see "H5Pset_alignment" above) with the file stripe size.
1282  */
1283  unsigned target_size_in_bytes = 1024*1024/8; // 128 K
1284 
1285  unsigned target_size = 0;
1286  unsigned chunk_size_in_bytes;
1287  bool all_one_chunk;
1288 
1289  // While we have too many chunks, make target_size_in_bytes larger
1290  do
1291  {
1292  // While the chunks are too small, make mChunkSize[i]s larger, unless
1293  // we end up with a chunk that spans the dataset.
1294  do
1295  {
1296  target_size++;
1297  CalculateChunkDims(target_size, &chunk_size_in_bytes, &all_one_chunk);
1298  }
1299  while ( chunk_size_in_bytes < target_size_in_bytes && !all_one_chunk);
1300 
1301  // Go one step back if the target size has been exceeded
1302  if ( chunk_size_in_bytes > target_size_in_bytes && !all_one_chunk )
1303  {
1304  target_size--;
1305  CalculateChunkDims(target_size, &chunk_size_in_bytes, &all_one_chunk);
1306  }
1307 
1308  mNumberOfChunks = CalculateNumberOfChunks();
1309 
1310  /*
1311  if ( PetscTools::AmMaster() )
1312  {
1313  std::cout << "Hdf5DataWriter dataset contains " << mNumberOfChunks << " chunks of " << chunk_size_in_bytes << " B." << std::endl;
1314  }
1315  */
1316 
1317  target_size_in_bytes *= 2; // Increase target size for next iteration
1318  }
1319  while ( mNumberOfChunks > recommended_max_number_chunks );
1320  }
1321  /*
1322  * ... unless the user has set chunk dimensions explicitly, in which case
1323  * use those. The program will exit if the size results in too many chunks.
1324  */
1325  else
1326  {
1327  for (unsigned i=0; i<DATASET_DIMS; ++i)
1328  {
1329  mChunkSize[i] = mFixedChunkSize[i];
1330  }
1331 
1332  mNumberOfChunks = CalculateNumberOfChunks();
1333 
1334  if ( mNumberOfChunks > recommended_max_number_chunks)
1335  {
1336  /*
1337  * The user-defined HDF5 chunk size has resulted in over 75,000 chunks,
1338  * which is known to be extremely detrimental to performance. Try
1339  * increasing the chunk dimensions or (better) not using fixed chunk sizes.
1340  */
1341  NEVER_REACHED;
1342  }
1343  }
1344 }