Chaste  Release::2024.1
Hdf5DataWriter.cpp
1 /*
2 
3 Copyright (c) 2005-2021, University of Oxford.
4 All rights reserved.
5 
6 University of Oxford means the Chancellor, Masters and Scholars of the
7 University of Oxford, having an administrative office at Wellington
8 Square, Oxford OX1 2JD, UK.
9 
10 This file is part of Chaste.
11 
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14  * Redistributions of source code must retain the above copyright notice,
15  this list of conditions and the following disclaimer.
16  * Redistributions in binary form must reproduce the above copyright notice,
17  this list of conditions and the following disclaimer in the documentation
18  and/or other materials provided with the distribution.
19  * Neither the name of the University of Oxford nor the names of its
20  contributors may be used to endorse or promote products derived from this
21  software without specific prior written permission.
22 
23 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
24 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
27 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
29 GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32 OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 
34 */
35 
36 /*
37  * Implementation file for Hdf5DataWriter class.
38  *
39  */
40 #include <boost/scoped_array.hpp>
41 #include <cstring> //For strcmp etc. Needed in gcc-4.4
42 #include <set>
43 
44 #include "Hdf5DataWriter.hpp"
45 
46 #include "Exception.hpp"
47 #include "MathsCustomFunctions.hpp"
48 #include "OutputFileHandler.hpp"
49 #include "PetscTools.hpp"
50 #include "Version.hpp"
51 
53  const std::string& rDirectory,
54  const std::string& rBaseName,
55  bool cleanDirectory,
56  bool extendData,
57  std::string datasetName,
58  bool useCache)
59  : AbstractHdf5Access(rDirectory, rBaseName, datasetName),
60  mrVectorFactory(rVectorFactory),
61  mCleanDirectory(cleanDirectory),
62  mUseExistingFile(extendData),
63  mIsInDefineMode(true),
64  mIsFixedDimensionSet(false),
65  mEstimatedUnlimitedLength(1u),
66  mFileFixedDimensionSize(0u),
67  mDataFixedDimensionSize(0u),
68  mLo(mrVectorFactory.GetLow()),
69  mHi(mrVectorFactory.GetHigh()),
70  mNumberOwned(0u),
71  mOffset(0u),
72  mNeedExtend(false),
73  mCurrentTimeStep(0),
74  mSinglePermutation(nullptr),
75  mDoublePermutation(nullptr),
76  mUseOptimalChunkSizeAlgorithm(true),
77  mNumberOfChunks(0),
78  mChunkTargetSize(0x20000), // 128 K
79  mAlignment(0), // No alignment
80  mUseCache(useCache),
81  mCacheFirstTimeStep(0u)
82 {
83  mChunkSize[0] = 0;
84  mChunkSize[1] = 0;
85  mChunkSize[2] = 0;
86  mFixedChunkSize[0] = 0;
87  mFixedChunkSize[1] = 0;
88  mFixedChunkSize[2] = 0;
89 
91  {
92  EXCEPTION("You are asking to delete a file and then extend it, change arguments to constructor.");
93  }
94 
95  if (!mUseExistingFile && mDatasetName != "Data")
96  {
97  //User is trying to add a new dataset, but they are not extending a existing file
98  EXCEPTION("Adding new data only makes sense when extending an existing file");
99  }
100 
101  if (mUseExistingFile)
102  {
103  // Variables should already be defined if we are extending.
104  mIsInDefineMode = false;
105 
106  // If the file exists already, open it - this call will check it exists.
107  OpenFile();
108 
109  // If the dataset we are interested in doesn't exist then close the file
110  // We will go on to define variables and open the file as usual (except for it pre-existing).
112  {
113  //std::cout << "Dataset: " << mDatasetName << " doesn't exist in the file.\n";
114  H5Fclose(mFileId);
115  mIsInDefineMode = true;
116  }
117  // If dataset does exist then leave file open and try to extend it.
118  else
119  {
120  // Where to find the file
121  assert(mCleanDirectory == false);
122 
123  mVariablesDatasetId = H5Dopen(mFileId, mDatasetName.c_str(), H5P_DEFAULT);
124  hid_t variables_dataspace = H5Dget_space(mVariablesDatasetId);
125  //unsigned variables_dataset_rank = H5Sget_simple_extent_ndims(variables_dataspace);
126  hsize_t dataset_max_sizes[DATASET_DIMS];
127  H5Sget_simple_extent_dims(variables_dataspace, mDatasetDims, dataset_max_sizes); // put dims into mDatasetDims
128  H5Sclose(variables_dataspace);
129 
130  // Check that an unlimited dimension is defined
131  if (dataset_max_sizes[0] != H5S_UNLIMITED)
132  {
133  H5Dclose(mVariablesDatasetId);
134  H5Fclose(mFileId);
135  EXCEPTION("Tried to open a datafile for extending which doesn't have an unlimited dimension.");
136  }
138 
139  // Sanity check other dimension sizes
140  for (unsigned i = 1; i < DATASET_DIMS; i++) // Zero is excluded since it is unlimited
141  {
142  assert(mDatasetDims[i] == dataset_max_sizes[i]);
143  }
145  mIsFixedDimensionSet = true;
146  mVariables.reserve(mDatasetDims[2]);
147 
148  // Figure out what the variables are
149  hid_t attribute_id = H5Aopen_name(mVariablesDatasetId, "Variable Details");
150  hid_t attribute_type = H5Aget_type(attribute_id);
151  hid_t attribute_space = H5Aget_space(attribute_id);
152  hsize_t attr_dataspace_dim;
153  H5Sget_simple_extent_dims(attribute_space, &attr_dataspace_dim, nullptr);
154  unsigned num_columns = H5Sget_simple_extent_npoints(attribute_space);
155  assert(num_columns == mDatasetDims[2]); // I think...
156 
157  char* string_array = (char*)malloc(sizeof(char) * MAX_STRING_SIZE * (int)num_columns);
158  H5Aread(attribute_id, attribute_type, string_array);
159 
160  // Loop over columns/variables
161  for (unsigned index = 0; index < num_columns; index++)
162  {
163  // Read the string from the raw vector
164  std::string column_name_unit(&string_array[MAX_STRING_SIZE * index]);
165 
166  // Find location of unit name
167  size_t name_length = column_name_unit.find('(');
168  size_t unit_length = column_name_unit.find(')') - name_length - 1;
169 
170  // Create the variable
171  DataWriterVariable var;
172  var.mVariableName = column_name_unit.substr(0, name_length);
173  var.mVariableUnits = column_name_unit.substr(name_length + 1, unit_length);
174  mVariables.push_back(var);
175  }
176 
177  // Free memory, release ids
178  free(string_array);
179  H5Tclose(attribute_type);
180  H5Sclose(attribute_space);
181  H5Aclose(attribute_id);
182 
183  // Now deal with time
185 
186  // How many time steps have been written so far?
187  hid_t timestep_dataspace = H5Dget_space(mUnlimitedDatasetId);
188  hsize_t num_timesteps;
189  H5Sget_simple_extent_dims(timestep_dataspace, &num_timesteps, nullptr);
190  H5Sclose(timestep_dataspace);
191  mCurrentTimeStep = (long)num_timesteps - 1;
193 
194  // Incomplete data?
195  attribute_id = H5Aopen_name(mVariablesDatasetId, "IsDataComplete");
196  if (attribute_id < 0)
197  {
198  // LCOV_EXCL_START
199  // Old format, before we added the attribute.
200  EXCEPTION("Extending old-format files isn't supported.");
201  // LCOV_EXCL_STOP
202  }
203  else
204  {
205  attribute_type = H5Aget_type(attribute_id);
206  attribute_space = H5Aget_space(attribute_id);
207  unsigned is_data_complete;
208  H5Aread(attribute_id, H5T_NATIVE_UINT, &is_data_complete);
209  mIsDataComplete = (is_data_complete == 1) ? true : false;
210  H5Tclose(attribute_type);
211  H5Sclose(attribute_space);
212  H5Aclose(attribute_id);
213  }
214  if (mIsDataComplete)
215  {
217  mOffset = mLo;
219  }
220  else
221  {
222  // Read which nodes appear in the file (mIncompleteNodeIndices)
223  attribute_id = H5Aopen_name(mVariablesDatasetId, "NodeMap");
224  attribute_type = H5Aget_type(attribute_id);
225  attribute_space = H5Aget_space(attribute_id);
226 
227  // Get the dataset/dataspace dimensions
228  unsigned num_node_indices = H5Sget_simple_extent_npoints(attribute_space);
229 
230  // Read data from hyperslab in the file into the hyperslab in memory
231  mIncompleteNodeIndices.clear();
232  mIncompleteNodeIndices.resize(num_node_indices);
233  H5Aread(attribute_id, H5T_NATIVE_UINT, &mIncompleteNodeIndices[0]);
234  // Assume there is no permutation when extending. We're going throw an exception at the
235  // end of the block (so setting mIncompletePermIndices is only for safety.
237 
238  // Release ids
239  H5Tclose(attribute_type);
240  H5Sclose(attribute_space);
241  H5Aclose(attribute_id);
242 
243  // Set up what data we can
249  mDataFixedDimensionSize = UINT_MAX;
250  H5Dclose(mVariablesDatasetId);
251  H5Dclose(mUnlimitedDatasetId);
252  H5Fclose(mFileId);
253  EXCEPTION("Unable to extend an incomplete data file at present.");
254  }
255 
256  // Record chunk dimensions (useful for cached write mode)
257  hid_t dcpl = H5Dget_create_plist(mVariablesDatasetId); // get dataset creation property list
258  H5Pget_chunk(dcpl, DATASET_DIMS, mChunkSize);
259  if (mUseCache)
260  {
261  // Reserve space. Enough for one chunk in the time dimension.
262  mDataCache.reserve(mChunkSize[0] * mNumberOwned * mDatasetDims[2]);
263  }
264 
265  // Done
267  }
268  }
269 }
270 
272 {
273  Close();
274 
275  if (mSinglePermutation)
276  {
278  }
279  if (mDoublePermutation)
280  {
282  }
283 }
284 
286 {
287  OutputFileHandler output_file_handler(mDirectory, mCleanDirectory);
288  std::string file_name = mDirectory.GetAbsolutePath() + mBaseName + ".h5";
289 
290  if (mUseExistingFile)
291  {
292  FileFinder h5_file(file_name, RelativeTo::Absolute);
293  if (!h5_file.Exists())
294  {
295  EXCEPTION("Hdf5DataWriter could not open " + file_name + " , as it does not exist.");
296  }
297  }
298 
299  // Set up a property list saying how we'll open the file
300  hid_t fapl = H5Pcreate(H5P_FILE_ACCESS);
301  H5Pset_fapl_mpio(fapl, PETSC_COMM_WORLD, MPI_INFO_NULL);
302 
303  // Set size of each dimension in main dataset.
304  mDatasetDims[0] = mEstimatedUnlimitedLength; // While developing we got a non-documented "only the first dimension can be extendible" error.
305  mDatasetDims[1] = mFileFixedDimensionSize; // or should this be mDataFixedDimensionSize?
306  mDatasetDims[2] = mVariables.size();
307 
308  // Open the file and free the property list
309  std::string attempting_to;
310  if (mUseExistingFile)
311  {
312  mFileId = H5Fopen(file_name.c_str(), H5F_ACC_RDWR, fapl);
313  attempting_to = "open";
314  }
315  else
316  {
317  hid_t fcpl = H5Pcreate(H5P_FILE_CREATE);
318  /*
319  * Align objects to multiples of some number of bytes. Useful for
320  * striped filesystem e.g. Lustre. Ideally this should match the chunk
321  * size (see SetTargetChunkSize).
322  */
323  if (mAlignment != 0)
324  {
325  H5Pset_alignment(fapl, 0, mAlignment);
326  }
327 
328  /*
329  * The stripe size can be set on the directory just before creation of the H5
330  * file, and set back to normal just after, so that the H5 file inherits these
331  * settings, by uncommenting the lines below. Adjust the command for your
332  * specific filesystem!
333  *
334  * A simpler solution (where supported) might be to use an environment variable, e.g.
335  * export MPICH_MPIIO_HINTS="*.h5:striping_factor=48:striping_unit=1048576"
336  */
337  /*
338  std::string command;
339  // Turn on striping for directory, which the newly created HDF5 file will inherit.
340  if (PetscTools::AmMaster())
341  {
342  // Increase stripe count
343  command = "lfs setstripe --size 1M --count -1 "; // -1 means use all OSTs
344  command.append(mDirectory.GetAbsolutePath());
345  std::cout << command << std::endl;
346  system(command.c_str());
347  }
348  */
349 
350  // Create file
351  mFileId = H5Fcreate(file_name.c_str(), H5F_ACC_TRUNC, fcpl, fapl);
352 
353  /*
354  // Turn off striping so other output files stay unstriped.
355  if (PetscTools::AmMaster())
356  {
357  // Use one stripe
358  command = "lfs setstripe --size 1M --count 1 ";
359  command.append(mDirectory.GetAbsolutePath());
360  std::cout << command << std::endl;
361  system(command.c_str());
362  }
363  PetscTools::Barrier(); // Don't let other processes run away
364  */
365 
366  attempting_to = "create";
367  H5Pclose(fcpl);
368  }
369 
370  H5Pclose(fapl);
371 
372  if (mFileId < 0)
373  {
375  EXCEPTION("Hdf5DataWriter could not " << attempting_to << " " << file_name << " , H5F" << attempting_to << " error code = " << mFileId);
376  }
377 }
378 
379 void Hdf5DataWriter::DefineFixedDimension(long dimensionSize)
380 {
381  if (!mIsInDefineMode)
382  {
383  EXCEPTION("Cannot define variables when not in Define mode");
384  }
385  if (dimensionSize < 1)
386  {
387  EXCEPTION("Fixed dimension must be at least 1 long");
388  }
390  {
391  EXCEPTION("Fixed dimension already set");
392  }
393 
394  // Work out the ownership details
398  mOffset = mLo;
399  mFileFixedDimensionSize = dimensionSize;
400  mDataFixedDimensionSize = dimensionSize;
401  mIsFixedDimensionSet = true;
402 }
403 
404 void Hdf5DataWriter::DefineFixedDimension(const std::vector<unsigned>& rNodesToOuputOriginalIndices, const std::vector<unsigned>& rNodesToOuputPermutedIndices, long vecSize)
405 {
406  unsigned vector_size = rNodesToOuputOriginalIndices.size();
407 
408  for (unsigned index = 0; index < vector_size - 1; index++)
409  {
410  if (rNodesToOuputOriginalIndices[index] >= rNodesToOuputOriginalIndices[index + 1])
411  {
412  EXCEPTION("Input should be monotonic increasing");
413  }
414  }
415 
416  if ((int)rNodesToOuputOriginalIndices.back() >= vecSize || (int)rNodesToOuputPermutedIndices.back() >= vecSize)
417  {
418  EXCEPTION("Vector size doesn't match nodes to output");
419  }
420 
421  DefineFixedDimension(vecSize);
422 
423  mFileFixedDimensionSize = vector_size;
424  mIsDataComplete = false;
425  mIncompleteNodeIndices = rNodesToOuputOriginalIndices;
426  mIncompletePermIndices = rNodesToOuputPermutedIndices;
428 }
429 
431 {
432  mOffset = 0;
433  mNumberOwned = 0;
434 
435  if (mIncompleteNodeIndices != mIncompletePermIndices) //i.e. not the identity
436  {
437  //Need to reorder to columns so that mIncompletePermIndices is increasing
438  std::vector<std::pair <unsigned, unsigned> > indices;
439  for (unsigned i = 0; i < mIncompletePermIndices.size(); i++)
440  {
441  indices.push_back(std::make_pair(mIncompletePermIndices[i], mIncompleteNodeIndices[i]));
442  }
443  std::sort(indices.begin(),indices.end());
444  for (unsigned i = 0; i < mIncompletePermIndices.size(); i++)
445  {
446  mIncompletePermIndices[i]=indices[i].first;
447  mIncompleteNodeIndices[i]=indices[i].second;
448  }
449  }
450 
451  for (unsigned i = 0; i < mIncompletePermIndices.size(); i++)
452  {
453  if (mIncompletePermIndices[i] < mLo)
454  {
455  mOffset++;
456  }
457  else if (mIncompletePermIndices[i] < mHi)
458  {
459  mNumberOwned++;
460  }
461  }
462 }
463 
464 int Hdf5DataWriter::DefineVariable(const std::string& rVariableName,
465  const std::string& rVariableUnits)
466 {
467  if (!mIsInDefineMode)
468  {
469  EXCEPTION("Cannot define variables when not in Define mode");
470  }
471 
472  CheckVariableName(rVariableName);
473  CheckUnitsName(rVariableUnits);
474 
475  // Check for the variable being already defined
476  for (unsigned index = 0; index < mVariables.size(); index++)
477  {
478  if (mVariables[index].mVariableName == rVariableName)
479  {
480  EXCEPTION("Variable name already exists");
481  }
482  }
483 
484  DataWriterVariable new_variable;
485  new_variable.mVariableName = rVariableName;
486  new_variable.mVariableUnits = rVariableUnits;
487  int variable_id;
488 
489  // Add the variable to the variable vector
490  mVariables.push_back(new_variable);
491 
492  // Use the index of the variable vector as the variable ID.
493  // This is ok since there is no way to remove variables.
494  variable_id = mVariables.size() - 1;
495 
496  return variable_id;
497 }
498 
499 void Hdf5DataWriter::CheckVariableName(const std::string& rName)
500 {
501  if (rName.length() == 0)
502  {
503  EXCEPTION("Variable name not allowed: may not be blank.");
504  }
505  CheckUnitsName(rName);
506 }
507 
508 void Hdf5DataWriter::CheckUnitsName(const std::string& rName)
509 {
510  for (unsigned i = 0; i < rName.length(); i++)
511  {
512  if (!isalnum(rName[i]) && !(rName[i] == '_'))
513  {
514  std::string error = "Variable name/units '" + rName + "' not allowed: may only contain alphanumeric characters or '_'.";
515  EXCEPTION(error);
516  }
517  }
518 }
519 
521 {
522  return mIsInDefineMode;
523 }
524 
526 {
527  // Check that at least one variable has been defined
528  if (mVariables.size() < 1)
529  {
530  EXCEPTION("Cannot end define mode. No variables have been defined.");
531  }
532 
533  // Check that a fixed dimension has been defined
534  if (mIsFixedDimensionSet == false)
535  {
536  EXCEPTION("Cannot end define mode. One fixed dimension should be defined.");
537  }
538 
539  OpenFile();
540 
541  mIsInDefineMode = false;
542 
543  /*
544  * Create "Data" dataset
545  */
546 
547  // Set max dims of dataset
548  hsize_t dataset_max_dims[DATASET_DIMS];
550  {
551  dataset_max_dims[0] = H5S_UNLIMITED;
552  }
553  else
554  {
555  dataset_max_dims[0] = 1;
556  }
557  dataset_max_dims[1] = mDatasetDims[1];
558  dataset_max_dims[2] = mDatasetDims[2];
559 
560  // Set chunk dimensions for the new dataset
561  SetChunkSize();
562 
563  if (mUseCache)
564  {
565  // Reserve space. Enough for one chunk in the time dimension.
566  mDataCache.reserve(mChunkSize[0] * mNumberOwned * mDatasetDims[2]);
567  }
568 
569  // Create chunked dataset and clean up
570  hid_t cparms = H5Pcreate(H5P_DATASET_CREATE);
571  H5Pset_chunk(cparms, DATASET_DIMS, mChunkSize);
572  hid_t filespace = H5Screate_simple(DATASET_DIMS, mDatasetDims, dataset_max_dims);
573  mVariablesDatasetId = H5Dcreate(mFileId, mDatasetName.c_str(), H5T_NATIVE_DOUBLE, filespace,
574  H5P_DEFAULT, cparms, H5P_DEFAULT);
575  SetMainDatasetRawChunkCache(); // Set large cache (even though parallel drivers don't currently use it!)
576  H5Sclose(filespace);
577  H5Pclose(cparms);
578 
579  // Create dataspace for the name, unit attribute
580  const unsigned MAX_STRING_SIZE = 100;
581  hsize_t columns[1] = { mVariables.size() };
582  hid_t colspace = H5Screate_simple(1, columns, nullptr);
583 
584  // Create attribute for variable names
585  char* col_data = (char*)malloc(mVariables.size() * sizeof(char) * MAX_STRING_SIZE);
586 
587  char* col_data_offset = col_data;
588  for (unsigned var = 0; var < mVariables.size(); var++)
589  {
590  std::string full_name = mVariables[var].mVariableName + "(" + mVariables[var].mVariableUnits + ")";
591  strcpy(col_data_offset, full_name.c_str());
592  col_data_offset += sizeof(char) * MAX_STRING_SIZE;
593  }
594 
595  // Create the type 'string'
596  hid_t string_type = H5Tcopy(H5T_C_S1);
597  H5Tset_size(string_type, MAX_STRING_SIZE);
598  hid_t attr = H5Acreate(mVariablesDatasetId, "Variable Details", string_type, colspace,
599  H5P_DEFAULT, H5P_DEFAULT);
600 
601  // Write to the attribute
602  H5Awrite(attr, string_type, col_data);
603 
604  // Close dataspace & attribute
605  free(col_data);
606  H5Sclose(colspace);
607  H5Aclose(attr);
608 
609  // Create "boolean" attribute telling the data to be incomplete or not
610  columns[0] = 1;
611  colspace = H5Screate_simple(1, columns, nullptr);
612  attr = H5Acreate(mVariablesDatasetId, "IsDataComplete", H5T_NATIVE_UINT, colspace,
613  H5P_DEFAULT, H5P_DEFAULT);
614 
615  // Write to the attribute - note that the native boolean is not predictable
616  unsigned is_data_complete = mIsDataComplete ? 1 : 0;
617  H5Awrite(attr, H5T_NATIVE_UINT, &is_data_complete);
618 
619  H5Sclose(colspace);
620  H5Aclose(attr);
621 
622  if (!mIsDataComplete)
623  {
624  // We need to write a map
625  // Create "unsigned" attribute with the map
626  columns[0] = mFileFixedDimensionSize;
627  colspace = H5Screate_simple(1, columns, nullptr);
628  attr = H5Acreate(mVariablesDatasetId, "NodeMap", H5T_NATIVE_UINT, colspace,
629  H5P_DEFAULT, H5P_DEFAULT);
630 
631  // Write to the attribute (the original node index labels)
632  H5Awrite(attr, H5T_NATIVE_UINT, &mIncompleteNodeIndices[0]);
633 
634  H5Sclose(colspace);
635  H5Aclose(attr);
636  }
637 
638  /*
639  * Create "Time" dataset
640  */
642  {
643  hsize_t time_dataset_dims[1] = { mEstimatedUnlimitedLength };
644  hsize_t time_dataset_max_dims[1] = { H5S_UNLIMITED };
645 
646  /*
647  * Modify dataset creation properties to enable chunking. Set the chunk size in the "Time"
648  * dataset to 128 doubles, i.e. 1 KiB. See #2336.
649  */
650  hsize_t time_chunk_dims[1] = { 128u };
651  hid_t time_cparms = H5Pcreate(H5P_DATASET_CREATE);
652  H5Pset_chunk(time_cparms, 1, time_chunk_dims);
653 
654  hid_t time_filespace = H5Screate_simple(1, time_dataset_dims, time_dataset_max_dims);
655 
656  // Create the unlimited dimension dataset
657 
658  // * Files post r18257 (inc. Release 3.2 onwards) use "<DatasetName>_Unlimited" for "<DatasetName>"'s
659  // unlimited variable,
660  // - a new attribute "Name" has been added to the Unlimited Dataset to allow it to assign
661  // any name to the unlimited variable. Which can then be easily read by Hdf5DataReader.
662 
663  mUnlimitedDatasetId = H5Dcreate(mFileId, (mDatasetName + "_Unlimited").c_str(), H5T_NATIVE_DOUBLE, time_filespace,
664  H5P_DEFAULT, time_cparms, H5P_DEFAULT);
665 
666  // Create the dataspace for the attribute
667  hsize_t one = 1;
668  hid_t one_column_space = H5Screate_simple(1, &one, nullptr);
669 
670  // Create an attribute for the time unit
671  hid_t unit_attr = H5Acreate(mUnlimitedDatasetId, "Unit", string_type, one_column_space,
672  H5P_DEFAULT, H5P_DEFAULT);
673 
674  // Create an attribute for the time name
675  hid_t name_attr = H5Acreate(mUnlimitedDatasetId, "Name", string_type, one_column_space,
676  H5P_DEFAULT, H5P_DEFAULT);
677 
678  // Copy the unit to a string MAX_STRING_SIZE long and write it
679  char unit_string[MAX_STRING_SIZE];
680  strcpy(unit_string, mUnlimitedDimensionUnit.c_str());
681  H5Awrite(unit_attr, string_type, unit_string);
682 
683  // Copy the unit to a string MAX_STRING_SIZE long and write it
684  char name_string[MAX_STRING_SIZE];
685  strcpy(name_string, mUnlimitedDimensionName.c_str());
686  H5Awrite(name_attr, string_type, name_string);
687 
688  // Close the filespace
689  H5Pclose(time_cparms);
690  H5Sclose(one_column_space);
691  H5Aclose(unit_attr);
692  H5Aclose(name_attr);
693  H5Sclose(time_filespace);
694  }
695 
696  /*
697  * Create the provenance attribute
698  */
699 
700  // Create a longer type for 'string'
701  const unsigned MAX_PROVENANCE_STRING_SIZE = 1023;
702  hid_t long_string_type = H5Tcopy(H5T_C_S1);
703  H5Tset_size(long_string_type, MAX_PROVENANCE_STRING_SIZE);
704  hsize_t prov_columns[1] = { 1 };
705  hid_t provenance_space = H5Screate_simple(1, prov_columns, nullptr);
706  char* provenance_data = (char*)malloc(sizeof(char) * MAX_PROVENANCE_STRING_SIZE);
707  assert(ChasteBuildInfo::GetProvenanceString().length() < MAX_PROVENANCE_STRING_SIZE);
708 
709  strcpy(provenance_data, ChasteBuildInfo::GetProvenanceString().c_str());
710  hid_t prov_attr = H5Acreate(mVariablesDatasetId, "Chaste Provenance", long_string_type, provenance_space,
711  H5P_DEFAULT, H5P_DEFAULT);
712 
713  // Write to the attribute
714  H5Awrite(prov_attr, long_string_type, provenance_data);
715 
716  // Close dataspace & attribute
717  free(provenance_data);
718  H5Sclose(provenance_space);
719  H5Aclose(prov_attr);
720 }
721 
722 void Hdf5DataWriter::PutVector(int variableID, Vec petscVector)
723 {
724  if (mIsInDefineMode)
725  {
726  EXCEPTION("Cannot write data while in define mode.");
727  }
728 
729  int vector_size;
730  VecGetSize(petscVector, &vector_size);
731 
732  if ((unsigned)vector_size != mDataFixedDimensionSize)
733  {
734  EXCEPTION("Vector size doesn't match fixed dimension");
735  }
736 
737  if (mUseCache)
738  {
739  if (mDatasetDims[2] != 1)
740  {
741  //Covered by TestHdf5DataWriterMultipleColumnsCachedFails
742  EXCEPTION("Cached writes must write all variables at once.");
743  }
745  {
746  //Covered by TestHdf5DataWriterSingleColumnsCachedFails
747  EXCEPTION("Cached writes require an unlimited dimension.");
748  }
749  }
750 
751  // Make sure that everything is actually extended to the correct dimension.
752  PossiblyExtend();
753 
754  Vec output_petsc_vector;
755 
756  // Decide what to write
757  if (mSinglePermutation == nullptr)
758  {
759  // No permutation - just write
760  output_petsc_vector = petscVector;
761  }
762  else
763  {
764  assert(mIsDataComplete);
765  // Make a vector with the same pattern (doesn't copy the data)
766  VecDuplicate(petscVector, &output_petsc_vector);
767 
768  // Apply the permutation matrix
769  MatMult(mSinglePermutation, petscVector, output_petsc_vector);
770  }
771 
772  // Define memspace and hyperslab
773  hid_t memspace, hyperslab_space;
774  if (mNumberOwned != 0)
775  {
776  hsize_t v_size[1] = { mNumberOwned };
777  memspace = H5Screate_simple(1, v_size, nullptr);
778 
779  hsize_t count[DATASET_DIMS] = { 1, mNumberOwned, 1 };
780  hsize_t offset_dims[DATASET_DIMS] = { mCurrentTimeStep, mOffset, (unsigned)(variableID) };
781 
782  hyperslab_space = H5Dget_space(mVariablesDatasetId);
783  H5Sselect_hyperslab(hyperslab_space, H5S_SELECT_SET, offset_dims, nullptr, count, nullptr);
784  }
785  else
786  {
787  memspace = H5Screate(H5S_NULL);
788  hyperslab_space = H5Screate(H5S_NULL);
789  }
790 
791  // Create property list for collective dataset
792  hid_t property_list_id = H5Pcreate(H5P_DATASET_XFER);
793  H5Pset_dxpl_mpio(property_list_id, H5FD_MPIO_COLLECTIVE);
794 
795  double* p_petsc_vector;
796  VecGetArray(output_petsc_vector, &p_petsc_vector);
797 
798  if (mIsDataComplete)
799  {
800  if (mUseCache)
801  {
802  //Covered by TestHdf5DataWriterSingleColumnCached
803  mDataCache.insert(mDataCache.end(), p_petsc_vector, p_petsc_vector + mNumberOwned);
804  }
805  else
806  {
807  H5Dwrite(mVariablesDatasetId, H5T_NATIVE_DOUBLE, memspace, hyperslab_space, property_list_id, p_petsc_vector);
808  }
809  }
810  else
811  {
812  // Make a local copy of the data you own
813  boost::scoped_array<double> local_data(new double[mNumberOwned]);
814  for (unsigned i = 0; i < mNumberOwned; i++)
815  {
816  local_data[i] = p_petsc_vector[mIncompletePermIndices[mOffset + i] - mLo];
817  }
818  if (mUseCache)
819  {
820  //Covered by TestHdf5DataWriterFullFormatIncompleteCached
821  mDataCache.insert(mDataCache.end(), local_data.get(), local_data.get() + mNumberOwned);
822  }
823  else
824  {
825  H5Dwrite(mVariablesDatasetId, H5T_NATIVE_DOUBLE, memspace, hyperslab_space, property_list_id, local_data.get());
826  }
827  }
828 
829  VecRestoreArray(output_petsc_vector, &p_petsc_vector);
830 
831  H5Sclose(memspace);
832  H5Sclose(hyperslab_space);
833  H5Pclose(property_list_id);
834 
835  if (petscVector != output_petsc_vector)
836  {
837  // Free local vector
838  PetscTools::Destroy(output_petsc_vector);
839  }
840 }
841 
842 void Hdf5DataWriter::PutStripedVector(std::vector<int> variableIDs, Vec petscVector)
843 {
844  if (mIsInDefineMode)
845  {
846  EXCEPTION("Cannot write data while in define mode.");
847  }
848 
849  if (variableIDs.size() <= 1)
850  {
851  EXCEPTION("The PutStripedVector method requires at least two variables ID. If only one is needed, use PutVector method instead");
852  }
853 
854  const unsigned NUM_STRIPES = variableIDs.size();
855 
856  if (mUseCache)
857  {
858  if (NUM_STRIPES != mDatasetDims[2])
859  {
860  //Covered by TestHdf5DataWriterFullFormatStripedIncompleteCached
861  EXCEPTION("Cached writes must write all variables at once.");
862  }
864  {
865  //Covered by TestHdf5DataWriterStripedNoTimeCachedFails
866  EXCEPTION("Cached writes require an unlimited dimension.");
867  }
868  }
869 
870  int firstVariableID = variableIDs[0];
871 
872  // Currently the method only works with consecutive columns, can be extended if needed
873  for (unsigned i = 1; i < variableIDs.size(); i++)
874  {
875  if (variableIDs[i] - variableIDs[i - 1] != 1)
876  {
877  EXCEPTION("Columns should be consecutive. Try reordering them.");
878  }
879  }
880 
881  int vector_size;
882  VecGetSize(petscVector, &vector_size);
883 
884  if ((unsigned)vector_size != NUM_STRIPES * mDataFixedDimensionSize)
885  {
886  EXCEPTION("Vector size doesn't match fixed dimension");
887  }
888 
889  // Make sure that everything is actually extended to the correct dimension
890  PossiblyExtend();
891 
892  Vec output_petsc_vector;
893 
894  // Decide what to write
895  if (mDoublePermutation == nullptr)
896  {
897  // No permutation - just write
898  output_petsc_vector = petscVector;
899  }
900  else
901  {
902  assert(mIsDataComplete);
903  // Make a vector with the same pattern (doesn't copy the data)
904  VecDuplicate(petscVector, &output_petsc_vector);
905 
906  // Apply the permutation matrix
907  MatMult(mDoublePermutation, petscVector, output_petsc_vector);
908  }
909  // Define memspace and hyperslab
910  hid_t memspace, hyperslab_space;
911  if (mNumberOwned != 0)
912  {
913  hsize_t v_size[1] = { mNumberOwned * NUM_STRIPES };
914  memspace = H5Screate_simple(1, v_size, nullptr);
915 
916  hsize_t start[DATASET_DIMS] = { mCurrentTimeStep, mOffset, (unsigned)(firstVariableID) };
917  hsize_t stride[DATASET_DIMS] = { 1, 1, 1 }; //we are imposing contiguous variables, hence the stride is 1 (3rd component)
918  hsize_t block_size[DATASET_DIMS] = { 1, mNumberOwned, 1 };
919  hsize_t number_blocks[DATASET_DIMS] = { 1, 1, NUM_STRIPES };
920 
921  hyperslab_space = H5Dget_space(mVariablesDatasetId);
922  H5Sselect_hyperslab(hyperslab_space, H5S_SELECT_SET, start, stride, number_blocks, block_size);
923  }
924  else
925  {
926  memspace = H5Screate(H5S_NULL);
927  hyperslab_space = H5Screate(H5S_NULL);
928  }
929 
930  // Create property list for collective dataset write, and write! Finally.
931  hid_t property_list_id = H5Pcreate(H5P_DATASET_XFER);
932  H5Pset_dxpl_mpio(property_list_id, H5FD_MPIO_COLLECTIVE);
933 
934  double* p_petsc_vector;
935  VecGetArray(output_petsc_vector, &p_petsc_vector);
936 
937  if (mIsDataComplete)
938  {
939  if (mUseCache)
940  {
941  // Covered by TestHdf5DataWriterStripedCached
942  mDataCache.insert(mDataCache.end(), p_petsc_vector, p_petsc_vector + mNumberOwned * NUM_STRIPES);
943  }
944  else
945  {
946  H5Dwrite(mVariablesDatasetId, H5T_NATIVE_DOUBLE, memspace, hyperslab_space, property_list_id, p_petsc_vector);
947  }
948  }
949  else
950  {
951  if (variableIDs.size() < 3) // incomplete data and striped vector is supported only for NUM_STRIPES=2...for the moment
952  {
953  // Make a local copy of the data you own
954  boost::scoped_array<double> local_data(new double[mNumberOwned * NUM_STRIPES]);
955  for (unsigned i = 0; i < mNumberOwned; i++)
956  {
957  unsigned local_node_number = mIncompletePermIndices[mOffset + i] - mLo;
958  local_data[NUM_STRIPES * i] = p_petsc_vector[local_node_number * NUM_STRIPES];
959  local_data[NUM_STRIPES * i + 1] = p_petsc_vector[local_node_number * NUM_STRIPES + 1];
960  }
961 
962  if (mUseCache)
963  {
964  //Covered by TestHdf5DataWriterFullFormatStripedIncompleteCached
965  mDataCache.insert(mDataCache.end(), local_data.get(), local_data.get() + 2 * mNumberOwned);
966  }
967  else
968  {
969  H5Dwrite(mVariablesDatasetId, H5T_NATIVE_DOUBLE, memspace, hyperslab_space, property_list_id, local_data.get());
970  }
971  }
972  else
973  {
974  EXCEPTION("The PutStripedVector functionality for incomplete data is supported for only 2 stripes");
975  }
976  }
977 
978  VecRestoreArray(output_petsc_vector, &p_petsc_vector);
979 
980  H5Sclose(memspace);
981  H5Sclose(hyperslab_space);
982  H5Pclose(property_list_id);
983 
984  if (petscVector != output_petsc_vector)
985  {
986  // Free local vector
987  PetscTools::Destroy(output_petsc_vector);
988  }
989 }
990 
992 {
993  return mUseCache;
994 }
995 
997 {
998  // The HDF5 writes are collective which means that if a process has nothing to write from
999  // its cache then it must still proceed in step with the other processes.
1000  bool any_nonempty_caches = PetscTools::ReplicateBool(!mDataCache.empty());
1001  if (!any_nonempty_caches)
1002  {
1003  // Nothing to do
1004  return;
1005  }
1006 
1007  // PRINT_3_VARIABLES(mCacheFirstTimeStep, mOffset, 0)
1008  // PRINT_3_VARIABLES(mCurrentTimeStep-mCacheFirstTimeStep, mNumberOwned, mDatasetDims[2])
1009  // PRINT_VARIABLE(mDataCache.size())
1010 
1011  // Define memspace and hyperslab
1012  hid_t memspace, hyperslab_space;
1013  if (mNumberOwned != 0)
1014  {
1015  hsize_t v_size[1] = { mDataCache.size() };
1016  memspace = H5Screate_simple(1, v_size, nullptr);
1017 
1020  assert((mCurrentTimeStep - mCacheFirstTimeStep) * mNumberOwned * mDatasetDims[2] == mDataCache.size()); // Got size right?
1021 
1022  hyperslab_space = H5Dget_space(mVariablesDatasetId);
1023  H5Sselect_hyperslab(hyperslab_space, H5S_SELECT_SET, start, nullptr, count, nullptr);
1024  }
1025  else
1026  {
1027  memspace = H5Screate(H5S_NULL);
1028  hyperslab_space = H5Screate(H5S_NULL);
1029  }
1030 
1031  // Create property list for collective dataset write
1032  hid_t property_list_id = H5Pcreate(H5P_DATASET_XFER);
1033  H5Pset_dxpl_mpio(property_list_id, H5FD_MPIO_COLLECTIVE);
1034 
1035  // Write!
1036  H5Dwrite(mVariablesDatasetId, H5T_NATIVE_DOUBLE, memspace, hyperslab_space, property_list_id, &mDataCache[0]);
1037 
1038  // Tidy up
1039  H5Sclose(memspace);
1040  H5Sclose(hyperslab_space);
1041  H5Pclose(property_list_id);
1042 
1043  mCacheFirstTimeStep = mCurrentTimeStep; // Update where we got to
1044  mDataCache.clear(); // Clear out cache
1045 }
1046 
1048 {
1049  if (mIsInDefineMode)
1050  {
1051  EXCEPTION("Cannot write data while in define mode.");
1052  }
1053 
1055  {
1056  EXCEPTION("PutUnlimitedVariable() called but no unlimited dimension has been set");
1057  }
1058 
1059  // Make sure that everything is actually extended to the correct dimension.
1060  PossiblyExtend();
1061 
1062  // This datum is only written by the master
1063  if (!PetscTools::AmMaster())
1064  {
1065  return;
1066  }
1067 
1068  hsize_t size[1] = { 1 };
1069  hid_t memspace = H5Screate_simple(1, size, nullptr);
1070 
1071  // Select hyperslab in the file.
1072  hsize_t count[1] = { 1 };
1073  hsize_t offset[1] = { mCurrentTimeStep };
1074  hid_t hyperslab_space = H5Dget_space(mUnlimitedDatasetId);
1075  H5Sselect_hyperslab(hyperslab_space, H5S_SELECT_SET, offset, nullptr, count, nullptr);
1076 
1077  H5Dwrite(mUnlimitedDatasetId, H5T_NATIVE_DOUBLE, memspace, hyperslab_space, H5P_DEFAULT, &value);
1078 
1079  H5Sclose(hyperslab_space);
1080  H5Sclose(memspace);
1081 }
1082 
1084 {
1085  if (mIsInDefineMode)
1086  {
1087  return; // Nothing to do...
1088  }
1089 
1090  if (mUseCache)
1091  {
1092  WriteCache();
1093  }
1094 
1095  H5Dclose(mVariablesDatasetId);
1097  {
1098  H5Dclose(mUnlimitedDatasetId);
1099  }
1100  H5Fclose(mFileId);
1101 
1102  // Cope with being called twice (e.g. if a user calls Close then the destructor)
1103  mIsInDefineMode = true;
1104 }
1105 
1106 void Hdf5DataWriter::DefineUnlimitedDimension(const std::string& rVariableName,
1107  const std::string& rVariableUnits,
1108  unsigned estimatedLength)
1109 {
1111  {
1112  EXCEPTION("Unlimited dimension already set. Cannot be defined twice");
1113  }
1114 
1115  if (!mIsInDefineMode)
1116  {
1117  EXCEPTION("Cannot define variables when not in Define mode");
1118  }
1119 
1120  this->mIsUnlimitedDimensionSet = true;
1121  this->mUnlimitedDimensionName = rVariableName;
1122  this->mUnlimitedDimensionUnit = rVariableUnits;
1123  mEstimatedUnlimitedLength = estimatedLength;
1124 }
1125 
1127 {
1129  {
1130  EXCEPTION("Trying to advance along an unlimited dimension without having defined any");
1131  }
1132 
1133  mCurrentTimeStep++;
1134 
1135  /*
1136  * Write when stepping over a chunk boundary. Note: NOT the same as write
1137  * out when the chunk size == the cache size, because we might have started
1138  * part-way through a chunk.
1139  */
1140  if (mUseCache && (mCurrentTimeStep % mChunkSize[0] == 0))
1141  {
1142  WriteCache();
1143  }
1144 
1145  /*
1146  * Extend the dataset (only reached when adding to an existing dataset,
1147  * or if mEstimatedUnlimitedLength hasn't been set and has defaulted to 1).
1148  */
1149  if (mCurrentTimeStep >= (long unsigned)mEstimatedUnlimitedLength)
1150  {
1151  mDatasetDims[0]++;
1152  mNeedExtend = true;
1153  }
1154 }
1155 
1157 {
1158  if (mNeedExtend)
1159  {
1160  H5Dset_extent(mVariablesDatasetId, mDatasetDims);
1161  H5Dset_extent(mUnlimitedDatasetId, mDatasetDims);
1162  }
1163  mNeedExtend = false;
1164 }
1165 
1167 {
1168  // Set internal counter to 0
1169  mCurrentTimeStep = 0;
1170  // Set dataset to 1 x nodes x vars
1171  mDatasetDims[0] = 1;
1172  mNeedExtend = 1;
1173  PossiblyExtend(); // Abusing the notation here, this is probably a contraction.
1174 }
1175 
1176 int Hdf5DataWriter::GetVariableByName(const std::string& rVariableName)
1177 {
1178  int id = -1;
1179 
1180  // Check for the variable name in the existing variables
1181  for (unsigned index = 0; index < mVariables.size(); index++)
1182  {
1183  if (mVariables[index].mVariableName == rVariableName)
1184  {
1185  id = index;
1186  break;
1187  }
1188  }
1189  if (id == -1)
1190  {
1191  EXCEPTION("Variable does not exist in hdf5 definitions.");
1192  }
1193  return id;
1194 }
1195 
1196 bool Hdf5DataWriter::ApplyPermutation(const std::vector<unsigned>& rPermutation, bool unsafeExtendingMode)
1197 {
1198  if (unsafeExtendingMode == false && !mIsInDefineMode)
1199  {
1200  EXCEPTION("Cannot define permutation when not in Define mode");
1201  }
1202 
1203  if (rPermutation.empty())
1204  {
1205  return false;
1206  }
1207 
1208  assert(mFileFixedDimensionSize == mDataFixedDimensionSize); // (undoing) permutations only works when we are outputting all nodes.
1209 
1210  if (rPermutation.size() != mFileFixedDimensionSize || rPermutation.size() != mDataFixedDimensionSize)
1211  {
1212  EXCEPTION("Permutation doesn't match the expected problem size of " << mFileFixedDimensionSize);
1213  }
1214 
1215  // Permutation checker
1216  std::set<unsigned> permutation_pigeon_hole;
1217 
1218  // Fill up the pigeon holes
1219  bool identity_map = true;
1220  for (unsigned i = 0; i < mDataFixedDimensionSize; i++)
1221  {
1222  permutation_pigeon_hole.insert(rPermutation[i]);
1223  if (identity_map && i != rPermutation[i])
1224  {
1225  identity_map = false;
1226  }
1227  }
1228  if (identity_map)
1229  {
1230  // Do nothing
1231  return false;
1232  }
1233 
1234  /*
1235  * The pigeon-hole principle states that each index appears exactly once
1236  * so if any don't appear then either one appears twice or something out of
1237  * scope has appeared.
1238  */
1239  for (unsigned i = 0; i < mDataFixedDimensionSize; i++)
1240  {
1241  if (permutation_pigeon_hole.count(i) != 1u)
1242  {
1243  EXCEPTION("Permutation vector doesn't contain a valid permutation");
1244  }
1245  }
1246  // Make sure we've not done it already
1247  assert(mSinglePermutation == nullptr);
1248  assert(mDoublePermutation == nullptr);
1249  PetscTools::SetupMat(mSinglePermutation, mDataFixedDimensionSize, mDataFixedDimensionSize, 2, mHi - mLo, mHi - mLo);
1250  PetscTools::SetupMat(mDoublePermutation, 2 * mDataFixedDimensionSize, 2 * mDataFixedDimensionSize, 4, 2 * (mHi - mLo), 2 * (mHi - mLo));
1251 #if (PETSC_VERSION_MAJOR == 3) //PETSc 3.x.x
1252  MatSetOption(mSinglePermutation, MAT_IGNORE_OFF_PROC_ENTRIES, PETSC_TRUE);
1253  MatSetOption(mDoublePermutation, MAT_IGNORE_OFF_PROC_ENTRIES, PETSC_TRUE);
1254 #else
1255  MatSetOption(mSinglePermutation, MAT_IGNORE_OFF_PROC_ENTRIES);
1256  MatSetOption(mDoublePermutation, MAT_IGNORE_OFF_PROC_ENTRIES);
1257 #endif
1258  // Only do local rows
1259  for (unsigned row_index = mLo; row_index < mHi; row_index++)
1260  {
1261  // Put zero on the diagonal
1262  MatSetValue(mSinglePermutation, row_index, row_index, 0.0, INSERT_VALUES);
1263 
1264  // Put one at (i,j)
1265  MatSetValue(mSinglePermutation, row_index, rPermutation[row_index], 1.0, INSERT_VALUES);
1266 
1267  unsigned bi_index = 2 * row_index;
1268  unsigned perm_index = 2 * rPermutation[row_index];
1269 
1270  // Put zeroes on the diagonal
1271  MatSetValue(mDoublePermutation, bi_index, bi_index, 0.0, INSERT_VALUES);
1272  MatSetValue(mDoublePermutation, bi_index + 1, bi_index + 1, 0.0, INSERT_VALUES);
1273 
1274  // Put ones at (i,j)
1275  MatSetValue(mDoublePermutation, bi_index, perm_index, 1.0, INSERT_VALUES);
1276  MatSetValue(mDoublePermutation, bi_index + 1, perm_index + 1, 1.0, INSERT_VALUES);
1277  }
1278  MatAssemblyBegin(mSinglePermutation, MAT_FINAL_ASSEMBLY);
1279  MatAssemblyBegin(mDoublePermutation, MAT_FINAL_ASSEMBLY);
1280  MatAssemblyEnd(mSinglePermutation, MAT_FINAL_ASSEMBLY);
1281  MatAssemblyEnd(mDoublePermutation, MAT_FINAL_ASSEMBLY);
1282  return true;
1283 }
1284 
1285 void Hdf5DataWriter::SetFixedChunkSize(const unsigned& rTimestepsPerChunk,
1286  const unsigned& rNodesPerChunk,
1287  const unsigned& rVariablesPerChunk)
1288 {
1289  assert(mIsInDefineMode);
1290 
1292  mFixedChunkSize[0] = rTimestepsPerChunk;
1293  mFixedChunkSize[1] = rNodesPerChunk;
1294  mFixedChunkSize[2] = rVariablesPerChunk;
1295 }
1296 
1298 {
1299  // Number of chunks for istore_k optimisation
1300  hsize_t num_chunks = 1;
1301  for (unsigned i = 0; i < DATASET_DIMS; ++i)
1302  {
1303  num_chunks *= CeilDivide(mDatasetDims[i], mChunkSize[i]);
1304  }
1305  return num_chunks;
1306 }
1307 
1308 void Hdf5DataWriter::CalculateChunkDims(unsigned targetSize, unsigned* pChunkSizeInBytes, bool* pAllOneChunk)
1309 {
1310  bool all_one_chunk = true;
1311  unsigned chunk_size_in_bytes = 8u; // 8 bytes/double
1312  unsigned divisors[DATASET_DIMS];
1313  // Loop over dataset dimensions, dividing each dimension into the integer number of chunks that results
1314  // in the number of entries closest to the targetSize. This means the chunks will span the dataset with
1315  // as little waste as possible.
1316  for (unsigned i = 0; i < DATASET_DIMS; ++i)
1317  {
1318  // What do I divide the dataset by to get targetSize entries per chunk?
1319  divisors[i] = CeilDivide(mDatasetDims[i], targetSize);
1320  // If I divide my dataset into divisors pieces, how big is each chunk?
1321  mChunkSize[i] = CeilDivide(mDatasetDims[i], divisors[i]);
1322  // If my chunks are this big, how many bytes is that?
1323  chunk_size_in_bytes *= mChunkSize[i];
1324  // Check if all divisors==1, which means we have one big chunk.
1325  all_one_chunk = all_one_chunk && divisors[i] == 1u;
1326  }
1327  // Update pointers
1328  *pChunkSizeInBytes = chunk_size_in_bytes;
1329  *pAllOneChunk = all_one_chunk;
1330 }
1331 
1333 {
1335  {
1336  const unsigned target_size_in_bytes = mChunkTargetSize;
1337 
1338  unsigned target_size = 0;
1339  unsigned chunk_size_in_bytes;
1340  bool all_one_chunk;
1341 
1342  // While the chunks are too small, make mChunkSize[i]s larger, unless
1343  // we end up with a chunk that spans the dataset.
1344  do
1345  {
1346  target_size++;
1347  CalculateChunkDims(target_size, &chunk_size_in_bytes, &all_one_chunk);
1348  }
1349  while ( chunk_size_in_bytes < target_size_in_bytes && !all_one_chunk);
1350 
1351  // Go one step back if the target size has been exceeded
1352  if (chunk_size_in_bytes > target_size_in_bytes && !all_one_chunk)
1353  {
1354  target_size--;
1355  CalculateChunkDims(target_size, &chunk_size_in_bytes, &all_one_chunk);
1356  }
1357  }
1358  else
1359  {
1360  /*
1361  * User-provided chunk dims.
1362  */
1363  for (unsigned i = 0; i < DATASET_DIMS; ++i)
1364  {
1365  mChunkSize[i] = mFixedChunkSize[i];
1366  }
1367  }
1368 
1370 
1371  /*
1372  if (PetscTools::AmMaster())
1373  {
1374  std::cout << "Hdf5DataWriter dataset contains " << mNumberOfChunks << " chunks of " << chunk_size_in_bytes << " B." << std::endl;
1375  }
1376  */
1377 }
1378 
1380 {
1381  if (!mIsInDefineMode)
1382  {
1383  /* Must be in define mode, i.e. creating a new dataset (and possibly a
1384  * new HDF5 file) to set the dataset chunk dims. */
1385  EXCEPTION("Cannot set chunk target size when not in define mode.");
1386  }
1387  mChunkTargetSize = targetSize;
1388 }
1389 
1391 {
1392  /* Note: calling this method after OpenFile() is pointless as that's where
1393  * H5Pset_alignment happens, so the obvious way to protect this method is
1394  * to check mFileId to assert H5Fopen has not been called yet.
1395  * Unfortunately it's uninitialised so is not always safe to check!
1396  *
1397  * Fortunately OpenFile() is only called in one of two ways:
1398  * 1. In the constructor in combination with mUseExistingFile. Subsequently
1399  * calling this method will end up throwing in the first block below
1400  * (since mUseExistingFile is const).
1401  * 2. By EndDefineMode(). Subsequently calling this method will end up in
1402  * the second block below.
1403  */
1404  if (mUseExistingFile)
1405  {
1406  // Must be called on new HDF5 files.
1407  EXCEPTION("Alignment parameter can only be set for new HDF5 files.");
1408  }
1409  if (!mIsInDefineMode)
1410  {
1411  // Creating a new file but EndDefineMode() called already.
1412  EXCEPTION("Cannot set alignment parameter when not in define mode.");
1413  }
1414 
1415  mAlignment = alignment;
1416 }
void ComputeIncompleteOffset()
std::string GetAbsolutePath() const
Definition: FileFinder.cpp:224
hsize_t CalculateNumberOfChunks()
bool ApplyPermutation(const std::vector< unsigned > &rPermutation, bool unsafeExtendingMode=false)
bool mUseOptimalChunkSizeAlgorithm
std::string mUnlimitedDimensionName
static bool ReplicateBool(bool flag)
Definition: PetscTools.cpp:190
void CheckVariableName(const std::string &rName)
const bool mUseExistingFile
int GetVariableByName(const std::string &rVariableName)
unsigned CeilDivide(unsigned numerator, unsigned denominator)
hsize_t mChunkTargetSize
std::string mUnlimitedDimensionUnit
bool Exists() const
Definition: FileFinder.cpp:183
#define EXCEPTION(message)
Definition: Exception.hpp:143
void CalculateChunkDims(unsigned targetSize, unsigned *pChunkSizeInBytes, bool *pAllOneChunk)
static bool AmMaster()
Definition: PetscTools.cpp:120
std::vector< unsigned > mIncompletePermIndices
void SetTargetChunkSize(hsize_t targetSize)
bool DoesDatasetExist(const std::string &rDatasetName)
hsize_t mDatasetDims[DATASET_DIMS]
const bool mCleanDirectory
void SetFixedChunkSize(const unsigned &rTimestepsPerChunk, const unsigned &rNodesPerChunk, const unsigned &rVariablesPerChunk)
unsigned mEstimatedUnlimitedLength
long unsigned mCacheFirstTimeStep
void AdvanceAlongUnlimitedDimension()
std::vector< unsigned > mIncompleteNodeIndices
DistributedVectorFactory & mrVectorFactory
hsize_t mFixedChunkSize[DATASET_DIMS]
int DefineVariable(const std::string &rVariableName, const std::string &rVariableUnits)
static void SetupMat(Mat &rMat, int numRows, int numColumns, unsigned rowPreallocation, int numLocalRows=PETSC_DECIDE, int numLocalColumns=PETSC_DECIDE, bool ignoreOffProcEntries=true, bool newAllocationError=true)
Definition: PetscTools.cpp:272
void PutVector(int variableID, Vec petscVector)
static void Destroy(Vec &rVec)
Definition: PetscTools.hpp:352
unsigned mNumberOwned
std::vector< double > mDataCache
void DefineFixedDimension(long dimensionSize)
Hdf5DataWriter(DistributedVectorFactory &rVectorFactory, const std::string &rDirectory, const std::string &rBaseName, bool cleanDirectory=true, bool extendData=false, std::string datasetName="Data", bool useCache=false)
void SetAlignment(hsize_t alignment)
static std::string GetProvenanceString()
virtual ~Hdf5DataWriter()
void CheckUnitsName(const std::string &rName)
void PutStripedVector(std::vector< int > variableIDs, Vec petscVector)
std::vector< DataWriterVariable > mVariables
void PutUnlimitedVariable(double value)
static const unsigned DATASET_DIMS
hsize_t mNumberOfChunks
unsigned mDataFixedDimensionSize
long unsigned mCurrentTimeStep
hsize_t mChunkSize[DATASET_DIMS]
unsigned mFileFixedDimensionSize
virtual void EndDefineMode()
void DefineUnlimitedDimension(const std::string &rVariableName, const std::string &rVariableUnits, unsigned estimatedLength=1)