Chaste  Release::2024.1
Hdf5DataWriter.hpp
1 /*
2 
3 Copyright (c) 2005-2021, University of Oxford.
4 All rights reserved.
5 
6 University of Oxford means the Chancellor, Masters and Scholars of the
7 University of Oxford, having an administrative office at Wellington
8 Square, Oxford OX1 2JD, UK.
9 
10 This file is part of Chaste.
11 
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14  * Redistributions of source code must retain the above copyright notice,
15  this list of conditions and the following disclaimer.
16  * Redistributions in binary form must reproduce the above copyright notice,
17  this list of conditions and the following disclaimer in the documentation
18  and/or other materials provided with the distribution.
19  * Neither the name of the University of Oxford nor the names of its
20  contributors may be used to endorse or promote products derived from this
21  software without specific prior written permission.
22 
23 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
24 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
27 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
29 GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32 OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 
34 */
35 
36 #ifndef HDF5DATAWRITER_HPP_
37 #define HDF5DATAWRITER_HPP_
38 
39 #include <vector>
40 
41 #include "AbstractHdf5Access.hpp"
42 #include "DataWriterVariable.hpp"
43 #include "DistributedVectorFactory.hpp"
44 
/**
 * Declaration of Hdf5DataWriter, which derives publicly from AbstractHdf5Access.
 *
 * Only declarations appear here; the member functions are defined elsewhere,
 * so the comments below describe signatures rather than behaviour.
 *
 * NOTE(review): this text is a numbered source listing. The integer at the start of
 * each line is the listing's own line number, and the jumps in that numbering show
 * that some original lines are absent (for example the opening line of the
 * constructor declaration). Treat the class body as incomplete until it has been
 * checked against the real header.
 */
48 class Hdf5DataWriter : public AbstractHdf5Access //: public AbstractDataWriter
49 {
    // The test class is granted access to the private members declared below.
50  friend class TestHdf5DataWriter;
51 private:
52 
55 
    // Both flags are const, so they can only be initialised during construction.
    // Presumably they are derived from the constructor's cleanDirectory and
    // extendData arguments -- TODO confirm against the implementation.
56  const bool mCleanDirectory;
57  const bool mUseExistingFile;
    // NOTE(review): the names suggest a locally owned index range [mLo, mHi), its
    // size, and an offset into the data -- none of that is shown here; confirm.
64  unsigned mLo;
65  unsigned mHi;
66  unsigned mNumberOwned;
67  unsigned mOffset;
    // Index list; presumably only used when a subset of nodes is written
    // (see the three-argument DefineFixedDimension overload) -- TODO confirm.
68  std::vector<unsigned> mIncompletePermIndices;
    // Presumably records that the dataset must grow before the next write
    // (see PossiblyExtend) -- TODO confirm.
69  bool mNeedExtend;
    // Collection of DataWriterVariable objects held by the writer.
71  std::vector<DataWriterVariable> mVariables;
    // Time-step counter; the name suggests the position along the unlimited dimension.
73  long unsigned mCurrentTimeStep;
    // Cache-related state. mDataCache stores doubles; presumably mUseCache mirrors the
    // constructor's useCache argument and mCacheFirstTimeStep is the time step of the
    // first cached entry -- TODO confirm.
86  bool mUseCache;
87  long unsigned mCacheFirstTimeStep;
88  std::vector<double> mDataCache;
    // Private helper taking a variable name by const reference; returns nothing.
    // Presumably validates the name -- how failures are reported is not visible here.
95  void CheckVariableName(const std::string& rName);
96 
    // Private helper taking a units name by const reference; returns nothing.
    // Presumably validates the name -- how failures are reported is not visible here.
102  void CheckUnitsName(const std::string& rName);
103 
108 
    // Private helper with no arguments and no return value.
114  void OpenFile();
115 
121 
    // Private helper: takes a target size by value and two pointers.
    // NOTE(review): the "p" prefixes suggest pChunkSizeInBytes and pAllOneChunk are
    // outputs written by the call -- confirm, including whether null is tolerated.
130  void CalculateChunkDims( unsigned targetSize, unsigned* pChunkSizeInBytes, bool* pAllOneChunk );
131 
    // Private helper with no arguments and no return value.
140  void SetChunkSize();
141 
142 public:
143 
    // NOTE(review): the six lines below are the tail of the constructor declaration;
    // the line that opens it is missing from this listing. The member index at the end
    // of the listing gives the full signature, which begins
    // "Hdf5DataWriter(DistributedVectorFactory &rVectorFactory, ...".
    // Defaults visible here: cleanDirectory=true, extendData=false,
    // datasetName="Data", useCache=false. datasetName is taken by value.
160  const std::string& rDirectory,
161  const std::string& rBaseName,
162  bool cleanDirectory=true,
163  bool extendData=false,
164  std::string datasetName="Data",
165  bool useCache=false);
166 
    // Virtual destructor.
170  virtual ~Hdf5DataWriter();
171 
    // Overload taking only a size (signed long).
177  void DefineFixedDimension(long dimensionSize);
178 
    // Overload taking two index vectors (original and permuted node indices, going by
    // the parameter names) plus a vector size. How the two vectors must correspond is
    // not visible here -- TODO confirm.
186  void DefineFixedDimension(const std::vector<unsigned>& rNodesToOuputOriginalIndices, const std::vector<unsigned>& rNodesToOuputPermutedIndices, long vecSize);
187 
    // Takes a variable name and units; estimatedLength defaults to 1.
196  void DefineUnlimitedDimension(const std::string& rVariableName, const std::string& rVariableUnits, unsigned estimatedLength = 1);
197 
203 
    // Takes a variable name and units and returns an int.
    // Presumably the returned int is the ID later passed to PutVector -- TODO confirm.
212  int DefineVariable(const std::string& rVariableName, const std::string& rVariableUnits);
213 
    // Query returning bool; not declared const.
223  bool IsInDefineMode();
224 
    // Virtual, so a derived class may override it.
228  virtual void EndDefineMode();
229 
    // No arguments, no return value; presumably related to mNeedExtend -- TODO confirm.
233  void PossiblyExtend();
234 
    // No arguments, no return value.
241  void EmptyDataset();
242 
    // Takes an int variable ID and a Vec (presumably a PETSc vector, going by the
    // parameter name -- confirm). Vec is passed by value.
249  void PutVector(int variableID, Vec petscVector);
250 
    // As PutVector, but takes a vector of int variable IDs, which is passed by value.
257  void PutStripedVector(std::vector<int> variableIDs, Vec petscVector);
258 
    // Query returning bool; presumably reports mUseCache -- TODO confirm.
263  bool GetUsingCache();
264 
    // No arguments, no return value.
268  void WriteCache();
269 
    // Takes a single double value.
275  void PutUnlimitedVariable(double value);
276 
    // No arguments, no return value.
280  void Close();
281 
    // Looks up by name and returns an int; the result for an unknown name is not
    // visible here -- TODO confirm.
289  int GetVariableByName(const std::string& rVariableName);
290 
291 
    // Takes a permutation vector; unsafeExtendingMode defaults to false.
    // The meaning of the bool return value is not visible here -- TODO confirm.
300  bool ApplyPermutation(const std::vector<unsigned>& rPermutation, bool unsafeExtendingMode=false);
301 
    // Takes three chunk extents (time steps, nodes, variables), each by const reference.
316  void SetFixedChunkSize(const unsigned& rTimestepsPerChunk,
317  const unsigned& rNodesPerChunk,
318  const unsigned& rVariablesPerChunk);
319 
    // NOTE(review): the block comment below speaks of code "above" and an algorithm
    // "below" (e.g. a commented-out H5Pset_alignment call and target_size_in_bytes).
    // Neither appears in this class declaration, so it presumably refers to the
    // implementation file -- confirm.
320  /*
321  * * NOTES ON CHUNK SIZE AND ALIGNMENT *
322  *
323  * The default target chunk size is 128 K, which seems to be a good compromise
324  * for small problems (e.g. on a desktop PC). For larger problems, I/O
325  * performance often improves with increased chunk size. A sweet spot seems to
326  * be 1 M chunks.
327  *
328  * On a striped filesystem, for best performance set the chunk size and
329  * alignment (using `H5Pset_alignment` above) to the file stripe size. With
330  * `H5Pset_alignment`, every chunk starts at a multiple of the alignment value.
331  *
332  * To avoid wasting space, the chunk size should be an integer multiple of the
333  * alignment value. Note that the algorithm below automatically goes back one
334  * step after exceeding the chunk size, which minimises wasted space. To see
335  * why, consider the examples below.
336  *
337  * (Example 1) Say our file system uses 1 M stripes. If we set
338  * target_size_in_bytes = 1024*1024;
339  * below and uncomment
340  * H5Pset_alignment(fapl, 0, 1024*1024);
341  * above, i.e. aim for (slightly under) 1 M chunks and align them to 1 M
342  * boundaries, then the algorithm below will get as close as possible to 1 M
343  * chunks but not exceed it, so each chunk will be padded slightly to sit on
344  * the 1 M boundaries. Each chunk will therefore have its own stripe on the
345  * file system, which should give us the best bandwidth and least contention.
346  * Conclusion: this is optimal!
347  *
348  * Note: In general the algorithm can get very close to the target so the
349  * waste isn't bad. Typical utilization is 99.99% (check with "h5ls -v ...").
350  *
351  * (Example 2) We set
352  * target_size_in_bytes = 128*1024;
353  * and uncomment
354  * H5Pset_alignment(fapl, 0, 1024*1024);
355  * i.e. 128 K chunks aligned to 1 M boundaries. This would pad every chunk to
356  * 1 M boundaries, wasting 7/8 of the space in the file! A file which might be
357  * 5 G with an efficient layout would be more like 40 G! Conclusion: setting
358  * the chunk size to less than the alignment value is very bad!
359  *
360  * (Example 3) Say our file system uses 1 M stripes. We set
361  * target_size_in_bytes = 2*1024*1024;
362  * and uncomment
363  * H5Pset_alignment(fapl, 0, 1024*1024);
364  * i.e. 2 M chunks aligned to 1 M boundaries. This might not be optimal, but
365  * it's OK, since the chunk size is (slightly under) twice the alignment, as in
366  * Example 1 the amount of padding would be very small. Each read/write would
367  * require accessing 2 stripes on the file system. Conclusion: a chunk size of
368  * an integer multiple of the alignment value is fine (but not optimal).
369  */
370 
    // Takes a target chunk size as hsize_t. The notes above discuss target sizes in
    // bytes (128 K default, 1 M suggested for larger problems); presumably this
    // argument is in bytes too -- TODO confirm.
384  void SetTargetChunkSize(hsize_t targetSize);
385 
    // Takes an alignment value as hsize_t. See the notes above on how alignment
    // interacts with chunk size; presumably the value is passed to H5Pset_alignment
    // -- TODO confirm.
400  void SetAlignment(hsize_t alignment);
401 };
402 
403 #endif /*HDF5DATAWRITER_HPP_*/
void ComputeIncompleteOffset()
hsize_t CalculateNumberOfChunks()
bool ApplyPermutation(const std::vector< unsigned > &rPermutation, bool unsafeExtendingMode=false)
bool mUseOptimalChunkSizeAlgorithm
void CheckVariableName(const std::string &rName)
const bool mUseExistingFile
int GetVariableByName(const std::string &rVariableName)
hsize_t mChunkTargetSize
void CalculateChunkDims(unsigned targetSize, unsigned *pChunkSizeInBytes, bool *pAllOneChunk)
std::vector< unsigned > mIncompletePermIndices
void SetTargetChunkSize(hsize_t targetSize)
const bool mCleanDirectory
void SetFixedChunkSize(const unsigned &rTimestepsPerChunk, const unsigned &rNodesPerChunk, const unsigned &rVariablesPerChunk)
unsigned mEstimatedUnlimitedLength
long unsigned mCacheFirstTimeStep
void AdvanceAlongUnlimitedDimension()
DistributedVectorFactory & mrVectorFactory
hsize_t mFixedChunkSize[DATASET_DIMS]
int DefineVariable(const std::string &rVariableName, const std::string &rVariableUnits)
void PutVector(int variableID, Vec petscVector)
unsigned mNumberOwned
std::vector< double > mDataCache
void DefineFixedDimension(long dimensionSize)
Hdf5DataWriter(DistributedVectorFactory &rVectorFactory, const std::string &rDirectory, const std::string &rBaseName, bool cleanDirectory=true, bool extendData=false, std::string datasetName="Data", bool useCache=false)
void SetAlignment(hsize_t alignment)
virtual ~Hdf5DataWriter()
void CheckUnitsName(const std::string &rName)
void PutStripedVector(std::vector< int > variableIDs, Vec petscVector)
std::vector< DataWriterVariable > mVariables
void PutUnlimitedVariable(double value)
static const unsigned DATASET_DIMS
hsize_t mNumberOfChunks
unsigned mDataFixedDimensionSize
long unsigned mCurrentTimeStep
hsize_t mChunkSize[DATASET_DIMS]
unsigned mFileFixedDimensionSize
virtual void EndDefineMode()
void DefineUnlimitedDimension(const std::string &rVariableName, const std::string &rVariableUnits, unsigned estimatedLength=1)