Chaste Commit::baa90ac2819b962188b7562f2326be23c47859a7
Hdf5DataWriter.hpp
1/*
2
3Copyright (c) 2005-2024, University of Oxford.
4All rights reserved.
5
6University of Oxford means the Chancellor, Masters and Scholars of the
7University of Oxford, having an administrative office at Wellington
8Square, Oxford OX1 2JD, UK.
9
10This file is part of Chaste.
11
12Redistribution and use in source and binary forms, with or without
13modification, are permitted provided that the following conditions are met:
14 * Redistributions of source code must retain the above copyright notice,
15 this list of conditions and the following disclaimer.
16 * Redistributions in binary form must reproduce the above copyright notice,
17 this list of conditions and the following disclaimer in the documentation
18 and/or other materials provided with the distribution.
19 * Neither the name of the University of Oxford nor the names of its
20 contributors may be used to endorse or promote products derived from this
21 software without specific prior written permission.
22
23THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
24AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
27LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
29GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33
34*/
35
36#ifndef HDF5DATAWRITER_HPP_
37#define HDF5DATAWRITER_HPP_
38
39#include <vector>
40
41#include "AbstractHdf5Access.hpp"
42#include "DataWriterVariable.hpp"
43#include "DistributedVectorFactory.hpp"
44
/**
 * Hdf5DataWriter, declared here as a public subclass of AbstractHdf5Access.
 *
 * Going by the member names, a writer is first put through a definition phase
 * (dimensions and variables), then used to write PETSc vectors and one
 * unlimited-dimension value per step to an HDF5 dataset, optionally through an
 * in-memory cache. The behaviour itself lives in the implementation file,
 * which is not visible from this header.
 *
 * NOTE(review): this listing has the original source line numbers fused onto
 * the start of every line, and some declarations are absent from it. The
 * constructor declaration below starts at its rDirectory parameter, and names
 * such as mrVectorFactory and mChunkSize appear only in the symbol index that
 * follows the include guard (they may sit on omitted lines or in the base
 * class). Restore the missing lines from the real header before compiling.
 */
48class Hdf5DataWriter : public AbstractHdf5Access //: public AbstractDataWriter
49{
    /** The test class is a friend, so it can reach the private members below. */
50 friend class TestHdf5DataWriter;
51private:
52
55
    /** Const, so fixed at construction. Presumably holds the cleanDirectory constructor flag -- TODO confirm. */
56 const bool mCleanDirectory;
    /** Const, so fixed at construction. Presumably derived from the extendData constructor flag -- TODO confirm. */
57 const bool mUseExistingFile;
    /**
     * NOTE(review): the next four names suggest the local ownership range
     * [mLo, mHi) of a distributed vector, the number of locally owned entries,
     * and an offset used when only part of the data is output -- confirm
     * against the implementation.
     */
64 unsigned mLo;
65 unsigned mHi;
66 unsigned mNumberOwned;
67 unsigned mOffset;
    /** Index list; presumably filled when the three-argument DefineFixedDimension() selects a subset of nodes -- TODO confirm. */
68 std::vector<unsigned> mIncompletePermIndices;
    /** Collection of DataWriterVariable objects; presumably appended to by DefineVariable() -- TODO confirm. */
71 std::vector<DataWriterVariable> mVariables;
    /** Presumably the current position along the unlimited dimension -- TODO confirm. */
73 long unsigned mCurrentTimeStep;
    /**
     * Cache-related state: a flag, a time-step index and a buffer of doubles.
     * Presumably mUseCache mirrors the useCache constructor argument, and
     * WriteCache() flushes mDataCache starting at mCacheFirstTimeStep -- TODO confirm.
     */
86 bool mUseCache;
87 long unsigned mCacheFirstTimeStep;
88 std::vector<double> mDataCache;
    /**
     * Private check on a variable name. Returns nothing, so a rejected name
     * has to be reported some other way (presumably an exception -- TODO confirm).
     *
     * @param rName  the variable name to check
     */
95 void CheckVariableName(const std::string& rName);
96
    /**
     * Private check on a units name; same shape as CheckVariableName().
     *
     * @param rName  the units name to check
     */
102 void CheckUnitsName(const std::string& rName);
103
108
    /** Private helper taking no arguments; presumably opens or creates the HDF5 file -- TODO confirm. */
114 void OpenFile();
115
121
    /**
     * Private helper for choosing chunk dimensions.
     *
     * @param targetSize  target chunk size (presumably in bytes, matching the next argument -- TODO confirm)
     * @param pChunkSizeInBytes  pointer argument; presumably an output receiving the chunk size reached -- TODO confirm
     * @param pAllOneChunk  pointer argument; presumably an output flag for the whole dataset fitting one chunk -- TODO confirm
     */
130 void CalculateChunkDims( unsigned targetSize, unsigned* pChunkSizeInBytes, bool* pAllOneChunk );
131
    /** Private helper taking no arguments; presumably decides the chunk dimensions actually used -- TODO confirm. */
140 void SetChunkSize();
141
142public:
143
    /**
     * Parameter list of what is presumably the constructor.
     * NOTE(review): the line that opens this declaration (the name and any
     * leading parameter) is missing from this listing.
     *
     * @param rDirectory  directory name (how it is resolved is not visible here)
     * @param rBaseName  base name; presumably the stem of the output file name -- TODO confirm
     * @param cleanDirectory  defaults to true
     * @param extendData  defaults to false
     * @param datasetName  passed by value; defaults to the string Data
     * @param useCache  defaults to false
     */
160 const std::string& rDirectory,
161 const std::string& rBaseName,
162 bool cleanDirectory=true,
163 bool extendData=false,
164 std::string datasetName="Data",
165 bool useCache=false);
166
    /** Virtual destructor. */
170 virtual ~Hdf5DataWriter();
171
    /**
     * Define the fixed dimension from a single size.
     *
     * @param dimensionSize  size of the fixed dimension (signed long in this interface)
     */
177 void DefineFixedDimension(long dimensionSize);
178
    /**
     * Overload taking two index lists plus a size; presumably used when only
     * a subset of nodes is to be output -- TODO confirm.
     *
     * @param rNodesToOuputOriginalIndices  node indices, presumably in the original ordering
     * @param rNodesToOuputPermutedIndices  node indices, presumably in the permuted ordering
     * @param vecSize  presumably the size of the full vector the indices refer to -- TODO confirm
     */
186 void DefineFixedDimension(const std::vector<unsigned>& rNodesToOuputOriginalIndices, const std::vector<unsigned>& rNodesToOuputPermutedIndices, long vecSize);
187
    /**
     * Define the unlimited dimension.
     *
     * @param rVariableName  name of the unlimited variable
     * @param rVariableUnits  units of the unlimited variable
     * @param estimatedLength  defaults to 1; presumably a size hint for the unlimited dimension -- TODO confirm
     */
196 void DefineUnlimitedDimension(const std::string& rVariableName, const std::string& rVariableUnits, unsigned estimatedLength = 1);
197
203
    /**
     * Define a variable by name and units.
     *
     * @param rVariableName  name of the variable
     * @param rVariableUnits  units of the variable
     * @return an int; presumably the variable ID later passed to PutVector() -- TODO confirm
     */
212 int DefineVariable(const std::string& rVariableName, const std::string& rVariableUnits);
213
    /** @return a bool; presumably true while the writer is still in its definition phase -- TODO confirm. */
223 bool IsInDefineMode();
224
    /** Virtual, so derived classes may override it; presumably finishes the definition phase -- TODO confirm. */
228 virtual void EndDefineMode();
229
    /** Takes no arguments; presumably grows the dataset when more room is needed -- TODO confirm. */
233 void PossiblyExtend();
234
    /** Takes no arguments; the exact effect on the dataset is not visible from this header. */
241 void EmptyDataset();
242
    /**
     * Write a PETSc vector for one variable.
     *
     * @param variableID  presumably an ID returned by DefineVariable() or GetVariableByName() -- TODO confirm
     * @param petscVector  the PETSc Vec handle to write
     */
249 void PutVector(int variableID, Vec petscVector);
250
    /**
     * Write a PETSc vector holding several variables.
     *
     * @param variableIDs  list of variable IDs, passed by value (copied on each call)
     * @param petscVector  the PETSc Vec handle; presumably interleaves the listed variables -- TODO confirm
     */
257 void PutStripedVector(std::vector<int> variableIDs, Vec petscVector);
258
    /** @return a bool; presumably the value of mUseCache -- TODO confirm. */
263 bool GetUsingCache();
264
    /** Takes no arguments; presumably writes the contents of mDataCache to the file -- TODO confirm. */
268 void WriteCache();
269
    /**
     * Supply a value for the unlimited variable.
     *
     * @param value  the value to record
     */
275 void PutUnlimitedVariable(double value);
276
    /** Takes no arguments; presumably closes the file and releases HDF5 handles -- TODO confirm. */
280 void Close();
281
    /**
     * Look up a variable by name.
     *
     * @param rVariableName  the name to search for
     * @return an int; presumably the variable ID (handling of unknown names is not visible here)
     */
289 int GetVariableByName(const std::string& rVariableName);
290
291
    /**
     * Apply a permutation.
     *
     * @param rPermutation  the permutation, as a list of unsigned indices
     * @param unsafeExtendingMode  defaults to false; its exact meaning is not visible from this header
     * @return a bool; presumably whether a permutation was actually applied -- TODO confirm
     */
300 bool ApplyPermutation(const std::vector<unsigned>& rPermutation, bool unsafeExtendingMode=false);
301
    /**
     * Set an explicit chunk shape, one extent per parameter.
     *
     * @param rTimestepsPerChunk  number of time steps per chunk
     * @param rNodesPerChunk  number of nodes per chunk
     * @param rVariablesPerChunk  number of variables per chunk
     */
316 void SetFixedChunkSize(const unsigned& rTimestepsPerChunk,
317 const unsigned& rNodesPerChunk,
318 const unsigned& rVariablesPerChunk);
319
    // NOTE(review): the words above and below in the following note refer to
    // code that is not in this header (presumably the chunk-size algorithm and
    // a commented-out H5Pset_alignment call in the implementation file) -- confirm.
320 /*
321 * * NOTES ON CHUNK SIZE AND ALIGNMENT *
322 *
323 * The default target chunk size is 128 K, which seems to be a good compromise
324 * for small problems (e.g. on a desktop PC). For larger problems, I/O
325 * performance often improves with increased chunk size. A sweet spot seems to
326 * be 1 M chunks.
327 *
328 * On a striped filesystem, for best performance set the chunk size and
329 * alignment (using `H5Pset_alignment` above) to the file stripe size. With
330 * `H5Pset_alignment`, every chunk starts at a multiple of the alignment value.
331 *
332 * To avoid wasting space, the chunk size should be an integer multiple of the
333 * alignment value. Note that the algorithm below automatically goes back one
334 * step after exceeding the chunk size, which minimises wasted space. To see
335 * why, consider the examples below.
336 *
337 * (Example 1) Say our file system uses 1 M stripes. If we set
338 * target_size_in_bytes = 1024*1024;
339 * below and uncomment
340 * H5Pset_alignment(fapl, 0, 1024*1024);
341 * above, i.e. aim for (slightly under) 1 M chunks and align them to 1 M
342 * boundaries, then the algorithm below will get as close as possible to 1 M
343 * chunks but not exceed it, so each chunk will be padded slightly to sit on
344 * the 1 M boundaries. Each chunk will therefore have its own stripe on the
345 * file system, which should give us the best bandwidth and least contention.
346 * Conclusion: this is optimal!
347 *
348 * Note: In general the algorithm can get very close to the target so the
349 * waste isn't bad. Typical utilization is 99.99% (check with "h5ls -v ...").
350 *
351 * (Example 2) We set
352 * target_size_in_bytes = 128*1024;
353 * and uncomment
354 * H5Pset_alignment(fapl, 0, 1024*1024);
355 * i.e. 128 K chunks aligned to 1 M boundaries. This would pad every chunk to
356 * 1 M boundaries, wasting 7/8 of the space in the file! A file which might be
357 * 5 G with an efficient layout would be more like 40 G! Conclusion: setting
358 * the chunk size to less than the alignment value is very bad!
359 *
360 * (Example 3) Say our file system uses 1 M stripes. We set
361 * target_size_in_bytes = 2*1024*1024;
362 * and uncomment
363 * H5Pset_alignment(fapl, 0, 1024*1024);
364 * i.e. 2 M chunks aligned to 1 M boundaries. This might not be optimal, but
365 * it's OK, since the chunk size is (slightly under) twice the alignment, as in
366 * Example 1 the amount of padding would be very small. Each read/write would
367 * require accessing 2 stripes on the file system. Conclusion: a chunk size of
368 * an integer multiple of the alignment value is fine (but not optimal).
369 */
370
    /**
     * Set the target chunk size; the note above gives 128 K as the default target.
     *
     * @param targetSize  the target size as an hsize_t (presumably in bytes -- TODO confirm)
     */
384 void SetTargetChunkSize(hsize_t targetSize);
385
    /**
     * Set the alignment value; presumably forwarded to H5Pset_alignment as
     * discussed in the note above -- TODO confirm.
     *
     * @param alignment  the alignment as an hsize_t (presumably in bytes -- TODO confirm)
     */
400 void SetAlignment(hsize_t alignment);
401};
402
403#endif /*HDF5DATAWRITER_HPP_*/
static const unsigned DATASET_DIMS
void CheckVariableName(const std::string &rName)
std::vector< double > mDataCache
void SetTargetChunkSize(hsize_t targetSize)
hsize_t mChunkSize[DATASET_DIMS]
void ComputeIncompleteOffset()
void DefineUnlimitedDimension(const std::string &rVariableName, const std::string &rVariableUnits, unsigned estimatedLength=1)
DistributedVectorFactory & mrVectorFactory
void PutUnlimitedVariable(double value)
void AdvanceAlongUnlimitedDimension()
std::vector< unsigned > mIncompletePermIndices
long unsigned mCurrentTimeStep
bool mUseOptimalChunkSizeAlgorithm
const bool mUseExistingFile
unsigned mEstimatedUnlimitedLength
unsigned mFileFixedDimensionSize
int GetVariableByName(const std::string &rVariableName)
bool ApplyPermutation(const std::vector< unsigned > &rPermutation, bool unsafeExtendingMode=false)
std::vector< DataWriterVariable > mVariables
long unsigned mCacheFirstTimeStep
void DefineFixedDimension(long dimensionSize)
unsigned mDataFixedDimensionSize
virtual ~Hdf5DataWriter()
int DefineVariable(const std::string &rVariableName, const std::string &rVariableUnits)
virtual void EndDefineMode()
void PutVector(int variableID, Vec petscVector)
void CalculateChunkDims(unsigned targetSize, unsigned *pChunkSizeInBytes, bool *pAllOneChunk)
void SetAlignment(hsize_t alignment)
void SetFixedChunkSize(const unsigned &rTimestepsPerChunk, const unsigned &rNodesPerChunk, const unsigned &rVariablesPerChunk)
void CheckUnitsName(const std::string &rName)
hsize_t CalculateNumberOfChunks()
void PutStripedVector(std::vector< int > variableIDs, Vec petscVector)
hsize_t mFixedChunkSize[DATASET_DIMS]
const bool mCleanDirectory