Chaste Commit::baa90ac2819b962188b7562f2326be23c47859a7
Hdf5DataReader.cpp
1/*
2
3Copyright (c) 2005-2024, University of Oxford.
4All rights reserved.
5
6University of Oxford means the Chancellor, Masters and Scholars of the
7University of Oxford, having an administrative office at Wellington
8Square, Oxford OX1 2JD, UK.
9
10This file is part of Chaste.
11
12Redistribution and use in source and binary forms, with or without
13modification, are permitted provided that the following conditions are met:
14 * Redistributions of source code must retain the above copyright notice,
15 this list of conditions and the following disclaimer.
16 * Redistributions in binary form must reproduce the above copyright notice,
17 this list of conditions and the following disclaimer in the documentation
18 and/or other materials provided with the distribution.
19 * Neither the name of the University of Oxford nor the names of its
20 contributors may be used to endorse or promote products derived from this
21 software without specific prior written permission.
22
23THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
24AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
27LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
29GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33
34*/
35
36#include "Hdf5DataReader.hpp"
37#include "Exception.hpp"
38#include "OutputFileHandler.hpp"
39#include "PetscTools.hpp"
40
41#include <cassert>
42#include <algorithm>
43
44Hdf5DataReader::Hdf5DataReader(const std::string& rDirectory,
45 const std::string& rBaseName,
46 bool makeAbsolute,
47 std::string datasetName)
48 : AbstractHdf5Access(rDirectory, rBaseName, datasetName, makeAbsolute),
49 mNumberTimesteps(1),
50 mClosed(false)
51{
53}
54
56 const std::string& rBaseName,
57 std::string datasetName)
58 : AbstractHdf5Access(rDirectory, rBaseName, datasetName),
59 mNumberTimesteps(1),
60 mClosed(false)
61{
63}
64
66{
67 if (!mDirectory.IsDir() || !mDirectory.Exists())
68 {
69 EXCEPTION("Directory does not exist: " + mDirectory.GetAbsolutePath());
70 }
71
72 std::string file_name = mDirectory.GetAbsolutePath() + mBaseName + ".h5";
73 FileFinder h5_file(file_name, RelativeTo::Absolute);
74
75 if (!h5_file.Exists())
76 {
77 EXCEPTION("Hdf5DataReader could not open " + file_name + " , as it does not exist.");
78 }
79
80 // Open the file and the main dataset
81 mFileId = H5Fopen(file_name.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT);
82
83 if (mFileId <= 0)
84 {
85 EXCEPTION("Hdf5DataReader could not open " << file_name <<
86 " , H5Fopen error code = " << mFileId);
87 }
88
89 mVariablesDatasetId = H5Dopen(mFileId, mDatasetName.c_str(), H5P_DEFAULT);
91
92 if (mVariablesDatasetId <= 0)
93 {
94 H5Fclose(mFileId);
95 EXCEPTION("Hdf5DataReader opened " << file_name << " but could not find the dataset '" <<
96 mDatasetName.c_str() << "', H5Dopen error code = " << mVariablesDatasetId);
97 }
98
99 hid_t variables_dataspace = H5Dget_space(mVariablesDatasetId);
100 mVariablesDatasetRank = H5Sget_simple_extent_ndims(variables_dataspace);
101
102 // Get the dataset/dataspace dimensions
103 hsize_t dataset_max_sizes[AbstractHdf5Access::DATASET_DIMS];
104 H5Sget_simple_extent_dims(variables_dataspace, mDatasetDims, dataset_max_sizes);
105
106 for (unsigned i=1; i<AbstractHdf5Access::DATASET_DIMS; i++) // Zero is excluded since it may be unlimited
107 {
108 assert(mDatasetDims[i] == dataset_max_sizes[i]);
109 }
110
111 // Check if an unlimited dimension has been defined
112 if (dataset_max_sizes[0] == H5S_UNLIMITED)
113 {
114 // In terms of an Unlimited dimension dataset:
115 // * Files pre - r16738 (inc. Release 3.1 and earlier) use simply "Time" for "Data"'s unlimited variable.
116 // * Files generated by r16738 - r18257 used "<DatasetName>_Time" for "<DatasetName>"'s unlimited variable,
117 // - These are not to be used and there is no backwards compatibility for them, since they weren't in a release.
118 // * Files post r18257 (inc. Release 3.2 onwards) use "<DatasetName>_Unlimited" for "<DatasetName>"'s
119 // unlimited variable,
120 // - a new attribute "Name" has been added to the Unlimited Dataset to allow it to assign
121 // any name to the unlimited variable. Which can then be easily read by this class.
122 // - if this dataset is missing we look for simply "Time" to remain backwards compatible with Releases <= 3.1
124
125 hid_t timestep_dataspace = H5Dget_space(mUnlimitedDatasetId);
126
127 // Get the dataset/dataspace dimensions
128 H5Sget_simple_extent_dims(timestep_dataspace, &mNumberTimesteps, nullptr);
129 }
130
131 // Get the attribute where the name of the variables are stored
132 hid_t attribute_id = H5Aopen_name(mVariablesDatasetId, "Variable Details");
133
134 // Get attribute datatype, dataspace, rank, and dimensions
135 hid_t attribute_type = H5Aget_type(attribute_id);
136 hid_t attribute_space = H5Aget_space(attribute_id);
137
138 hsize_t attr_dataspace_dim;
139 H5Sget_simple_extent_dims(attribute_space, &attr_dataspace_dim, nullptr);
140
141 unsigned num_columns = H5Sget_simple_extent_npoints(attribute_space);
142 char* string_array = (char *)malloc(sizeof(char)*MAX_STRING_SIZE*(int)num_columns);
143 H5Aread(attribute_id, attribute_type, string_array);
144
145 // Loop over column names and store them.
146 for (unsigned index=0; index < num_columns; index++)
147 {
148 // Read the string from the raw vector
149 std::string column_name_unit(&string_array[MAX_STRING_SIZE*index]);
150
151 // Find beginning of unit definition.
152 size_t name_length = column_name_unit.find('(');
153 size_t unit_length = column_name_unit.find(')') - name_length - 1;
154
155 std::string column_name = column_name_unit.substr(0, name_length);
156 std::string column_unit = column_name_unit.substr(name_length+1, unit_length);
157
158 mVariableNames.push_back(column_name);
159 mVariableToColumnIndex[column_name] = index;
160 mVariableToUnit[column_name] = column_unit;
161 }
162
163 // Release all the identifiers
164 H5Tclose(attribute_type);
165 H5Sclose(attribute_space);
166 H5Aclose(attribute_id);
167
168 // Free allocated memory
169 free(string_array);
170
171 // Find out if it's incomplete data
172 H5E_BEGIN_TRY //Supress HDF5 error if the IsDataComplete name isn't there
173 {
174 attribute_id = H5Aopen_name(mVariablesDatasetId, "IsDataComplete");
175 }
176 H5E_END_TRY;
177 if (attribute_id < 0)
178 {
179 // This is in the old format (before we added the IsDataComplete attribute).
180 // Just quit (leaving a nasty hdf5 error).
181 return;
182 }
183
184 attribute_type = H5Aget_type(attribute_id);
185 attribute_space = H5Aget_space(attribute_id);
186 unsigned is_data_complete;
187 H5Aread(attribute_id, H5T_NATIVE_UINT, &is_data_complete);
188
189 // Release all the identifiers
190 H5Tclose(attribute_type);
191 H5Sclose(attribute_space);
192 H5Aclose(attribute_id);
193 mIsDataComplete = (is_data_complete == 1) ? true : false;
194
195 if (is_data_complete)
196 {
197 return;
198 }
199
200 // Incomplete data
201 // Read the vector thing
202 attribute_id = H5Aopen_name(mVariablesDatasetId, "NodeMap");
203 attribute_type = H5Aget_type(attribute_id);
204 attribute_space = H5Aget_space(attribute_id);
205
206 // Get the dataset/dataspace dimensions
207 unsigned num_node_indices = H5Sget_simple_extent_npoints(attribute_space);
208
209 // Read data from hyperslab in the file into the hyperslab in memory
211 mIncompleteNodeIndices.resize(num_node_indices);
212 H5Aread(attribute_id, H5T_NATIVE_UINT, &mIncompleteNodeIndices[0]);
213
214 H5Tclose(attribute_type);
215 H5Sclose(attribute_space);
216 H5Aclose(attribute_id);
217}
218
219std::vector<double> Hdf5DataReader::GetVariableOverTime(const std::string& rVariableName,
220 unsigned nodeIndex)
221{
223 {
224 EXCEPTION("The dataset '" << mDatasetName << "' does not contain time dependent data");
225 }
226
227 unsigned actual_node_index = nodeIndex;
228 if (!mIsDataComplete)
229 {
230 unsigned node_index = 0;
231 for (node_index=0; node_index<mIncompleteNodeIndices.size(); node_index++)
232 {
233 if (mIncompleteNodeIndices[node_index]==nodeIndex)
234 {
235 actual_node_index = node_index;
236 break;
237 }
238 }
239 if (node_index == mIncompleteNodeIndices.size())
240 {
241 EXCEPTION("The incomplete dataset '" << mDatasetName << "' does not contain info of node " << nodeIndex);
242 }
243 }
244 if (actual_node_index >= mDatasetDims[1])
245 {
246 EXCEPTION("The dataset '" << mDatasetName << "' doesn't contain info of node " << actual_node_index);
247 }
248
249 std::map<std::string, unsigned>::iterator col_iter = mVariableToColumnIndex.find(rVariableName);
250 if (col_iter == mVariableToColumnIndex.end())
251 {
252 EXCEPTION("The dataset '" << mDatasetName << "' doesn't contain data for variable " << rVariableName);
253 }
254 unsigned column_index = (*col_iter).second;
255
256 // Define hyperslab in the dataset.
257 hsize_t offset[3] = {0, actual_node_index, column_index};
258 hsize_t count[3] = {mDatasetDims[0], 1, 1};
259 hid_t variables_dataspace = H5Dget_space(mVariablesDatasetId);
260 H5Sselect_hyperslab(variables_dataspace, H5S_SELECT_SET, offset, nullptr, count, nullptr);
261
262 // Define a simple memory dataspace
263 hid_t memspace = H5Screate_simple(1, &mDatasetDims[0] ,nullptr);
264
265 // Data buffer to return
266 std::vector<double> ret(mDatasetDims[0]);
267
268 // Read data from hyperslab in the file into the hyperslab in memory
269 H5Dread(mVariablesDatasetId, H5T_NATIVE_DOUBLE, memspace, variables_dataspace, H5P_DEFAULT, &ret[0]);
270
271 H5Sclose(variables_dataspace);
272 H5Sclose(memspace);
273
274 return ret;
275}
276
277std::vector<std::vector<double> > Hdf5DataReader::GetVariableOverTimeOverMultipleNodes(const std::string& rVariableName,
278 unsigned lowerIndex,
279 unsigned upperIndex)
280{
282 {
283 EXCEPTION("The dataset '" << mDatasetName << "' does not contain time dependent data");
284 }
285
286 if (!mIsDataComplete)
287 {
288 EXCEPTION("GetVariableOverTimeOverMultipleNodes() cannot be called using incomplete data sets (those for which data was only written for certain nodes)");
289 }
290
291 if (upperIndex > mDatasetDims[1])
292 {
293 EXCEPTION("The dataset '" << mDatasetName << "' doesn't contain info for node " << upperIndex-1);
294 }
295
296 std::map<std::string, unsigned>::iterator col_iter = mVariableToColumnIndex.find(rVariableName);
297 if (col_iter == mVariableToColumnIndex.end())
298 {
299 EXCEPTION("The dataset '" << mDatasetName << "' doesn't contain data for variable " << rVariableName);
300 }
301 unsigned column_index = (*col_iter).second;
302
303 // Define hyperslab in the dataset.
304 hsize_t offset[3] = {0, lowerIndex, column_index};
305 hsize_t count[3] = {mDatasetDims[0], upperIndex-lowerIndex, 1};
306 hid_t variables_dataspace = H5Dget_space(mVariablesDatasetId);
307 H5Sselect_hyperslab(variables_dataspace, H5S_SELECT_SET, offset, nullptr, count, nullptr);
308
309 // Define a simple memory dataspace
310 hsize_t data_dimensions[2];
311 data_dimensions[0] = mDatasetDims[0];
312 data_dimensions[1] = upperIndex-lowerIndex;
313 hid_t memspace = H5Screate_simple(2, data_dimensions, nullptr);
314
315 double* data_read = new double[mDatasetDims[0]*(upperIndex-lowerIndex)];
316
317 // Read data from hyperslab in the file into the hyperslab in memory
318 H5Dread(mVariablesDatasetId, H5T_NATIVE_DOUBLE, memspace, variables_dataspace, H5P_DEFAULT, data_read);
319
320 H5Sclose(variables_dataspace);
321 H5Sclose(memspace);
322
323 // Data buffer to return
324 unsigned num_nodes_read = upperIndex-lowerIndex;
325 unsigned num_timesteps = mDatasetDims[0];
326
327 std::vector<std::vector<double> > ret(num_nodes_read);
328
329 for (unsigned node_num=0; node_num<num_nodes_read; node_num++)
330 {
331 ret[node_num].resize(num_timesteps);
332 for (unsigned time_num=0; time_num<num_timesteps; time_num++)
333 {
334 ret[node_num][time_num] = data_read[num_nodes_read*time_num + node_num];
335 }
336 }
337
338 delete[] data_read;
339
340 return ret;
341}
342
344 const std::string& rVariableName,
345 unsigned timestep)
346{
347 if (!mIsDataComplete)
348 {
349 EXCEPTION("You can only get a vector for complete data");
350 }
351 if (!mIsUnlimitedDimensionSet && timestep!=0)
352 {
353 EXCEPTION("The dataset '" << mDatasetName << "' does not contain time dependent data");
354 }
355
356 std::map<std::string, unsigned>::iterator col_iter = mVariableToColumnIndex.find(rVariableName);
357 if (col_iter == mVariableToColumnIndex.end())
358 {
359 EXCEPTION("The dataset '" << mDatasetName << "' does not contain data for variable " << rVariableName);
360 }
361 unsigned column_index = (*col_iter).second;
362
363 // Check for valid timestep
364 if (timestep >= mNumberTimesteps)
365 {
366 EXCEPTION("The dataset '" << mDatasetName << "' does not contain data for timestep number " << timestep);
367 }
368
369 int lo, hi, size;
370 VecGetSize(data, &size);
371 if ((unsigned)size != mDatasetDims[1])
372 {
373 EXCEPTION("Could not read data because Vec is the wrong size");
374 }
375 // Get range owned by each processor
376 VecGetOwnershipRange(data, &lo, &hi);
377
378 if (hi > lo) // i.e. we own some...
379 {
380 // Define a dataset in memory for this process
381 hsize_t v_size[1] = {(unsigned)(hi-lo)};
382 hid_t memspace = H5Screate_simple(1, v_size, nullptr);
383
384 // Select hyperslab in the file.
385 hsize_t offset[3] = {timestep, (unsigned)(lo), column_index};
386 hsize_t count[3] = {1, (unsigned)(hi-lo), 1};
387 hid_t hyperslab_space = H5Dget_space(mVariablesDatasetId);
388 H5Sselect_hyperslab(hyperslab_space, H5S_SELECT_SET, offset, nullptr, count, nullptr);
389
390 double* p_petsc_vector;
391 VecGetArray(data, &p_petsc_vector);
392
393 herr_t err = H5Dread(mVariablesDatasetId, H5T_NATIVE_DOUBLE, memspace, hyperslab_space, H5P_DEFAULT, p_petsc_vector);
394 UNUSED_OPT(err);
395 assert(err==0);
396
397 VecRestoreArray(data, &p_petsc_vector);
398
399 H5Sclose(hyperslab_space);
400 H5Sclose(memspace);
401 }
402}
403
405{
406 // Data buffer to return
407 std::vector<double> ret(mNumberTimesteps);
408
410 {
411 // Fake it
412 assert(mNumberTimesteps==1);
413 ret[0] = 0.0;
414 return ret;
415 }
416
417 // Read data from hyperslab in the file into memory
418 H5Dread(mUnlimitedDatasetId, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &ret[0]);
419
420 return ret;
421}
422
424{
425 if (!mClosed)
426 {
427 H5Dclose(mVariablesDatasetId);
429 {
430 H5Dclose(mUnlimitedDatasetId);
431 }
432 H5Fclose(mFileId);
433 mClosed = true;
434 }
435}
436
441
443{
444 return mDatasetDims[1];
445}
446
447std::vector<std::string> Hdf5DataReader::GetVariableNames()
448{
449 return mVariableNames;
450}
451
452std::string Hdf5DataReader::GetUnit(const std::string& rVariableName)
453{
454 return mVariableToUnit[rVariableName];
455}
456
457
#define EXCEPTION(message)
#define UNUSED_OPT(var)
std::vector< unsigned > mIncompleteNodeIndices
hsize_t mDatasetDims[DATASET_DIMS]
static const unsigned DATASET_DIMS
std::string GetAbsolutePath() const
bool IsDir() const
bool Exists() const
std::map< std::string, std::string > mVariableToUnit
std::string GetUnit(const std::string &rVariableName)
std::vector< std::string > mVariableNames
std::map< std::string, unsigned > mVariableToColumnIndex
std::vector< std::vector< double > > GetVariableOverTimeOverMultipleNodes(const std::string &rVariableName, unsigned lowerIndex, unsigned upperIndex)
unsigned GetNumberOfRows()
void GetVariableOverNodes(Vec data, const std::string &rVariableName, unsigned timestep=0)
unsigned mVariablesDatasetRank
Hdf5DataReader(const std::string &rDirectory, const std::string &rBaseName, bool makeAbsolute=true, std::string datasetName="Data")
std::vector< double > GetUnlimitedDimensionValues()
std::vector< std::string > GetVariableNames()
std::vector< double > GetVariableOverTime(const std::string &rVariableName, unsigned nodeIndex)