Hdf5DataReader.cpp

00001 /*
00002 
00003 Copyright (c) 2005-2015, University of Oxford.
00004 All rights reserved.
00005 
00006 University of Oxford means the Chancellor, Masters and Scholars of the
00007 University of Oxford, having an administrative office at Wellington
00008 Square, Oxford OX1 2JD, UK.
00009 
00010 This file is part of Chaste.
00011 
00012 Redistribution and use in source and binary forms, with or without
00013 modification, are permitted provided that the following conditions are met:
00014  * Redistributions of source code must retain the above copyright notice,
00015    this list of conditions and the following disclaimer.
00016  * Redistributions in binary form must reproduce the above copyright notice,
00017    this list of conditions and the following disclaimer in the documentation
00018    and/or other materials provided with the distribution.
00019  * Neither the name of the University of Oxford nor the names of its
00020    contributors may be used to endorse or promote products derived from this
00021    software without specific prior written permission.
00022 
00023 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
00024 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00025 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00026 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
00027 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
00028 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
00029 GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
00030 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00031 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
00032 OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033 
00034 */
00035 
00036 #include "Hdf5DataReader.hpp"
00037 #include "Exception.hpp"
00038 #include "OutputFileHandler.hpp"
00039 #include "PetscTools.hpp"
00040 
00041 #include <cassert>
00042 #include <algorithm>
00043 
00044 Hdf5DataReader::Hdf5DataReader(const std::string& rDirectory,
00045                                const std::string& rBaseName,
00046                                bool makeAbsolute,
00047                                std::string datasetName)
00048     : AbstractHdf5Access(rDirectory, rBaseName, datasetName, makeAbsolute),
00049       mNumberTimesteps(1),
00050       mClosed(false)
00051 {
00052     CommonConstructor();
00053 }
00054 
00055 Hdf5DataReader::Hdf5DataReader(const FileFinder& rDirectory,
00056                                const std::string& rBaseName,
00057                                std::string datasetName)
00058     : AbstractHdf5Access(rDirectory, rBaseName, datasetName),
00059       mNumberTimesteps(1),
00060       mClosed(false)
00061 {
00062     CommonConstructor();
00063 }
00064 
00065 void Hdf5DataReader::CommonConstructor()
00066 {
00067     if (!mDirectory.IsDir() || !mDirectory.Exists())
00068     {
00069         EXCEPTION("Directory does not exist: " + mDirectory.GetAbsolutePath());
00070     }
00071 
00072     std::string file_name = mDirectory.GetAbsolutePath() + mBaseName + ".h5";
00073     FileFinder h5_file(file_name, RelativeTo::Absolute);
00074 
00075     if (!h5_file.Exists())
00076     {
00077         EXCEPTION("Hdf5DataReader could not open " + file_name + " , as it does not exist.");
00078     }
00079 
00080     // Open the file and the main dataset
00081     mFileId = H5Fopen(file_name.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT);
00082 
00083     if (mFileId <= 0)
00084     {
00085         EXCEPTION("Hdf5DataReader could not open " << file_name <<
00086                   " , H5Fopen error code = " << mFileId);
00087     }
00088 
00089     mVariablesDatasetId = H5Dopen(mFileId, mDatasetName.c_str());
00090     SetMainDatasetRawChunkCache();
00091 
00092     if (mVariablesDatasetId <= 0)
00093     {
00094         H5Fclose(mFileId);
00095         EXCEPTION("Hdf5DataReader opened " << file_name << " but could not find the dataset '" <<
00096                   mDatasetName.c_str() << "', H5Dopen error code = " << mVariablesDatasetId);
00097     }
00098 
00099     hid_t variables_dataspace = H5Dget_space(mVariablesDatasetId);
00100     mVariablesDatasetRank = H5Sget_simple_extent_ndims(variables_dataspace);
00101 
00102     // Get the dataset/dataspace dimensions
00103     hsize_t dataset_max_sizes[AbstractHdf5Access::DATASET_DIMS];
00104     H5Sget_simple_extent_dims(variables_dataspace, mDatasetDims, dataset_max_sizes);
00105 
00106     for (unsigned i=1; i<AbstractHdf5Access::DATASET_DIMS; i++)  // Zero is excluded since it may be unlimited
00107     {
00108         assert(mDatasetDims[i] == dataset_max_sizes[i]);
00109     }
00110 
00111     // Check if an unlimited dimension has been defined
00112     if (dataset_max_sizes[0] == H5S_UNLIMITED)
00113     {
00114         // In terms of an Unlimited dimension dataset:
00115         // * Files pre - r16738 (inc. Release 3.1 and earlier) use simply "Time" for "Data"'s unlimited variable.
00116         // * Files generated by r16738 - r18257 used "<DatasetName>_Time" for "<DatasetName>"'s unlimited variable,
00117         //   - These are not to be used and there is no backwards compatibility for them, since they weren't in a release.
00118         // * Files post r18257 (inc. Release 3.2 onwards) use "<DatasetName>_Unlimited" for "<DatasetName>"'s
00119         //   unlimited variable,
00120         //   - a new attribute "Name" has been added to the Unlimited Dataset to allow it to assign
00121         //     any name to the unlimited variable. Which can then be easily read by this class.
00122         //   - if this dataset is missing we look for simply "Time" to remain backwards compatible with Releases <= 3.1
00123         SetUnlimitedDatasetId();
00124 
00125         hid_t timestep_dataspace = H5Dget_space(mUnlimitedDatasetId);
00126 
00127         // Get the dataset/dataspace dimensions
00128         H5Sget_simple_extent_dims(timestep_dataspace, &mNumberTimesteps, NULL);
00129     }
00130 
00131     // Get the attribute where the name of the variables are stored
00132     hid_t attribute_id = H5Aopen_name(mVariablesDatasetId, "Variable Details");
00133 
00134     // Get attribute datatype, dataspace, rank, and dimensions
00135     hid_t attribute_type  = H5Aget_type(attribute_id);
00136     hid_t attribute_space = H5Aget_space(attribute_id);
00137 
00138     hsize_t attr_dataspace_dim;
00139     H5Sget_simple_extent_dims(attribute_space, &attr_dataspace_dim, NULL);
00140 
00141     unsigned num_columns = H5Sget_simple_extent_npoints(attribute_space);
00142     char* string_array = (char *)malloc(sizeof(char)*MAX_STRING_SIZE*(int)num_columns);
00143     H5Aread(attribute_id, attribute_type, string_array);
00144 
00145     // Loop over column names and store them.
00146     for (unsigned index=0; index < num_columns; index++)
00147     {
00148         // Read the string from the raw vector
00149         std::string column_name_unit(&string_array[MAX_STRING_SIZE*index]);
00150 
00151         // Find beginning of unit definition.
00152         size_t name_length = column_name_unit.find('(');
00153         size_t unit_length = column_name_unit.find(')') - name_length - 1;
00154 
00155         std::string column_name = column_name_unit.substr(0, name_length);
00156         std::string column_unit = column_name_unit.substr(name_length+1, unit_length);
00157 
00158         mVariableNames.push_back(column_name);
00159         mVariableToColumnIndex[column_name] = index;
00160         mVariableToUnit[column_name] = column_unit;
00161     }
00162 
00163     // Release all the identifiers
00164     H5Tclose(attribute_type);
00165     H5Sclose(attribute_space);
00166     H5Aclose(attribute_id);
00167 
00168     // Free allocated memory
00169     free(string_array);
00170 
00171     // Find out if it's incomplete data
00172     H5E_BEGIN_TRY //Supress HDF5 error if the IsDataComplete name isn't there
00173     {
00174         attribute_id = H5Aopen_name(mVariablesDatasetId, "IsDataComplete");
00175     }
00176     H5E_END_TRY;
00177     if (attribute_id < 0)
00178     {
00179         // This is in the old format (before we added the IsDataComplete attribute).
00180         // Just quit (leaving a nasty hdf5 error).
00181         return;
00182     }
00183 
00184     attribute_type  = H5Aget_type(attribute_id);
00185     attribute_space = H5Aget_space(attribute_id);
00186     unsigned is_data_complete;
00187     H5Aread(attribute_id, H5T_NATIVE_UINT, &is_data_complete);
00188 
00189     // Release all the identifiers
00190     H5Tclose(attribute_type);
00191     H5Sclose(attribute_space);
00192     H5Aclose(attribute_id);
00193     mIsDataComplete = (is_data_complete == 1) ? true : false;
00194 
00195     if (is_data_complete)
00196     {
00197         return;
00198     }
00199 
00200     // Incomplete data
00201     // Read the vector thing
00202     attribute_id = H5Aopen_name(mVariablesDatasetId, "NodeMap");
00203     attribute_type  = H5Aget_type(attribute_id);
00204     attribute_space = H5Aget_space(attribute_id);
00205 
00206     // Get the dataset/dataspace dimensions
00207     unsigned num_node_indices = H5Sget_simple_extent_npoints(attribute_space);
00208 
00209     // Read data from hyperslab in the file into the hyperslab in memory
00210     mIncompleteNodeIndices.clear();
00211     mIncompleteNodeIndices.resize(num_node_indices);
00212     H5Aread(attribute_id, H5T_NATIVE_UINT, &mIncompleteNodeIndices[0]);
00213 
00214     H5Tclose(attribute_type);
00215     H5Sclose(attribute_space);
00216     H5Aclose(attribute_id);
00217 }
00218 
00219 std::vector<double> Hdf5DataReader::GetVariableOverTime(const std::string& rVariableName,
00220                                                         unsigned nodeIndex)
00221 {
00222     if (!mIsUnlimitedDimensionSet)
00223     {
00224         EXCEPTION("The dataset '" << mDatasetName << "' does not contain time dependent data");
00225     }
00226 
00227     unsigned actual_node_index = nodeIndex;
00228     if (!mIsDataComplete)
00229     {
00230         unsigned node_index = 0;
00231         for (node_index=0; node_index<mIncompleteNodeIndices.size(); node_index++)
00232         {
00233             if (mIncompleteNodeIndices[node_index]==nodeIndex)
00234             {
00235                 actual_node_index = node_index;
00236                 break;
00237             }
00238         }
00239         if ( node_index == mIncompleteNodeIndices.size())
00240         {
00241             EXCEPTION("The incomplete dataset '" << mDatasetName << "' does not contain info of node " << nodeIndex);
00242         }
00243     }
00244     if (actual_node_index >= mDatasetDims[1])
00245     {
00246         EXCEPTION("The dataset '" << mDatasetName << "' doesn't contain info of node " << actual_node_index);
00247     }
00248 
00249     std::map<std::string, unsigned>::iterator col_iter = mVariableToColumnIndex.find(rVariableName);
00250     if (col_iter == mVariableToColumnIndex.end())
00251     {
00252         EXCEPTION("The dataset '" << mDatasetName << "' doesn't contain data for variable " << rVariableName);
00253     }
00254     unsigned column_index = (*col_iter).second;
00255 
00256     // Define hyperslab in the dataset.
00257     hsize_t offset[3] = {0, actual_node_index, column_index};
00258     hsize_t count[3]  = {mDatasetDims[0], 1, 1};
00259     hid_t variables_dataspace = H5Dget_space(mVariablesDatasetId);
00260     H5Sselect_hyperslab(variables_dataspace, H5S_SELECT_SET, offset, NULL, count, NULL);
00261 
00262     // Define a simple memory dataspace
00263     hid_t memspace = H5Screate_simple(1, &mDatasetDims[0] ,NULL);
00264 
00265     // Data buffer to return
00266     std::vector<double> ret(mDatasetDims[0]);
00267 
00268     // Read data from hyperslab in the file into the hyperslab in memory
00269     H5Dread(mVariablesDatasetId, H5T_NATIVE_DOUBLE, memspace, variables_dataspace, H5P_DEFAULT, &ret[0]);
00270 
00271     H5Sclose(variables_dataspace);
00272     H5Sclose(memspace);
00273 
00274     return ret;
00275 }
00276 
00277 std::vector<std::vector<double> > Hdf5DataReader::GetVariableOverTimeOverMultipleNodes(const std::string& rVariableName,
00278                                                                                        unsigned lowerIndex,
00279                                                                                        unsigned upperIndex)
00280 {
00281     if (!mIsUnlimitedDimensionSet)
00282     {
00283         EXCEPTION("The dataset '" << mDatasetName << "' does not contain time dependent data");
00284     }
00285 
00286     if (!mIsDataComplete)
00287     {
00288         EXCEPTION("GetVariableOverTimeOverMultipleNodes() cannot be called using incomplete data sets (those for which data was only written for certain nodes)");
00289     }
00290 
00291     if (upperIndex > mDatasetDims[1])
00292     {
00293        EXCEPTION("The dataset '" << mDatasetName << "' doesn't contain info for node " << upperIndex-1);
00294     }
00295 
00296     std::map<std::string, unsigned>::iterator col_iter = mVariableToColumnIndex.find(rVariableName);
00297     if (col_iter == mVariableToColumnIndex.end())
00298     {
00299         EXCEPTION("The dataset '" << mDatasetName << "' doesn't contain data for variable " << rVariableName);
00300     }
00301     unsigned column_index = (*col_iter).second;
00302 
00303     // Define hyperslab in the dataset.
00304     hsize_t offset[3] = {0, lowerIndex, column_index};
00305     hsize_t count[3]  = {mDatasetDims[0], upperIndex-lowerIndex, 1};
00306     hid_t variables_dataspace = H5Dget_space(mVariablesDatasetId);
00307     H5Sselect_hyperslab(variables_dataspace, H5S_SELECT_SET, offset, NULL, count, NULL);
00308 
00309     // Define a simple memory dataspace
00310     hsize_t data_dimensions[2];
00311     data_dimensions[0] = mDatasetDims[0];
00312     data_dimensions[1] = upperIndex-lowerIndex;
00313     hid_t memspace = H5Screate_simple(2, data_dimensions, NULL);
00314 
00315     double* data_read = new double[mDatasetDims[0]*(upperIndex-lowerIndex)];
00316 
00317     // Read data from hyperslab in the file into the hyperslab in memory
00318     H5Dread(mVariablesDatasetId, H5T_NATIVE_DOUBLE, memspace, variables_dataspace, H5P_DEFAULT, data_read);
00319 
00320     H5Sclose(variables_dataspace);
00321     H5Sclose(memspace);
00322 
00323     // Data buffer to return
00324     unsigned num_nodes_read = upperIndex-lowerIndex;
00325     unsigned num_timesteps = mDatasetDims[0];
00326 
00327     std::vector<std::vector<double> > ret(num_nodes_read);
00328 
00329     for (unsigned node_num=0; node_num<num_nodes_read; node_num++)
00330     {
00331         ret[node_num].resize(num_timesteps);
00332         for (unsigned time_num=0; time_num<num_timesteps; time_num++)
00333         {
00334             ret[node_num][time_num] = data_read[num_nodes_read*time_num + node_num];
00335         }
00336     }
00337 
00338     delete[] data_read;
00339 
00340     return ret;
00341 }
00342 
00343 void Hdf5DataReader::GetVariableOverNodes(Vec data,
00344                                           const std::string& rVariableName,
00345                                           unsigned timestep)
00346 {
00347     if (!mIsDataComplete)
00348     {
00349         EXCEPTION("You can only get a vector for complete data");
00350     }
00351     if (!mIsUnlimitedDimensionSet && timestep!=0)
00352     {
00353         EXCEPTION("The dataset '" << mDatasetName << "' does not contain time dependent data");
00354     }
00355 
00356     std::map<std::string, unsigned>::iterator col_iter = mVariableToColumnIndex.find(rVariableName);
00357     if (col_iter == mVariableToColumnIndex.end())
00358     {
00359         EXCEPTION("The dataset '" << mDatasetName << "' does not contain data for variable " << rVariableName);
00360     }
00361     unsigned column_index = (*col_iter).second;
00362 
00363     // Check for valid timestep
00364     if (timestep >= mNumberTimesteps)
00365     {
00366         EXCEPTION("The dataset '" << mDatasetName << "' does not contain data for timestep number " << timestep);
00367     }
00368 
00369     int lo, hi, size;
00370     VecGetSize(data, &size);
00371     if ((unsigned)size != mDatasetDims[1])
00372     {
00373         EXCEPTION("Could not read data because Vec is the wrong size");
00374     }
00375     // Get range owned by each processor
00376     VecGetOwnershipRange(data, &lo, &hi);
00377 
00378     if (hi > lo) // i.e. we own some...
00379     {
00380         // Define a dataset in memory for this process
00381         hsize_t v_size[1] = {(unsigned)(hi-lo)};
00382         hid_t memspace = H5Screate_simple(1, v_size, NULL);
00383 
00384         // Select hyperslab in the file.
00385         hsize_t offset[3] = {timestep, (unsigned)(lo), column_index};
00386         hsize_t count[3]  = {1, (unsigned)(hi-lo), 1};
00387         hid_t hyperslab_space = H5Dget_space(mVariablesDatasetId);
00388         H5Sselect_hyperslab(hyperslab_space, H5S_SELECT_SET, offset, NULL, count, NULL);
00389 
00390         double* p_petsc_vector;
00391         VecGetArray(data, &p_petsc_vector);
00392 
00393         herr_t err = H5Dread(mVariablesDatasetId, H5T_NATIVE_DOUBLE, memspace, hyperslab_space, H5P_DEFAULT, p_petsc_vector);
00394         UNUSED_OPT(err);
00395         assert(err==0);
00396 
00397         VecRestoreArray(data, &p_petsc_vector);
00398 
00399         H5Sclose(hyperslab_space);
00400         H5Sclose(memspace);
00401     }
00402 }
00403 
00404 std::vector<double> Hdf5DataReader::GetUnlimitedDimensionValues()
00405 {
00406     // Data buffer to return
00407     std::vector<double> ret(mNumberTimesteps);
00408 
00409     if (!mIsUnlimitedDimensionSet)
00410     {
00411         // Fake it
00412         assert(mNumberTimesteps==1);
00413         ret[0] = 0.0;
00414         return ret;
00415     }
00416 
00417     // Read data from hyperslab in the file into memory
00418     H5Dread(mUnlimitedDatasetId, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &ret[0]);
00419 
00420     return ret;
00421 }
00422 
00423 void Hdf5DataReader::Close()
00424 {
00425     if (!mClosed)
00426     {
00427         H5Dclose(mVariablesDatasetId);
00428         if (mIsUnlimitedDimensionSet)
00429         {
00430             H5Dclose(mUnlimitedDatasetId);
00431         }
00432         H5Fclose(mFileId);
00433         mClosed = true;
00434     }
00435 }
00436 
00437 Hdf5DataReader::~Hdf5DataReader()
00438 {
00439     Close();
00440 }
00441 
00442 unsigned Hdf5DataReader::GetNumberOfRows()
00443 {
00444     return mDatasetDims[1];
00445 }
00446 
00447 std::vector<std::string> Hdf5DataReader::GetVariableNames()
00448 {
00449     return mVariableNames;
00450 }
00451 
00452 std::string Hdf5DataReader::GetUnit(const std::string& rVariableName)
00453 {
00454     return mVariableToUnit[rVariableName];
00455 }
00456 
00457 

Generated by  doxygen 1.6.2