ColumnDataReader.cpp

Go to the documentation of this file.
00001 /*
00002 
00003 Copyright (c) 2005-2015, University of Oxford.
00004 All rights reserved.
00005 
00006 University of Oxford means the Chancellor, Masters and Scholars of the
00007 University of Oxford, having an administrative office at Wellington
00008 Square, Oxford OX1 2JD, UK.
00009 
00010 This file is part of Chaste.
00011 
00012 Redistribution and use in source and binary forms, with or without
00013 modification, are permitted provided that the following conditions are met:
00014  * Redistributions of source code must retain the above copyright notice,
00015    this list of conditions and the following disclaimer.
00016  * Redistributions in binary form must reproduce the above copyright notice,
00017    this list of conditions and the following disclaimer in the documentation
00018    and/or other materials provided with the distribution.
00019  * Neither the name of the University of Oxford nor the names of its
00020    contributors may be used to endorse or promote products derived from this
00021    software without specific prior written permission.
00022 
00023 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
00024 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00025 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00026 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
00027 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
00028 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
00029 GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
00030 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00031 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
00032 OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033 
00034 */
00035 
00042 #include "ColumnDataReader.hpp"
00043 #include "ColumnDataConstants.hpp"
00044 
00045 #include <fstream>
00046 #include <sstream>
00047 #include <iomanip>
00048 #include <cassert>
00049 #include <climits>
00050 #include <cctype> //for isdigit
00051 #include "OutputFileHandler.hpp"
00052 #include "Exception.hpp"
00053 
00058 const int NOT_READ = INT_UNSET;
00059 
00060 ColumnDataReader::ColumnDataReader(const std::string& rDirectory,
00061                                    const std::string& rBaseName,
00062                                    bool makeAbsolute)
00063 {
00064     // Find out where files are really stored
00065     std::string directory;
00066     if (makeAbsolute)
00067     {
00068         OutputFileHandler output_file_handler(rDirectory, false);
00069         directory = output_file_handler.GetOutputDirectoryFullPath();
00070     }
00071     else
00072     {
00073         // Add a trailing slash if needed
00074         if ( !(*(rDirectory.end()-1) == '/'))
00075         {
00076             directory = rDirectory + "/";
00077         }
00078         else
00079         {
00080             directory = rDirectory;
00081         }
00082     }
00083     CheckFiles(directory, rBaseName);
00084 }
00085 
00086 ColumnDataReader::ColumnDataReader(const FileFinder& rDirectory,
00087                                    const std::string& rBaseName)
00088 {
00089     if (!rDirectory.IsDir() || !rDirectory.Exists())
00090     {
00091         EXCEPTION("Directory does not exist: " + rDirectory.GetAbsolutePath());
00092     }
00093     CheckFiles(rDirectory.GetAbsolutePath(), rBaseName);
00094 }
00095 
00096 void ColumnDataReader::CheckFiles(const std::string& rDirectory, const std::string& rBaseName)
00097 {
00098     // Read in info file
00099     mInfoFilename = rDirectory + rBaseName + ".info";
00100     std::ifstream infofile(mInfoFilename.c_str(), std::ios::in);
00101 
00102     // If it doesn't exist - throw exception
00103     if (!infofile.is_open())
00104     {
00105         EXCEPTION("Couldn't open info file: " + mInfoFilename);
00106     }
00107     std::string junk;
00108     mNumFixedDimensions = NOT_READ;
00109     mHasUnlimitedDimension = false;
00110     mNumVariables = NOT_READ;
00111 
00112     infofile >> junk;
00113     infofile >> mNumFixedDimensions >> junk;
00114     infofile >> mHasUnlimitedDimension >> junk;
00115     infofile >> mNumVariables;
00116 
00117     if (mNumFixedDimensions == NOT_READ || mNumVariables == NOT_READ)
00118     {
00119         infofile.close();
00120         EXCEPTION("Couldn't read info file correctly");
00121     }
00122 
00123     // Read in variables and associated them with a column number
00124     if (mHasUnlimitedDimension)
00125     {
00126         if (mNumFixedDimensions < 1)
00127         {
00128             mDataFilename = rDirectory + rBaseName + ".dat";
00129         }
00130         else
00131         {
00132             std::stringstream suffix;
00133             suffix << std::setfill('0') << std::setw(FILE_SUFFIX_WIDTH) << 0;
00134 
00135             mDataFilename = rDirectory + rBaseName + "_" + suffix.str() + ".dat";
00136 
00137             /*
00138              * The ancillary path needs to come from a single place that is
00139              * used by both the reader & writer, otherwise all will be bad.
00140              */
00141             mAncillaryFilename = rDirectory + rBaseName + "_unlimited.dat";
00142 
00143             // Extract the units and place into map
00144             std::ifstream ancillaryfile(mAncillaryFilename.c_str(), std::ios::in);
00145 
00146             // If it doesn't exist - throw exception
00147             if (!ancillaryfile.is_open())
00148             {
00149                 EXCEPTION("Couldn't open ancillary data file");
00150             }
00151             std::string dimension;
00152             std::getline(ancillaryfile, dimension);
00153             std::stringstream dimension_stream(dimension);
00154             std::string dimension_unit, dimension_name, header;
00155             dimension_stream >> header;
00156 
00157             // Separate into variable name and units
00158             int unitpos = header.find("(") + 1;
00159 
00160             dimension_name = header.substr(0, unitpos - 1);
00161             dimension_unit = header.substr(unitpos, header.length() - unitpos - 1);
00162 
00163             mVariablesToUnits[dimension_name] = dimension_unit;
00164             ancillaryfile.close();
00165         }
00166     }
00167     else
00168     {
00169         mDataFilename = rDirectory + rBaseName + ".dat";
00170     }
00171 
00172     std::ifstream datafile(mDataFilename.c_str(), std::ios::in);
00173     // If it doesn't exist - throw exception
00174     if (!datafile.is_open())
00175     {
00176         EXCEPTION("Couldn't open data file");
00177     }
00178 
00179     std::string variables;
00180     std::getline(datafile, variables);
00181     std::stringstream variable_stream(variables);
00182     std::string header, variable, unit;
00183     int column = 0;
00184 
00185     // Insert variables into map
00186     while (variable_stream >> header)
00187     {
00188         // Separate into variable name and units
00189         int unitpos = header.find("(") + 1;
00190 
00191         variable = header.substr(0, unitpos - 1);
00192         unit = header.substr(unitpos, header.length() - unitpos - 1);
00193 
00194         mVariablesToColumns[variable] = column;
00195         mVariablesToUnits[variable] = unit;
00196 
00197         column++;
00198     }
00199 
00200     /*
00201      * Now read the first line of proper data to determine the field width used when this
00202      * file was created. Do this by
00203      * 1. reading the first entry and measuring the distance from
00204      * the decimal point to the 'e'.  This gives the precision; the field width is then
00205      * precision + 7 (With MSVC on Windows, it's precision + 8).
00206      * e.g. if the first entry is
00207      *   6.3124e+01         => field width = 11 // chaste release 1 and 1.1
00208      *  -3.5124e+01         => field width = 11 // chaste release 1 and 1.1
00209      *  +1.00000000e+00     => field width = 15
00210      *  -1.20000000e+01     => field width = 15
00211      *  -1.12345678e-321    => field width = 15
00212      * 2. Because the first column has a varying number of spaces read a few columns and
00213      *    do some modular arithmetic to work out the correct width
00214      */
00215     std::string first_line;
00216     std::string first_entry;
00217     unsigned last_pos=0u;
00218     // Read the first entry of the line. If there is no first entry, move to the next line..
00219     while (first_entry.length()==0 && !datafile.eof())
00220     {
00221         std::getline(datafile, first_line);
00222         std::stringstream stream(first_line);
00223         stream >> first_entry;
00224         last_pos = stream.tellg(); // Where the first number ends (but it might be in the column 2 or 3)
00225         while (stream.good() && last_pos <170) //Avoid reading more than about 10 columns, because we want to avoid last_pos being divisible by too many factors
00226         {
00227             std::string last_entry;
00228             stream >> last_entry;
00229             if (stream.tellg() > 0)
00230             {
00231                 last_pos = stream.tellg();
00232             }
00233         }
00234     }
00235 
00236     if (datafile.eof() && first_entry.length()==0)
00237     {
00238         EXCEPTION("Unable to determine field width from file as cannot find any data entries");
00239     }
00240     assert (last_pos > 0u);
00241 
00242     size_t dot_pos = first_entry.find(".");
00243     size_t e_pos = first_entry.find("e");
00244     if (dot_pos == std::string::npos || e_pos == std::string::npos)
00245     {
00246         EXCEPTION("Badly formatted scientific data field");
00247     }
00248 
00249     unsigned est_field_width = e_pos - dot_pos - 1 + 8; // = Precision + 8
00250 
00251     if (last_pos % est_field_width == 0)
00252     {
00253         mFieldWidth = est_field_width;
00254     }
00255     else
00256     {
00257         assert ( last_pos % (est_field_width+1) == 0  || (last_pos+1) % (est_field_width+1) == 0 );
00258         mFieldWidth = est_field_width+1;
00259     }
00260     infofile.close();
00261     datafile.close();
00262 }
00263 
00264 std::vector<double> ColumnDataReader::GetValues(const std::string& rVariableName)
00265 {
00266     if (mNumFixedDimensions > 0)
00267     {
00268         EXCEPTION("Data file has fixed dimension which must be specified");
00269     }
00270 
00271     std::map<std::string, int>::iterator col = mVariablesToColumns.find(rVariableName);
00272     if (col == mVariablesToColumns.end())
00273     {
00274         std::stringstream variable_name;
00275         variable_name << rVariableName;
00276         EXCEPTION("'" + variable_name.str() + "' is an unknown variable.");
00277     }
00278 
00279     int column = (*col).second;
00280     ReadColumnFromFile(mDataFilename, column);
00281 
00282     return mValues;
00283 }
00284 
00285 std::vector<double> ColumnDataReader::GetValues(const std::string& rVariableName,
00286                                                 int fixedDimension)
00287 {
00288     if (mNumFixedDimensions < 1)
00289     {
00290         EXCEPTION("Data file has no fixed dimension");
00291     }
00292 
00293     mValues.clear();
00294     if (mHasUnlimitedDimension)
00295     {
00296         std::string datafile = mDataFilename;
00297         std::map<std::string, int>::iterator col = mVariablesToColumns.find(rVariableName);
00298         if (col == mVariablesToColumns.end())
00299         {
00300             EXCEPTION("Unknown variable");
00301         }
00302         int column = (*col).second;
00303 
00304         int counter = 1;
00305         while (true)
00306         {
00307             try
00308             {
00309                 ReadValueFromFile(datafile, column, fixedDimension);
00310             }
00311             catch (Exception)
00312             {
00313                 break;
00314             }
00315 
00316             // Advance counter
00317             std::string::size_type underscore_pos = datafile.rfind("_", datafile.length());
00318             std::stringstream suffix;
00319 
00320             suffix << std::setfill('0') << std::setw(FILE_SUFFIX_WIDTH) << counter;
00321 
00322             if (underscore_pos != std::string::npos)
00323             {
00324                 datafile = datafile.substr(0, underscore_pos+1) + suffix.str() + ".dat";
00325             }
00326             counter++;
00327         }
00328     }
00329     else
00330     {
00331         int column = mVariablesToColumns[rVariableName];
00332         if (0 == column)
00333         {
00334             EXCEPTION("Unknown variable");
00335         }
00336         ReadValueFromFile(mDataFilename, column, fixedDimension);
00337     }
00338 
00339     return mValues;
00340 }
00341 
00342 std::vector<double> ColumnDataReader::GetUnlimitedDimensionValues()
00343 {
00344     mValues.clear();
00345     if (!mHasUnlimitedDimension)
00346     {
00347         EXCEPTION("Data file has no unlimited dimension");
00348     }
00349     if (mNumFixedDimensions > 0)
00350     {
00351         // Read in from the ancillary file
00352         ReadColumnFromFile(mAncillaryFilename, 0);
00353     }
00354     else
00355     {
00356         // Read the first column
00357         ReadColumnFromFile(mDataFilename, 0);
00358     }
00359     return mValues;
00360 }
00361 
00362 void ColumnDataReader::ReadValueFromFile(const std::string& rFilename, int col, int row)
00363 {
00364     std::ifstream datafile(rFilename.c_str(), std::ios::in);
00365     // If it doesn't exist - throw exception
00366     if (!datafile.is_open())
00367     {
00368         EXCEPTION("Couldn't open data file");
00369     }
00370     std::string variable_values;
00371     for (int i=0; i<row+1; i++)
00372     {
00373         std::getline(datafile, variable_values);
00374     }
00375 
00376     std::getline(datafile, variable_values);
00377     this->PushColumnEntryFromLine(variable_values, col);
00378 
00379     datafile.close();
00380 }
00381 
00382 void ColumnDataReader::ReadColumnFromFile(const std::string& rFilename, int col)
00383 {
00384     // Empty the values vector
00385     mValues.clear();
00386 
00387     // Read in from the ancillary file
00388     std::ifstream datafile(rFilename.c_str(), std::ios::in);
00389     std::string value;
00390 
00391     // We should have already checked that this file can be opened.
00392     assert(datafile.is_open());
00393 
00394     // The current variable becomes true just after reading the last line
00395     bool end_of_file_reached = false;
00396 
00397     // Skip header line
00398     end_of_file_reached = std::getline(datafile, value).eof();
00399 
00400     while (!end_of_file_reached)
00401     {
00402         end_of_file_reached = std::getline(datafile, value).eof();
00403         this->PushColumnEntryFromLine(value, col);
00404     }
00405     datafile.close();
00406 }
00407 
00408 void ColumnDataReader::PushColumnEntryFromLine(const std::string& rLine, int col)
00409 {
00410     std::string value;
00411     unsigned startpos = col * mFieldWidth;
00412     value = rLine.substr(startpos, mFieldWidth);
00413 
00414     std::stringstream variable_stream(value);
00415     double d_value;
00416     variable_stream >> d_value;
00417     if (variable_stream.fail())
00418     {
00419         if (variable_stream.eof()) //Missing data from column
00420         {
00421             d_value = DBL_MAX;
00422         }
00423         else
00424         {
00425 #define COVERAGE_IGNORE
00426            //  Clang Objective C++ (on Mac OSX) treats reading very small numbers (<2e-308) as an error but other compilers just round to zero
00427            d_value = 0.0;
00428 #undef COVERAGE_IGNORE
00429         }
00430     }
00431     mValues.push_back(d_value);
00432 }
00433 
00434 bool ColumnDataReader::HasValues(const std::string& rVariableName)
00435 {
00436     std::map<std::string, int>::iterator col = mVariablesToColumns.find(rVariableName);
00437     return !(col == mVariablesToColumns.end());
00438 }
00439 
00440 unsigned ColumnDataReader::GetFieldWidth()
00441 {
00442     return mFieldWidth;
00443 }

Generated by  doxygen 1.6.2