Chaste Commit::baa90ac2819b962188b7562f2326be23c47859a7
ColumnDataReader.cpp
Go to the documentation of this file.
1/*
2
3Copyright (c) 2005-2024, University of Oxford.
4All rights reserved.
5
6University of Oxford means the Chancellor, Masters and Scholars of the
7University of Oxford, having an administrative office at Wellington
8Square, Oxford OX1 2JD, UK.
9
10This file is part of Chaste.
11
12Redistribution and use in source and binary forms, with or without
13modification, are permitted provided that the following conditions are met:
14 * Redistributions of source code must retain the above copyright notice,
15 this list of conditions and the following disclaimer.
16 * Redistributions in binary form must reproduce the above copyright notice,
17 this list of conditions and the following disclaimer in the documentation
18 and/or other materials provided with the distribution.
19 * Neither the name of the University of Oxford nor the names of its
20 contributors may be used to endorse or promote products derived from this
21 software without specific prior written permission.
22
23THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
24AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
27LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
29GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33
34*/
35
42#include "ColumnDataReader.hpp"
44
45#include <fstream>
46#include <sstream>
47#include <iomanip>
48#include <cassert>
49#include <climits>
50#include <cctype> //for isdigit
51#include "OutputFileHandler.hpp"
52#include "Exception.hpp"
53
58const int NOT_READ = INT_UNSET;
59
60ColumnDataReader::ColumnDataReader(const std::string& rDirectory,
61 const std::string& rBaseName,
62 bool makeAbsolute)
63{
64 // Find out where files are really stored
65 std::string directory;
66 if (makeAbsolute)
67 {
68 OutputFileHandler output_file_handler(rDirectory, false);
69 directory = output_file_handler.GetOutputDirectoryFullPath();
70 }
71 else
72 {
73 // Add a trailing slash if needed
74 if (!(*(rDirectory.end()-1) == '/'))
75 {
76 directory = rDirectory + "/";
77 }
78 else
79 {
80 directory = rDirectory;
81 }
82 }
83 CheckFiles(directory, rBaseName);
84}
85
// NOTE(review): the opening line of this definition (listing line 86) is missing from this
// extract, so the declared type of rDirectory cannot be seen here. The body below only
// relies on it offering IsDir(), Exists() and GetAbsolutePath().
//
// Purpose: build a reader from a directory object plus the base name of the data files.
// Throws (via EXCEPTION) when rDirectory is not an existing directory; otherwise passes
// its absolute path and rBaseName on to CheckFiles().
87 const std::string& rBaseName)
88{
89 if (!rDirectory.IsDir() || !rDirectory.Exists())
90 {
91 EXCEPTION("Directory does not exist: " + rDirectory.GetAbsolutePath());
92 }
// CheckFiles() concatenates directory + base name directly.
// NOTE(review): this assumes GetAbsolutePath() ends with a path separator - confirm.
93 CheckFiles(rDirectory.GetAbsolutePath(), rBaseName);
94}
95
// Open and index the files that make up one column data set.
//
// Steps visible in this function:
//  1. Open <rDirectory><rBaseName>.info and read the number of fixed dimensions, the
//     unlimited-dimension flag and the number of variables from it.
//  2. Choose the data file name (and, in one branch, the ancillary "_unlimited.dat" file,
//     whose header supplies the name and unit of one extra entry in mVariablesToUnits).
//  3. Read the header line of the data file and record, for each "name(unit)" token, its
//     column index (mVariablesToColumns) and unit (mVariablesToUnits).
//  4. Inspect the first data line to work out the fixed field width (mFieldWidth).
//
// rDirectory is concatenated directly with rBaseName, so it must already end with a
// path separator. Failures to open a file or to parse the data are reported via EXCEPTION.
//
// NOTE(review): several lines of this function are missing from this listing (the listing
// numbers jump 107->111, 116->118 and 123->125); the notes below mark the gaps.
96void ColumnDataReader::CheckFiles(const std::string& rDirectory, const std::string& rBaseName)
97{
98 // Read in info file
99 mInfoFilename = rDirectory + rBaseName + ".info";
100 std::ifstream infofile(mInfoFilename.c_str(), std::ios::in);
101
102 // If it doesn't exist - throw exception
103 if (!infofile.is_open())
104 {
105 EXCEPTION("Couldn't open info file: " + mInfoFilename);
106 }
107 std::string junk;
// NOTE(review): listing lines 108-110 are missing here - presumably the members read
// below are given initial values; confirm against the real file.
111
// The info file is read as whitespace-separated tokens: each value is surrounded by
// tokens that are discarded into 'junk'.
112 infofile >> junk;
113 infofile >> mNumFixedDimensions >> junk;
114 infofile >> mHasUnlimitedDimension >> junk;
115 infofile >> mNumVariables;
116
// NOTE(review): listing line 117 (the condition guarding this error block) is missing.
118 {
119 infofile.close();
120 EXCEPTION("Couldn't read info file correctly");
121 }
122
123 // Read in variables and associate them with a column number
// NOTE(review): listing line 124 (the condition paired with the 'else' further down) is
// missing - presumably a test on mHasUnlimitedDimension; confirm.
125 {
126 if (mNumFixedDimensions < 1)
127 {
128 mDataFilename = rDirectory + rBaseName + ".dat";
129 }
130 else
131 {
// Data is spread over numbered files; start from the one with suffix 0, zero-padded to
// FILE_SUFFIX_WIDTH digits.
132 std::stringstream suffix;
133 suffix << std::setfill('0') << std::setw(FILE_SUFFIX_WIDTH) << 0;
134
135 mDataFilename = rDirectory + rBaseName + "_" + suffix.str() + ".dat";
136
137 /*
138 * The ancillary path needs to come from a single place that is
139 * used by both the reader & writer, otherwise all will be bad.
140 */
141 mAncillaryFilename = rDirectory + rBaseName + "_unlimited.dat";
142
143 // Extract the units and place into map
144 std::ifstream ancillaryfile(mAncillaryFilename.c_str(), std::ios::in);
145
146 // If it doesn't exist - throw exception
147 if (!ancillaryfile.is_open())
148 {
149 EXCEPTION("Couldn't open ancillary data file");
150 }
// Only the first whitespace-delimited token of the first line is used as the header.
151 std::string dimension;
152 std::getline(ancillaryfile, dimension);
153 std::stringstream dimension_stream(dimension);
154 std::string dimension_unit, dimension_name, header;
155 dimension_stream >> header;
156
157 // Separate into variable name and units
// unitpos is the index just after '('; the final character of the header (expected to be
// the closing bracket) is dropped from the unit.
// NOTE(review): a header without '(' makes find() return npos, which is not handled here.
158 int unitpos = header.find("(") + 1;
159
160 dimension_name = header.substr(0, unitpos - 1);
161 dimension_unit = header.substr(unitpos, header.length() - unitpos - 1);
162
163 mVariablesToUnits[dimension_name] = dimension_unit;
164 ancillaryfile.close();
165 }
166 }
167 else
168 {
169 mDataFilename = rDirectory + rBaseName + ".dat";
170 }
171
172 std::ifstream datafile(mDataFilename.c_str(), std::ios::in);
173 // If it doesn't exist - throw exception
174 if (!datafile.is_open())
175 {
176 EXCEPTION("Couldn't open data file");
177 }
178
// The first line of the data file holds one "name(unit)" token per column.
179 std::string variables;
180 std::getline(datafile, variables);
181 std::stringstream variable_stream(variables);
182 std::string header, variable, unit;
183 int column = 0;
184
185 // Insert variables into map
186 while (variable_stream >> header)
187 {
188 // Separate into variable name and units
189 int unitpos = header.find("(") + 1;
190
191 variable = header.substr(0, unitpos - 1);
192 unit = header.substr(unitpos, header.length() - unitpos - 1);
193
// Columns are numbered from 0 in header order.
194 mVariablesToColumns[variable] = column;
195 mVariablesToUnits[variable] = unit;
196
197 column++;
198 }
199
200 /*
201 * Now read the first line of proper data to determine the field width used when this
202 * file was created. Do this by
203 * 1. reading the first entry and measuring the distance from
204 * the decimal point to the 'e'. This gives the precision; the field width is then
205 * precision + 7 (With MSVC on Windows, it's precision + 8).
206 * e.g. if the first entry is
207 * 6.3124e+01 => field width = 11 // chaste release 1 and 1.1
208 * -3.5124e+01 => field width = 11 // chaste release 1 and 1.1
209 * +1.00000000e+00 => field width = 15
210 * -1.20000000e+01 => field width = 15
211 * -1.12345678e-321 => field width = 15
212 * 2. Because the first column has a varying number of spaces read a few columns and
213 * do some modular arithmetic to work out the correct width
214 */
// NOTE(review): the comment above quotes "precision + 7", whereas the code below starts
// from precision + 8 and falls back to precision + 9 - confirm which description is current.
215 std::string first_line;
216 std::string first_entry;
217 unsigned last_pos=0u;
218 // Read the first entry of the line. If there is no first entry, move to the next line..
219 while (first_entry.length()==0 && !datafile.eof())
220 {
221 std::getline(datafile, first_line);
222 std::stringstream stream(first_line);
223 stream >> first_entry;
224 last_pos = stream.tellg(); // Where the first number ends (but it might be in the column 2 or 3)
225 while (stream.good() && last_pos <170) //Avoid reading more than about 10 columns, because we want to avoid last_pos being divisible by too many factors
226 {
227 std::string last_entry;
228 stream >> last_entry;
// tellg() is only trusted when it reports a positive position, so last_pos keeps the end
// of the last entry for which a position was available.
229 if (stream.tellg() > 0)
230 {
231 last_pos = stream.tellg();
232 }
233 }
234 }
235
236 if (datafile.eof() && first_entry.length()==0)
237 {
238 EXCEPTION("Unable to determine field width from file as cannot find any data entries");
239 }
240 assert (last_pos > 0u);
241
242 size_t dot_pos = first_entry.find(".");
243 size_t e_pos = first_entry.find("e");
244 if (dot_pos == std::string::npos || e_pos == std::string::npos)
245 {
246 EXCEPTION("Badly formatted scientific data field");
247 }
248
// Number of digits between '.' and 'e', plus 8.
249 unsigned est_field_width = e_pos - dot_pos - 1 + 8; // = Precision + 8
250
// Accept the estimate if the measured end position is a whole number of such fields;
// otherwise use a width one character larger (checked by the assert only).
251 if (last_pos % est_field_width == 0)
252 {
253 mFieldWidth = est_field_width;
254 }
255 else
256 {
257 assert ( last_pos % (est_field_width+1) == 0 || (last_pos+1) % (est_field_width+1) == 0 );
258 mFieldWidth = est_field_width+1;
259 }
260 infofile.close();
261 datafile.close();
262}
263
// Return the values held in mValues for the named variable, for data files that have no
// fixed dimension.
//
// Throws (via EXCEPTION) if the data set has a fixed dimension (the two-argument overload
// must be used instead) or if rVariableName is not a known column header.
264std::vector<double> ColumnDataReader::GetValues(const std::string& rVariableName)
265{
266 if (mNumFixedDimensions > 0)
267 {
268 EXCEPTION("Data file has fixed dimension which must be specified");
269 }
270
271 std::map<std::string, int>::iterator col = mVariablesToColumns.find(rVariableName);
272 if (col == mVariablesToColumns.end())
273 {
// NOTE(review): the stringstream only copies rVariableName; the name could be used directly.
274 std::stringstream variable_name;
275 variable_name << rVariableName;
276 EXCEPTION("'" + variable_name.str() + "' is an unknown variable.");
277 }
278
279 int column = (*col).second;
// NOTE(review): listing line 280 is missing here - presumably the call that fills mValues
// from column 'column' of the data file; confirm against the real file.
281
282 return mValues;
283}
284
// Return the values of the named variable at one position along the fixed dimension.
//
// fixedDimension is passed to ReadValueFromFile() as the row to read. Throws (via
// EXCEPTION) if the data set has no fixed dimension or the variable is unknown.
// mValues is cleared first and returned by value at the end.
285std::vector<double> ColumnDataReader::GetValues(const std::string& rVariableName,
286 int fixedDimension)
287{
288 if (mNumFixedDimensions < 1)
289 {
290 EXCEPTION("Data file has no fixed dimension");
291 }
292
293 mValues.clear();
// NOTE(review): listing line 294 (the condition paired with the 'else' below) is missing -
// presumably a test on mHasUnlimitedDimension; confirm.
295 {
296 std::string datafile = mDataFilename;
297 std::map<std::string, int>::iterator col = mVariablesToColumns.find(rVariableName);
298 if (col == mVariablesToColumns.end())
299 {
300 EXCEPTION("Unknown variable");
301 }
302 int column = (*col).second;
303
// Walk the numbered data files (suffix 0, 1, 2, ...) reading one value from each, until
// ReadValueFromFile() throws - which is how the end of the sequence is detected.
304 int counter = 1;
305 while (true)
306 {
307 try
308 {
309 ReadValueFromFile(datafile, column, fixedDimension);
310 }
311 catch (const Exception&)
312 {
313 break;
314 }
315
316 // Advance counter
317 std::string::size_type underscore_pos = datafile.rfind("_", datafile.length());
318 std::stringstream suffix;
319
320 suffix << std::setfill('0') << std::setw(FILE_SUFFIX_WIDTH) << counter;
321
// Replace everything after the last underscore with the next zero-padded suffix.
// NOTE(review): if no underscore is found the file name never changes, so the same file
// would be re-read on every iteration.
322 if (underscore_pos != std::string::npos)
323 {
324 datafile = datafile.substr(0, underscore_pos+1) + suffix.str() + ".dat";
325 }
326 counter++;
327 }
328 }
329 else
330 {
// NOTE(review): operator[] inserts a zero entry for an unknown name, and a variable that
// genuinely sits in column 0 is also reported as unknown - confirm this is intended.
331 int column = mVariablesToColumns[rVariableName];
332 if (0 == column)
333 {
334 EXCEPTION("Unknown variable");
335 }
336 ReadValueFromFile(mDataFilename, column, fixedDimension);
337 }
338
339 return mValues;
340}
341
// NOTE(review): this body belongs to a definition whose signature (listing line 342) is
// missing from this extract; the member index at the end of the listing suggests it is
// GetUnlimitedDimensionValues(), returning std::vector<double> - confirm.
// Listing lines 345, 352 and 357 are missing as well, so the condition of the first block
// and the statements inside both branches are not visible.
//
// Visible behaviour: clears mValues, may throw "Data file has no unlimited dimension",
// chooses between the ancillary file and the first column of the data file depending on
// mNumFixedDimensions, and returns mValues by value.
343{
344 mValues.clear();
346 {
347 EXCEPTION("Data file has no unlimited dimension");
348 }
349 if (mNumFixedDimensions > 0)
350 {
351 // Read in from the ancillary file
353 }
354 else
355 {
356 // Read the first column
358 }
359 return mValues;
360}
361
362void ColumnDataReader::ReadValueFromFile(const std::string& rFilename, int col, int row)
363{
364 std::ifstream datafile(rFilename.c_str(), std::ios::in);
365 // If it doesn't exist - throw exception
366 if (!datafile.is_open())
367 {
368 EXCEPTION("Couldn't open data file");
369 }
370 std::string variable_values;
371 for (int i=0; i<row+1; i++)
372 {
373 std::getline(datafile, variable_values);
374 }
375
376 std::getline(datafile, variable_values);
377 this->PushColumnEntryFromLine(variable_values, col);
378
379 datafile.close();
380}
381
382void ColumnDataReader::ReadColumnFromFile(const std::string& rFilename, int col)
383{
384 // Empty the values vector
385 mValues.clear();
386
387 // Read in from the ancillary file
388 std::ifstream datafile(rFilename.c_str(), std::ios::in);
389 std::string value;
390
391 // We should have already checked that this file can be opened.
392 assert(datafile.is_open());
393
394 // The current variable becomes true just after reading the last line
395 bool end_of_file_reached = false;
396
397 // Skip header line
398 end_of_file_reached = std::getline(datafile, value).eof();
399
400 while (!end_of_file_reached)
401 {
402 end_of_file_reached = std::getline(datafile, value).eof();
403 this->PushColumnEntryFromLine(value, col);
404 }
405 datafile.close();
406}
407
408void ColumnDataReader::PushColumnEntryFromLine(const std::string& rLine, int col)
409{
410 std::string value;
411 unsigned startpos = col * mFieldWidth;
412 value = rLine.substr(startpos, mFieldWidth);
413
414 std::stringstream variable_stream(value);
415 double d_value;
416 variable_stream >> d_value;
417 if (variable_stream.fail())
418 {
419 if (variable_stream.eof()) //Missing data from column
420 {
421 d_value = DBL_MAX;
422 }
423 else
424 {
425// LCOV_EXCL_START
426 // Clang Objective C++ (on Mac OSX) treats reading very small numbers (<2e-308) as an error but other compilers just round to zero
427 d_value = 0.0;
428// LCOV_EXCL_STOP
429 }
430 }
431 mValues.push_back(d_value);
432}
433
434bool ColumnDataReader::HasValues(const std::string& rVariableName)
435{
436 std::map<std::string, int>::iterator col = mVariablesToColumns.find(rVariableName);
437 return !(col == mVariablesToColumns.end());
438}
439
// NOTE(review): the signature of this definition (listing line 440) is missing from this
// extract - presumably the accessor for mFieldWidth; confirm its name and return type
// against the header. The body returns the field width stored in mFieldWidth.
441{
442 return mFieldWidth;
443}
const int FILE_SUFFIX_WIDTH
const int NOT_READ
const int INT_UNSET
Definition Exception.hpp:55
#define EXCEPTION(message)
std::string mInfoFilename
std::string mAncillaryFilename
void ReadColumnFromFile(const std::string &rFilename, int col)
void CheckFiles(const std::string &rDirectory, const std::string &rBaseName)
bool HasValues(const std::string &rVariableName)
ColumnDataReader(const std::string &rDirectory, const std::string &rBaseName, bool makeAbsolute=true)
std::string mDataFilename
void ReadValueFromFile(const std::string &rFilename, int col, int row)
void PushColumnEntryFromLine(const std::string &rLine, int col)
std::map< std::string, std::string > mVariablesToUnits
std::map< std::string, int > mVariablesToColumns
std::vector< double > GetUnlimitedDimensionValues()
std::vector< double > GetValues(const std::string &rVariableName)
std::vector< double > mValues
std::string GetAbsolutePath() const
bool IsDir() const
bool Exists() const
std::string GetOutputDirectoryFullPath() const