Chaste Commit::a9c8bf7350f67d7cf086e6fe3cf5461521554546
XmlTools.cpp
1/*
2
3Copyright (c) 2005-2026, University of Oxford.
4All rights reserved.
5
6University of Oxford means the Chancellor, Masters and Scholars of the
7University of Oxford, having an administrative office at Wellington
8Square, Oxford OX1 2JD, UK.
9
10This file is part of Chaste.
11
12Redistribution and use in source and binary forms, with or without
13modification, are permitted provided that the following conditions are met:
14 * Redistributions of source code must retain the above copyright notice,
15 this list of conditions and the following disclaimer.
16 * Redistributions in binary form must reproduce the above copyright notice,
17 this list of conditions and the following disclaimer in the documentation
18 and/or other materials provided with the distribution.
19 * Neither the name of the University of Oxford nor the names of its
20 contributors may be used to endorse or promote products derived from this
21 software without specific prior written permission.
22
23THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
24AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
27LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
29GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33
34*/
35
36#include "XmlTools.hpp"
37
38#include <iostream>
39
40#include <xercesc/dom/DOM.hpp>
41#include <xercesc/util/PlatformUtils.hpp>
42#include <xercesc/util/QName.hpp>
43#include <xercesc/util/XMLUniDefs.hpp> // chLatin_*
44#include <xercesc/framework/Wrapper4InputSource.hpp>
45#include <xercesc/validators/common/Grammar.hpp>
46
47#include <xsd/cxx/xml/sax/std-input-source.hxx>
48#include <xsd/cxx/xml/dom/bits/error-handler-proxy.hxx>
49#include <xsd/cxx/tree/exceptions.hxx>
50
51#include "ChasteXsdVersion.hpp"
52#include "Exception.hpp"
53
54XSD_DOM_AUTO_PTR<xercesc::DOMDocument> XmlTools::ReadXmlFile(
55 const std::string& rFileName,
56 const ::xsd::cxx::tree::properties<char>& rProps,
57 bool validate)
58{
59 XSD_DOM_AUTO_PTR<xercesc::DOMDocument> p_doc;
60 try
61 {
62 // Initialise Xerces
63 xercesc::XMLPlatformUtils::Initialize();
64 // Set up an error handler
65 ::xsd::cxx::tree::error_handler<char> error_handler;
66 // Parse XML to DOM
67 p_doc = XmlTools::ReadFileToDomDocument(rFileName, error_handler, rProps, validate);
68 // Any errors?
69 error_handler.throw_if_failed< ::xsd::cxx::tree::parsing<char> >();
70 }
71 catch (const ::xsd::cxx::tree::parsing<char>& e)
72 {
73 Finalize();
74 // Test for missing schema/xml file
75#if CHASTE_XSD_VERSION_AT_LEAST(3, 0, 0)
76 const ::xsd::cxx::tree::diagnostics<char>& diags = e.diagnostics();
77 const ::xsd::cxx::tree::error<char>& first_error = diags[0];
78#else
79 const ::xsd::cxx::tree::errors<char>& errors = e.errors();
80 const ::xsd::cxx::tree::error<char>& first_error = errors[0];
81#endif
82 if (first_error.line() == 0u)
83 {
84 std::cerr << first_error << std::endl;
85 EXCEPTION("Missing file parsing configuration file: " + rFileName);
86 }
87 else
88 {
89 std::cerr << e << std::endl;
90 EXCEPTION("XML parsing error in configuration file: " + rFileName);
91 }
92 }
93// LCOV_EXCL_START
94 catch (...)
95 { // This shouldn't happen, but just in case...
96 Finalize();
97 throw;
98 }
99// LCOV_EXCL_STOP
100 return p_doc;
101}
102
103
105{
106 xercesc::XMLPlatformUtils::Terminate();
107}
108
110{
111 // The init=true case will very rarely be used, but a parameter to the constructor is needed
112 // to stop some compilers complaining about an unused variable!
113 if (init)
114 {
115// LCOV_EXCL_START
116 xercesc::XMLPlatformUtils::Initialize();
117// LCOV_EXCL_STOP
118 }
119}
120
125
126XSD_DOM_AUTO_PTR<xercesc::DOMDocument> XmlTools::ReadFileToDomDocument(
127 const std::string& rFileName,
128 ::xsd::cxx::xml::error_handler<char>& rErrorHandler,
129 const ::xsd::cxx::tree::properties<char>& rProps,
130 bool validate)
131{
132 using namespace xercesc;
133 namespace xml = xsd::cxx::xml;
134
135 // Get an implementation of the Load-Store (LS) interface.
136 const XMLCh ls_id [] = {chLatin_L, chLatin_S, chNull};
137 DOMImplementation* p_impl(DOMImplementationRegistry::getDOMImplementation(ls_id));
138
139#if _XERCES_VERSION >= 30000
140 // Xerces-C++ 3.0.0 and later.
141 XSD_DOM_AUTO_PTR<DOMLSParser> p_parser(p_impl->createLSParser(DOMImplementationLS::MODE_SYNCHRONOUS, 0));
142 DOMConfiguration* p_conf(p_parser->getDomConfig());
143
144 // Discard comment nodes in the document.
145 p_conf->setParameter(XMLUni::fgDOMComments, false);
146
147 // Enable datatype normalization.
148 p_conf->setParameter(XMLUni::fgDOMDatatypeNormalization, true);
149
150 // Do not create EntityReference nodes in the DOM tree. No
151 // EntityReference nodes will be created, only the nodes
152 // corresponding to their fully expanded substitution text
153 // will be created.
154 p_conf->setParameter(XMLUni::fgDOMEntities, false);
155
156 // Perform namespace processing.
157 p_conf->setParameter(XMLUni::fgDOMNamespaces, true);
158
159 // Do not include ignorable whitespace in the DOM tree.
160 p_conf->setParameter(XMLUni::fgDOMElementContentWhitespace, false);
161
162 // Enable validation.
163 if (validate)
164 {
165 p_conf->setParameter(XMLUni::fgDOMValidate, true);
166 p_conf->setParameter(XMLUni::fgXercesSchema, true);
167 p_conf->setParameter(XMLUni::fgXercesSchemaFullChecking, false);
168 // Code taken from xsd/cxx/xml/dom/parsing-source.txx
169 if (!rProps.schema_location().empty())
170 {
171 xml::string locn(rProps.schema_location());
172 const void* p_locn(locn.c_str());
173 p_conf->setParameter(XMLUni::fgXercesSchemaExternalSchemaLocation,
174 const_cast<void*>(p_locn));
175 }
176 if (!rProps.no_namespace_schema_location().empty())
177 {
178 xml::string locn(rProps.no_namespace_schema_location());
179 const void* p_locn(locn.c_str());
180
181 p_conf->setParameter(XMLUni::fgXercesSchemaExternalNoNameSpaceSchemaLocation,
182 const_cast<void*>(p_locn));
183 }
184 }
185 else
186 {
187 // This branch is only used by projects
188// LCOV_EXCL_START
189 p_conf->setParameter(XMLUni::fgDOMValidate, false);
190 p_conf->setParameter(XMLUni::fgXercesSchema, false);
191 p_conf->setParameter(XMLUni::fgXercesSchemaFullChecking, false);
192// LCOV_EXCL_STOP
193 }
194
195 // We will release the DOM document ourselves.
196 p_conf->setParameter(XMLUni::fgXercesUserAdoptsDOMDocument, true);
197
198 // Set error handler.
199 xml::dom::bits::error_handler_proxy<char> ehp(rErrorHandler);
200 p_conf->setParameter(XMLUni::fgDOMErrorHandler, &ehp);
201
202#else // _XERCES_VERSION < 30000
203 // Same as above but for Xerces-C++ 2 series.
204 XSD_DOM_AUTO_PTR<DOMBuilder> p_parser(p_impl->createDOMBuilder(DOMImplementationLS::MODE_SYNCHRONOUS, 0));
205
206 p_parser->setFeature(XMLUni::fgDOMComments, false);
207 p_parser->setFeature(XMLUni::fgDOMDatatypeNormalization, true);
208 p_parser->setFeature(XMLUni::fgDOMEntities, false);
209 p_parser->setFeature(XMLUni::fgDOMNamespaces, true);
210 p_parser->setFeature(XMLUni::fgDOMWhitespaceInElementContent, false);
211 p_parser->setFeature(XMLUni::fgXercesUserAdoptsDOMDocument, true);
212
213 // Code taken from xsd/cxx/xml/dom/parsing-source.txx
214 if (validate)
215 {
216 p_parser->setFeature(XMLUni::fgDOMValidation, true);
217 p_parser->setFeature(XMLUni::fgXercesSchema, true);
218 p_parser->setFeature(XMLUni::fgXercesSchemaFullChecking, false);
219 if (!rProps.schema_location().empty())
220 {
221 xml::string locn(rProps.schema_location());
222 const void* p_locn(locn.c_str());
223 p_parser->setProperty(XMLUni::fgXercesSchemaExternalSchemaLocation,
224 const_cast<void*>(p_locn));
225 }
226
227 if (!rProps.no_namespace_schema_location().empty())
228 {
229 xml::string locn(rProps.no_namespace_schema_location());
230 const void* p_locn(locn.c_str());
231
232 p_parser->setProperty(XMLUni::fgXercesSchemaExternalNoNameSpaceSchemaLocation,
233 const_cast<void*>(p_locn));
234 }
235 }
236 else
237 {
238 // This branch is only used by projects
239// LCOV_EXCL_START
240 p_parser->setFeature(XMLUni::fgDOMValidation, false);
241 p_parser->setFeature(XMLUni::fgXercesSchema, false);
242 p_parser->setFeature(XMLUni::fgXercesSchemaFullChecking, false);
243// LCOV_EXCL_STOP
244 }
245
246 xml::dom::bits::error_handler_proxy<char> ehp(rErrorHandler);
247 p_parser->setErrorHandler(&ehp);
248
249#endif // _XERCES_VERSION >= 30000
250
251 // Do the parse
252 XSD_DOM_AUTO_PTR<DOMDocument> p_doc(p_parser->parseURI(rFileName.c_str()));
253
254 if (ehp.failed())
255 {
256 p_doc.reset();
257 }
258
259 return p_doc;
260}
261
262// LCOV_EXCL_START
263void XmlTools::PrintNode(const std::string& rMsg, xercesc::DOMNode* pNode, bool showChildren)
264{
265 std::string prefix = X2C(pNode->getPrefix());
266 std::string name = X2C(pNode->getLocalName());
267 std::string nsuri = X2C(pNode->getNamespaceURI());
268 std::cout << rMsg << " " << pNode << " " << prefix << ":" << name << " in " << nsuri << std::endl;
269 if (showChildren)
270 {
271 for (xercesc::DOMNode* p_node = pNode->getFirstChild();
272 p_node != NULL;
273 p_node = p_node->getNextSibling())
274 {
275 std::cout << " child type " << p_node->getNodeType();
276 PrintNode("", p_node, false);
277 }
278 xercesc::DOMNamedNodeMap* p_attrs = pNode->getAttributes();
279 if (p_attrs)
280 {
281 for (XMLSize_t i=0; i<p_attrs->getLength(); i++)
282 {
283 xercesc::DOMNode* p_attr = p_attrs->item(i);
284 std::string value = X2C(p_attr->getNodeValue());
285 PrintNode(" attr (" + value + ")", p_attr, false);
286 }
287 }
288 }
289}
290// LCOV_EXCL_STOP
291
292xercesc::DOMElement* XmlTools::SetNamespace(xercesc::DOMDocument* pDocument,
293 xercesc::DOMElement* pElement,
294 const XMLCh* pNamespace)
295{
296 using namespace xercesc;
297
298 //PrintNode("Renaming", pElement, true);
299 DOMNamedNodeMap* p_orig_attrs = pElement->getAttributes();
300 std::vector<std::string> attr_values;
301 if (p_orig_attrs)
302 {
303 for (XMLSize_t i=0; i<p_orig_attrs->getLength(); i++)
304 {
305 DOMNode* p_attr = p_orig_attrs->item(i);
306 attr_values.push_back(X2C(p_attr->getNodeValue()));
307 }
308 }
309 DOMElement* p_new_elt = static_cast<DOMElement*>(
310 pDocument->renameNode(pElement, pNamespace, pElement->getLocalName()));
311 //PrintNode(" to", p_new_elt, true);
312 // Fix attributes - some get broken by the rename!
313 if (p_orig_attrs)
314 {
315 DOMNamedNodeMap* p_new_attrs = p_new_elt->getAttributes();
316 assert(p_new_attrs);
317 assert(p_new_attrs == p_orig_attrs);
318 assert(p_new_attrs->getLength() == attr_values.size());
319 for (XMLSize_t i=0; i<p_new_attrs->getLength(); i++)
320 {
321 DOMNode* p_attr = p_new_attrs->item(i);
322 p_attr->setNodeValue(X(attr_values[i]));
323 }
324 }
325 //PrintNode(" after attr fix", p_new_elt, true);
326
327 std::vector<DOMElement*> children = GetChildElements(p_new_elt);
328 for (std::vector<DOMElement*>::iterator it = children.begin(); it != children.end(); ++it)
329 {
330 SetNamespace(pDocument, *it, pNamespace);
331 }
332
333 return p_new_elt;
334}
335
336xercesc::DOMElement* XmlTools::SetNamespace(xercesc::DOMDocument* pDocument,
337 xercesc::DOMElement* pElement,
338 const std::string& rNamespace)
339{
340 return SetNamespace(pDocument, pElement, X(rNamespace));
341}
342
343
344std::vector<xercesc::DOMElement*> XmlTools::GetChildElements(const xercesc::DOMElement* pElement)
345{
346 std::vector<xercesc::DOMElement*> children;
347 for (xercesc::DOMNode* p_node = pElement->getFirstChild();
348 p_node != NULL;
349 p_node = p_node->getNextSibling())
350 {
351 if (p_node->getNodeType() == xercesc::DOMNode::ELEMENT_NODE)
352 {
353 children.push_back(static_cast<xercesc::DOMElement*>(p_node));
354 }
355 }
356 return children;
357}
358
359
360void XmlTools::FindElements(const xercesc::DOMElement* pContextElement,
361 const std::vector<std::string>& rNames,
362 std::vector<xercesc::DOMElement*>& rResults,
363 unsigned depth)
364{
365 for (xercesc::DOMNode* p_node = pContextElement->getFirstChild();
366 p_node != NULL;
367 p_node = p_node->getNextSibling())
368 {
369 if (p_node->getNodeType() == xercesc::DOMNode::ELEMENT_NODE &&
370 X2C(p_node->getLocalName()) == rNames[depth])
371 {
372 xercesc::DOMElement* p_child_elt = static_cast<xercesc::DOMElement*>(p_node);
373 if (depth == rNames.size() - 1)
374 {
375 rResults.push_back(p_child_elt);
376 }
377 else
378 {
379 FindElements(p_child_elt, rNames, rResults, depth+1);
380 }
381 }
382 }
383}
384
385std::vector<xercesc::DOMElement*> XmlTools::FindElements(const xercesc::DOMElement* pContextElement,
386 const std::string& rPath)
387{
388 std::vector<xercesc::DOMElement*> results;
389 std::vector<std::string> path;
390 size_t start_pos = 0;
391 size_t slash_pos = 0;
392 while (slash_pos != std::string::npos)
393 {
394 slash_pos = rPath.find('/', start_pos);
395 if (slash_pos == std::string::npos)
396 {
397 path.push_back(rPath.substr(start_pos));
398 }
399 else
400 {
401 path.push_back(rPath.substr(start_pos, slash_pos-start_pos));
402 }
403 start_pos = slash_pos + 1;
404 }
405 FindElements(pContextElement, path, results);
406 return results;
407}
408
409
410void XmlTools::WrapContentInElement(xercesc::DOMDocument* pDocument,
411 xercesc::DOMElement* pElement,
412 const XMLCh* pNewElementLocalName)
413{
414 const XMLCh* p_namespace_uri = pElement->getNamespaceURI();
415 const XMLCh* p_prefix = pElement->getPrefix();
416 const XMLCh* p_qualified_name;
417 if (p_prefix)
418 {
419// LCOV_EXCL_START
420 // We can't actually cover this code, since versions of the parameters file which need this
421 // transform didn't use a namespace, so can't have a namespace prefix!
422 xercesc::QName qname(p_prefix, pNewElementLocalName, 0);
423 p_qualified_name = qname.getRawName();
424// LCOV_EXCL_STOP
425 }
426 else
427 {
428 p_qualified_name = pNewElementLocalName;
429 }
430 xercesc::DOMElement* p_wrapper_elt = pDocument->createElementNS(p_namespace_uri, p_qualified_name);
431 // Move all child nodes of pElement to be children of p_wrapper_elt
432 xercesc::DOMNodeList* p_children = pElement->getChildNodes();
433 for (unsigned i=0; i<p_children->getLength(); i++)
434 {
435 xercesc::DOMNode* p_child = pElement->removeChild(p_children->item(i));
436 p_wrapper_elt->appendChild(p_child);
437 }
438 // Add the wrapper as the sole child of pElement
439 pElement->appendChild(p_wrapper_elt);
440}
441
442
443std::string XmlTools::EscapeSpaces(const std::string& rPath)
444{
445 std::string escaped_path;
446 for (std::string::const_iterator it = rPath.begin(); it != rPath.end(); ++it)
447 {
448 if (*it == ' ')
449 {
450 escaped_path += "%20";
451 }
452 else
453 {
454 escaped_path += *it;
455 }
456 }
457 return escaped_path;
458}
#define EXCEPTION(message)
Finalizer(bool init)
Definition XmlTools.cpp:109
static xercesc::DOMElement * SetNamespace(xercesc::DOMDocument *pDocument, xercesc::DOMElement *pElement, const std::string &rNamespace)
Definition XmlTools.cpp:336
static std::string EscapeSpaces(const std::string &rPath)
Definition XmlTools.cpp:443
static XSD_DOM_AUTO_PTR< xercesc::DOMDocument > ReadFileToDomDocument(const std::string &rFileName, ::xsd::cxx::xml::error_handler< char > &rErrorHandler, const ::xsd::cxx::tree::properties< char > &rProps, bool validate=true)
Definition XmlTools.cpp:126
static void PrintNode(const std::string &rMsg, xercesc::DOMNode *pNode, bool showChildren=false)
Definition XmlTools.cpp:263
static std::vector< xercesc::DOMElement * > FindElements(const xercesc::DOMElement *pContextElement, const std::string &rPath)
Definition XmlTools.cpp:385
static XSD_DOM_AUTO_PTR< xercesc::DOMDocument > ReadXmlFile(const std::string &rFileName, const ::xsd::cxx::tree::properties< char > &rProps, bool validate=true)
Definition XmlTools.cpp:54
static std::vector< xercesc::DOMElement * > GetChildElements(const xercesc::DOMElement *pElement)
Definition XmlTools.cpp:344
static void WrapContentInElement(xercesc::DOMDocument *pDocument, xercesc::DOMElement *pElement, const XMLCh *pNewElementLocalName)
Definition XmlTools.cpp:410
static void Finalize()
Definition XmlTools.cpp:104