Chaste Commit::baa90ac2819b962188b7562f2326be23c47859a7
XmlTools.cpp
1/*
2
3Copyright (c) 2005-2024, University of Oxford.
4All rights reserved.
5
6University of Oxford means the Chancellor, Masters and Scholars of the
7University of Oxford, having an administrative office at Wellington
8Square, Oxford OX1 2JD, UK.
9
10This file is part of Chaste.
11
12Redistribution and use in source and binary forms, with or without
13modification, are permitted provided that the following conditions are met:
14 * Redistributions of source code must retain the above copyright notice,
15 this list of conditions and the following disclaimer.
16 * Redistributions in binary form must reproduce the above copyright notice,
17 this list of conditions and the following disclaimer in the documentation
18 and/or other materials provided with the distribution.
19 * Neither the name of the University of Oxford nor the names of its
20 contributors may be used to endorse or promote products derived from this
21 software without specific prior written permission.
22
23THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
24AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
27LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
29GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33
34*/
35
36#include "XmlTools.hpp"
37
38#include <iostream>
39
40#include <xercesc/dom/DOM.hpp>
41#include <xercesc/util/PlatformUtils.hpp>
42#include <xercesc/util/QName.hpp>
43#include <xercesc/util/XMLUniDefs.hpp> // chLatin_*
44#include <xercesc/framework/Wrapper4InputSource.hpp>
45#include <xercesc/validators/common/Grammar.hpp>
46
47#include <xsd/cxx/xml/sax/std-input-source.hxx>
48#include <xsd/cxx/xml/dom/bits/error-handler-proxy.hxx>
49#include <xsd/cxx/tree/exceptions.hxx>
50
51#include "Exception.hpp"
52
53XSD_DOM_AUTO_PTR<xercesc::DOMDocument> XmlTools::ReadXmlFile(
54 const std::string& rFileName,
55 const ::xsd::cxx::tree::properties<char>& rProps,
56 bool validate)
57{
58 XSD_DOM_AUTO_PTR<xercesc::DOMDocument> p_doc;
59 try
60 {
61 // Initialise Xerces
62 xercesc::XMLPlatformUtils::Initialize();
63 // Set up an error handler
64 ::xsd::cxx::tree::error_handler<char> error_handler;
65 // Parse XML to DOM
66 p_doc = XmlTools::ReadFileToDomDocument(rFileName, error_handler, rProps, validate);
67 // Any errors?
68 error_handler.throw_if_failed< ::xsd::cxx::tree::parsing<char> >();
69 }
70 catch (const ::xsd::cxx::tree::parsing<char>& e)
71 {
72 Finalize();
73 // Test for missing schema/xml file
74#if (XSD_INT_VERSION >= 3000000L)
75 const ::xsd::cxx::tree::diagnostics<char>& diags = e.diagnostics();
76 const ::xsd::cxx::tree::error<char>& first_error = diags[0];
77#else
78 const ::xsd::cxx::tree::errors<char>& errors = e.errors();
79 const ::xsd::cxx::tree::error<char>& first_error = errors[0];
80#endif
81 if (first_error.line() == 0u)
82 {
83 std::cerr << first_error << std::endl;
84 EXCEPTION("Missing file parsing configuration file: " + rFileName);
85 }
86 else
87 {
88 std::cerr << e << std::endl;
89 EXCEPTION("XML parsing error in configuration file: " + rFileName);
90 }
91 }
92// LCOV_EXCL_START
93 catch (...)
94 { // This shouldn't happen, but just in case...
95 Finalize();
96 throw;
97 }
98// LCOV_EXCL_STOP
99 return p_doc;
100}
101
102
104{
105 xercesc::XMLPlatformUtils::Terminate();
106}
107
109{
110 // The init=true case will very rarely be used, but a parameter to the constructor is needed
111 // to stop some compilers complaining about an unused variable!
112 if (init)
113 {
114// LCOV_EXCL_START
115 xercesc::XMLPlatformUtils::Initialize();
116// LCOV_EXCL_STOP
117 }
118}
119
124
125XSD_DOM_AUTO_PTR<xercesc::DOMDocument> XmlTools::ReadFileToDomDocument(
126 const std::string& rFileName,
127 ::xsd::cxx::xml::error_handler<char>& rErrorHandler,
128 const ::xsd::cxx::tree::properties<char>& rProps,
129 bool validate)
130{
131 using namespace xercesc;
132 namespace xml = xsd::cxx::xml;
133
134 // Get an implementation of the Load-Store (LS) interface.
135 const XMLCh ls_id [] = {chLatin_L, chLatin_S, chNull};
136 DOMImplementation* p_impl(DOMImplementationRegistry::getDOMImplementation(ls_id));
137
138#if _XERCES_VERSION >= 30000
139 // Xerces-C++ 3.0.0 and later.
140 XSD_DOM_AUTO_PTR<DOMLSParser> p_parser(p_impl->createLSParser(DOMImplementationLS::MODE_SYNCHRONOUS, 0));
141 DOMConfiguration* p_conf(p_parser->getDomConfig());
142
143 // Discard comment nodes in the document.
144 p_conf->setParameter(XMLUni::fgDOMComments, false);
145
146 // Enable datatype normalization.
147 p_conf->setParameter(XMLUni::fgDOMDatatypeNormalization, true);
148
149 // Do not create EntityReference nodes in the DOM tree. No
150 // EntityReference nodes will be created, only the nodes
151 // corresponding to their fully expanded substitution text
152 // will be created.
153 p_conf->setParameter(XMLUni::fgDOMEntities, false);
154
155 // Perform namespace processing.
156 p_conf->setParameter(XMLUni::fgDOMNamespaces, true);
157
158 // Do not include ignorable whitespace in the DOM tree.
159 p_conf->setParameter(XMLUni::fgDOMElementContentWhitespace, false);
160
161 // Enable validation.
162 if (validate)
163 {
164 p_conf->setParameter(XMLUni::fgDOMValidate, true);
165 p_conf->setParameter(XMLUni::fgXercesSchema, true);
166 p_conf->setParameter(XMLUni::fgXercesSchemaFullChecking, false);
167 // Code taken from xsd/cxx/xml/dom/parsing-source.txx
168 if (!rProps.schema_location().empty())
169 {
170 xml::string locn(rProps.schema_location());
171 const void* p_locn(locn.c_str());
172 p_conf->setParameter(XMLUni::fgXercesSchemaExternalSchemaLocation,
173 const_cast<void*>(p_locn));
174 }
175 if (!rProps.no_namespace_schema_location().empty())
176 {
177 xml::string locn(rProps.no_namespace_schema_location());
178 const void* p_locn(locn.c_str());
179
180 p_conf->setParameter(XMLUni::fgXercesSchemaExternalNoNameSpaceSchemaLocation,
181 const_cast<void*>(p_locn));
182 }
183 }
184 else
185 {
186 // This branch is only used by projects
187// LCOV_EXCL_START
188 p_conf->setParameter(XMLUni::fgDOMValidate, false);
189 p_conf->setParameter(XMLUni::fgXercesSchema, false);
190 p_conf->setParameter(XMLUni::fgXercesSchemaFullChecking, false);
191// LCOV_EXCL_STOP
192 }
193
194 // We will release the DOM document ourselves.
195 p_conf->setParameter(XMLUni::fgXercesUserAdoptsDOMDocument, true);
196
197 // Set error handler.
198 xml::dom::bits::error_handler_proxy<char> ehp(rErrorHandler);
199 p_conf->setParameter(XMLUni::fgDOMErrorHandler, &ehp);
200
201#else // _XERCES_VERSION < 30000
202 // Same as above but for Xerces-C++ 2 series.
203 XSD_DOM_AUTO_PTR<DOMBuilder> p_parser(p_impl->createDOMBuilder(DOMImplementationLS::MODE_SYNCHRONOUS, 0));
204
205 p_parser->setFeature(XMLUni::fgDOMComments, false);
206 p_parser->setFeature(XMLUni::fgDOMDatatypeNormalization, true);
207 p_parser->setFeature(XMLUni::fgDOMEntities, false);
208 p_parser->setFeature(XMLUni::fgDOMNamespaces, true);
209 p_parser->setFeature(XMLUni::fgDOMWhitespaceInElementContent, false);
210 p_parser->setFeature(XMLUni::fgXercesUserAdoptsDOMDocument, true);
211
212 // Code taken from xsd/cxx/xml/dom/parsing-source.txx
213 if (validate)
214 {
215 p_parser->setFeature(XMLUni::fgDOMValidation, true);
216 p_parser->setFeature(XMLUni::fgXercesSchema, true);
217 p_parser->setFeature(XMLUni::fgXercesSchemaFullChecking, false);
218 if (!rProps.schema_location().empty())
219 {
220 xml::string locn(rProps.schema_location());
221 const void* p_locn(locn.c_str());
222 p_parser->setProperty(XMLUni::fgXercesSchemaExternalSchemaLocation,
223 const_cast<void*>(p_locn));
224 }
225
226 if (!rProps.no_namespace_schema_location().empty())
227 {
228 xml::string locn(rProps.no_namespace_schema_location());
229 const void* p_locn(locn.c_str());
230
231 p_parser->setProperty(XMLUni::fgXercesSchemaExternalNoNameSpaceSchemaLocation,
232 const_cast<void*>(p_locn));
233 }
234 }
235 else
236 {
237 // This branch is only used by projects
238// LCOV_EXCL_START
239 p_parser->setFeature(XMLUni::fgDOMValidation, false);
240 p_parser->setFeature(XMLUni::fgXercesSchema, false);
241 p_parser->setFeature(XMLUni::fgXercesSchemaFullChecking, false);
242// LCOV_EXCL_STOP
243 }
244
245 xml::dom::bits::error_handler_proxy<char> ehp(rErrorHandler);
246 p_parser->setErrorHandler(&ehp);
247
248#endif // _XERCES_VERSION >= 30000
249
250 // Do the parse
251 XSD_DOM_AUTO_PTR<DOMDocument> p_doc(p_parser->parseURI(rFileName.c_str()));
252
253 if (ehp.failed())
254 {
255 p_doc.reset();
256 }
257
258 return p_doc;
259}
260
261// LCOV_EXCL_START
262void XmlTools::PrintNode(const std::string& rMsg, xercesc::DOMNode* pNode, bool showChildren)
263{
264 std::string prefix = X2C(pNode->getPrefix());
265 std::string name = X2C(pNode->getLocalName());
266 std::string nsuri = X2C(pNode->getNamespaceURI());
267 std::cout << rMsg << " " << pNode << " " << prefix << ":" << name << " in " << nsuri << std::endl;
268 if (showChildren)
269 {
270 for (xercesc::DOMNode* p_node = pNode->getFirstChild();
271 p_node != NULL;
272 p_node = p_node->getNextSibling())
273 {
274 std::cout << " child type " << p_node->getNodeType();
275 PrintNode("", p_node, false);
276 }
277 xercesc::DOMNamedNodeMap* p_attrs = pNode->getAttributes();
278 if (p_attrs)
279 {
280 for (XMLSize_t i=0; i<p_attrs->getLength(); i++)
281 {
282 xercesc::DOMNode* p_attr = p_attrs->item(i);
283 std::string value = X2C(p_attr->getNodeValue());
284 PrintNode(" attr (" + value + ")", p_attr, false);
285 }
286 }
287 }
288}
289// LCOV_EXCL_STOP
290
291xercesc::DOMElement* XmlTools::SetNamespace(xercesc::DOMDocument* pDocument,
292 xercesc::DOMElement* pElement,
293 const XMLCh* pNamespace)
294{
295 using namespace xercesc;
296
297 //PrintNode("Renaming", pElement, true);
298 DOMNamedNodeMap* p_orig_attrs = pElement->getAttributes();
299 std::vector<std::string> attr_values;
300 if (p_orig_attrs)
301 {
302 for (XMLSize_t i=0; i<p_orig_attrs->getLength(); i++)
303 {
304 DOMNode* p_attr = p_orig_attrs->item(i);
305 attr_values.push_back(X2C(p_attr->getNodeValue()));
306 }
307 }
308 DOMElement* p_new_elt = static_cast<DOMElement*>(
309 pDocument->renameNode(pElement, pNamespace, pElement->getLocalName()));
310 //PrintNode(" to", p_new_elt, true);
311 // Fix attributes - some get broken by the rename!
312 if (p_orig_attrs)
313 {
314 DOMNamedNodeMap* p_new_attrs = p_new_elt->getAttributes();
315 assert(p_new_attrs);
316 assert(p_new_attrs == p_orig_attrs);
317 assert(p_new_attrs->getLength() == attr_values.size());
318 for (XMLSize_t i=0; i<p_new_attrs->getLength(); i++)
319 {
320 DOMNode* p_attr = p_new_attrs->item(i);
321 p_attr->setNodeValue(X(attr_values[i]));
322 }
323 }
324 //PrintNode(" after attr fix", p_new_elt, true);
325
326 std::vector<DOMElement*> children = GetChildElements(p_new_elt);
327 for (std::vector<DOMElement*>::iterator it = children.begin(); it != children.end(); ++it)
328 {
329 SetNamespace(pDocument, *it, pNamespace);
330 }
331
332 return p_new_elt;
333}
334
335xercesc::DOMElement* XmlTools::SetNamespace(xercesc::DOMDocument* pDocument,
336 xercesc::DOMElement* pElement,
337 const std::string& rNamespace)
338{
339 return SetNamespace(pDocument, pElement, X(rNamespace));
340}
341
342
343std::vector<xercesc::DOMElement*> XmlTools::GetChildElements(const xercesc::DOMElement* pElement)
344{
345 std::vector<xercesc::DOMElement*> children;
346 for (xercesc::DOMNode* p_node = pElement->getFirstChild();
347 p_node != NULL;
348 p_node = p_node->getNextSibling())
349 {
350 if (p_node->getNodeType() == xercesc::DOMNode::ELEMENT_NODE)
351 {
352 children.push_back(static_cast<xercesc::DOMElement*>(p_node));
353 }
354 }
355 return children;
356}
357
358
359void XmlTools::FindElements(const xercesc::DOMElement* pContextElement,
360 const std::vector<std::string>& rNames,
361 std::vector<xercesc::DOMElement*>& rResults,
362 unsigned depth)
363{
364 for (xercesc::DOMNode* p_node = pContextElement->getFirstChild();
365 p_node != NULL;
366 p_node = p_node->getNextSibling())
367 {
368 if (p_node->getNodeType() == xercesc::DOMNode::ELEMENT_NODE &&
369 X2C(p_node->getLocalName()) == rNames[depth])
370 {
371 xercesc::DOMElement* p_child_elt = static_cast<xercesc::DOMElement*>(p_node);
372 if (depth == rNames.size() - 1)
373 {
374 rResults.push_back(p_child_elt);
375 }
376 else
377 {
378 FindElements(p_child_elt, rNames, rResults, depth+1);
379 }
380 }
381 }
382}
383
384std::vector<xercesc::DOMElement*> XmlTools::FindElements(const xercesc::DOMElement* pContextElement,
385 const std::string& rPath)
386{
387 std::vector<xercesc::DOMElement*> results;
388 std::vector<std::string> path;
389 size_t start_pos = 0;
390 size_t slash_pos = 0;
391 while (slash_pos != std::string::npos)
392 {
393 slash_pos = rPath.find('/', start_pos);
394 if (slash_pos == std::string::npos)
395 {
396 path.push_back(rPath.substr(start_pos));
397 }
398 else
399 {
400 path.push_back(rPath.substr(start_pos, slash_pos-start_pos));
401 }
402 start_pos = slash_pos + 1;
403 }
404 FindElements(pContextElement, path, results);
405 return results;
406}
407
408
409void XmlTools::WrapContentInElement(xercesc::DOMDocument* pDocument,
410 xercesc::DOMElement* pElement,
411 const XMLCh* pNewElementLocalName)
412{
413 const XMLCh* p_namespace_uri = pElement->getNamespaceURI();
414 const XMLCh* p_prefix = pElement->getPrefix();
415 const XMLCh* p_qualified_name;
416 if (p_prefix)
417 {
418// LCOV_EXCL_START
419 // We can't actually cover this code, since versions of the parameters file which need this
420 // transform didn't use a namespace, so can't have a namespace prefix!
421 xercesc::QName qname(p_prefix, pNewElementLocalName, 0);
422 p_qualified_name = qname.getRawName();
423// LCOV_EXCL_STOP
424 }
425 else
426 {
427 p_qualified_name = pNewElementLocalName;
428 }
429 xercesc::DOMElement* p_wrapper_elt = pDocument->createElementNS(p_namespace_uri, p_qualified_name);
430 // Move all child nodes of pElement to be children of p_wrapper_elt
431 xercesc::DOMNodeList* p_children = pElement->getChildNodes();
432 for (unsigned i=0; i<p_children->getLength(); i++)
433 {
434 xercesc::DOMNode* p_child = pElement->removeChild(p_children->item(i));
435 p_wrapper_elt->appendChild(p_child);
436 }
437 // Add the wrapper as the sole child of pElement
438 pElement->appendChild(p_wrapper_elt);
439}
440
441
442std::string XmlTools::EscapeSpaces(const std::string& rPath)
443{
444 std::string escaped_path;
445 for (std::string::const_iterator it = rPath.begin(); it != rPath.end(); ++it)
446 {
447 if (*it == ' ')
448 {
449 escaped_path += "%20";
450 }
451 else
452 {
453 escaped_path += *it;
454 }
455 }
456 return escaped_path;
457}
#define EXCEPTION(message)
Finalizer(bool init)
Definition XmlTools.cpp:108
static xercesc::DOMElement * SetNamespace(xercesc::DOMDocument *pDocument, xercesc::DOMElement *pElement, const std::string &rNamespace)
Definition XmlTools.cpp:335
static std::string EscapeSpaces(const std::string &rPath)
Definition XmlTools.cpp:442
static XSD_DOM_AUTO_PTR< xercesc::DOMDocument > ReadFileToDomDocument(const std::string &rFileName, ::xsd::cxx::xml::error_handler< char > &rErrorHandler, const ::xsd::cxx::tree::properties< char > &rProps, bool validate=true)
Definition XmlTools.cpp:125
static void PrintNode(const std::string &rMsg, xercesc::DOMNode *pNode, bool showChildren=false)
Definition XmlTools.cpp:262
static std::vector< xercesc::DOMElement * > FindElements(const xercesc::DOMElement *pContextElement, const std::string &rPath)
Definition XmlTools.cpp:384
static XSD_DOM_AUTO_PTR< xercesc::DOMDocument > ReadXmlFile(const std::string &rFileName, const ::xsd::cxx::tree::properties< char > &rProps, bool validate=true)
Definition XmlTools.cpp:53
static std::vector< xercesc::DOMElement * > GetChildElements(const xercesc::DOMElement *pElement)
Definition XmlTools.cpp:343
static void WrapContentInElement(xercesc::DOMDocument *pDocument, xercesc::DOMElement *pElement, const XMLCh *pNewElementLocalName)
Definition XmlTools.cpp:409
static void Finalize()
Definition XmlTools.cpp:103