Chaste  Release::2018.1
XmlTools.cpp
1 /*
2 
3 Copyright (c) 2005-2018, University of Oxford.
4 All rights reserved.
5 
6 University of Oxford means the Chancellor, Masters and Scholars of the
7 University of Oxford, having an administrative office at Wellington
8 Square, Oxford OX1 2JD, UK.
9 
10 This file is part of Chaste.
11 
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14  * Redistributions of source code must retain the above copyright notice,
15  this list of conditions and the following disclaimer.
16  * Redistributions in binary form must reproduce the above copyright notice,
17  this list of conditions and the following disclaimer in the documentation
18  and/or other materials provided with the distribution.
19  * Neither the name of the University of Oxford nor the names of its
20  contributors may be used to endorse or promote products derived from this
21  software without specific prior written permission.
22 
23 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
24 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
27 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
29 GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32 OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 
34 */
35 
36 #include "XmlTools.hpp"
37 
38 #include <iostream>
39 
40 #include <xercesc/dom/DOM.hpp>
41 #include <xercesc/util/PlatformUtils.hpp>
42 #include <xercesc/util/QName.hpp>
43 #include <xercesc/util/XMLUniDefs.hpp> // chLatin_*
44 #include <xercesc/framework/Wrapper4InputSource.hpp>
45 #include <xercesc/validators/common/Grammar.hpp>
46 
47 #include <xsd/cxx/xml/sax/std-input-source.hxx>
48 #include <xsd/cxx/xml/dom/bits/error-handler-proxy.hxx>
49 #include <xsd/cxx/tree/exceptions.hxx>
50 
51 #include "Exception.hpp"
52 
53 xsd::cxx::xml::dom::auto_ptr<xercesc::DOMDocument> XmlTools::ReadXmlFile(
54  const std::string& rFileName,
55  const ::xsd::cxx::tree::properties<char>& rProps,
56  bool validate)
57 {
58  xsd::cxx::xml::dom::auto_ptr<xercesc::DOMDocument> p_doc;
59  try
60  {
61  // Initialise Xerces
62  xercesc::XMLPlatformUtils::Initialize();
63  // Set up an error handler
64  ::xsd::cxx::tree::error_handler<char> error_handler;
65  // Parse XML to DOM
66  p_doc = XmlTools::ReadFileToDomDocument(rFileName, error_handler, rProps, validate);
67  // Any errors?
68  error_handler.throw_if_failed< ::xsd::cxx::tree::parsing<char> >();
69  }
70  catch (const ::xsd::cxx::tree::parsing<char>& e)
71  {
72  Finalize();
73  // Test for missing schema/xml file
74 #if (XSD_INT_VERSION >= 3000000L)
75  const ::xsd::cxx::tree::diagnostics<char>& diags = e.diagnostics();
76  const ::xsd::cxx::tree::error<char>& first_error = diags[0];
77 #else
78  const ::xsd::cxx::tree::errors<char>& errors = e.errors();
79  const ::xsd::cxx::tree::error<char>& first_error = errors[0];
80 #endif
81  if (first_error.line() == 0u)
82  {
83  std::cerr << first_error << std::endl;
84  EXCEPTION("Missing file parsing configuration file: " + rFileName);
85  }
86  else
87  {
88  std::cerr << e << std::endl;
89  EXCEPTION("XML parsing error in configuration file: " + rFileName);
90  }
91  }
92 // LCOV_EXCL_START
93  catch (...)
94  { // This shouldn't happen, but just in case...
95  Finalize();
96  throw;
97  }
98 // LCOV_EXCL_STOP
99  return p_doc;
100 }
101 
102 
104 {
105  xercesc::XMLPlatformUtils::Terminate();
106 }
107 
109 {
110  // The init=true case will very rarely be used, but a parameter to the constructor is needed
111  // to stop some compilers complaining about an unused variable!
112  if (init)
113  {
114 // LCOV_EXCL_START
115  xercesc::XMLPlatformUtils::Initialize();
116 // LCOV_EXCL_STOP
117  }
118 }
119 
121 {
123 }
124 
125 xsd::cxx::xml::dom::auto_ptr<xercesc::DOMDocument> XmlTools::ReadFileToDomDocument(
126  const std::string& rFileName,
127  ::xsd::cxx::xml::error_handler<char>& rErrorHandler,
128  const ::xsd::cxx::tree::properties<char>& rProps,
129  bool validate)
130 {
131  using namespace xercesc;
132  namespace xml = xsd::cxx::xml;
133 
134  // Get an implementation of the Load-Store (LS) interface.
135  const XMLCh ls_id [] = {chLatin_L, chLatin_S, chNull};
136  DOMImplementation* p_impl(DOMImplementationRegistry::getDOMImplementation(ls_id));
137 
138 #if _XERCES_VERSION >= 30000
139  // Xerces-C++ 3.0.0 and later.
140  xml::dom::auto_ptr<DOMLSParser> p_parser(p_impl->createLSParser(DOMImplementationLS::MODE_SYNCHRONOUS, 0));
141  DOMConfiguration* p_conf(p_parser->getDomConfig());
142 
143  // Discard comment nodes in the document.
144  p_conf->setParameter(XMLUni::fgDOMComments, false);
145 
146  // Enable datatype normalization.
147  p_conf->setParameter(XMLUni::fgDOMDatatypeNormalization, true);
148 
149  // Do not create EntityReference nodes in the DOM tree. No
150  // EntityReference nodes will be created, only the nodes
151  // corresponding to their fully expanded substitution text
152  // will be created.
153  p_conf->setParameter(XMLUni::fgDOMEntities, false);
154 
155  // Perform namespace processing.
156  p_conf->setParameter(XMLUni::fgDOMNamespaces, true);
157 
158  // Do not include ignorable whitespace in the DOM tree.
159  p_conf->setParameter(XMLUni::fgDOMElementContentWhitespace, false);
160 
161  // Enable validation.
162  if (validate)
163  {
164  p_conf->setParameter(XMLUni::fgDOMValidate, true);
165  p_conf->setParameter(XMLUni::fgXercesSchema, true);
166  p_conf->setParameter(XMLUni::fgXercesSchemaFullChecking, false);
167  // Code taken from xsd/cxx/xml/dom/parsing-source.txx
168  if (!rProps.schema_location().empty())
169  {
170  xml::string locn(rProps.schema_location());
171  const void* p_locn(locn.c_str());
172  p_conf->setParameter(XMLUni::fgXercesSchemaExternalSchemaLocation,
173  const_cast<void*>(p_locn));
174  }
175  if (!rProps.no_namespace_schema_location().empty())
176  {
177  xml::string locn(rProps.no_namespace_schema_location());
178  const void* p_locn(locn.c_str());
179 
180  p_conf->setParameter(XMLUni::fgXercesSchemaExternalNoNameSpaceSchemaLocation,
181  const_cast<void*>(p_locn));
182  }
183  }
184  else
185  {
186  // This branch is only used by projects
187 // LCOV_EXCL_START
188  p_conf->setParameter(XMLUni::fgDOMValidate, false);
189  p_conf->setParameter(XMLUni::fgXercesSchema, false);
190  p_conf->setParameter(XMLUni::fgXercesSchemaFullChecking, false);
191 // LCOV_EXCL_STOP
192  }
193 
194  // We will release the DOM document ourselves.
195  p_conf->setParameter(XMLUni::fgXercesUserAdoptsDOMDocument, true);
196 
197  // Set error handler.
198  xml::dom::bits::error_handler_proxy<char> ehp(rErrorHandler);
199  p_conf->setParameter(XMLUni::fgDOMErrorHandler, &ehp);
200 
201 #else // _XERCES_VERSION >= 30000
202  // Same as above but for Xerces-C++ 2 series.
203  xml::dom::auto_ptr<DOMBuilder> p_parser(p_impl->createDOMBuilder(DOMImplementationLS::MODE_SYNCHRONOUS, 0));
204 
205  p_parser->setFeature(XMLUni::fgDOMComments, false);
206  p_parser->setFeature(XMLUni::fgDOMDatatypeNormalization, true);
207  p_parser->setFeature(XMLUni::fgDOMEntities, false);
208  p_parser->setFeature(XMLUni::fgDOMNamespaces, true);
209  p_parser->setFeature(XMLUni::fgDOMWhitespaceInElementContent, false);
210  p_parser->setFeature(XMLUni::fgXercesUserAdoptsDOMDocument, true);
211 
212  // Code taken from xsd/cxx/xml/dom/parsing-source.txx
213  if (validate)
214  {
215  p_parser->setFeature(XMLUni::fgDOMValidation, true);
216  p_parser->setFeature(XMLUni::fgXercesSchema, true);
217  p_parser->setFeature(XMLUni::fgXercesSchemaFullChecking, false);
218  if (!rProps.schema_location().empty())
219  {
220  xml::string locn(rProps.schema_location());
221  const void* p_locn(locn.c_str());
222  p_parser->setProperty(XMLUni::fgXercesSchemaExternalSchemaLocation,
223  const_cast<void*>(p_locn));
224  }
225 
226  if (!rProps.no_namespace_schema_location().empty())
227  {
228  xml::string locn(rProps.no_namespace_schema_location());
229  const void* p_locn(locn.c_str());
230 
231  p_parser->setProperty(XMLUni::fgXercesSchemaExternalNoNameSpaceSchemaLocation,
232  const_cast<void*>(p_locn));
233  }
234  }
235  else
236  {
237  // This branch is only used by projects
238 // LCOV_EXCL_START
239  p_parser->setFeature(XMLUni::fgDOMValidation, false);
240  p_parser->setFeature(XMLUni::fgXercesSchema, false);
241  p_parser->setFeature(XMLUni::fgXercesSchemaFullChecking, false);
242 // LCOV_EXCL_STOP
243  }
244 
245  xml::dom::bits::error_handler_proxy<char> ehp(rErrorHandler);
246  p_parser->setErrorHandler(&ehp);
247 
248 #endif // _XERCES_VERSION >= 30000
249 
250  // Do the parse
251  xml::dom::auto_ptr<DOMDocument> p_doc(p_parser->parseURI(rFileName.c_str()));
252 
253  if (ehp.failed())
254  {
255  p_doc.reset();
256  }
257 
258  return p_doc;
259 }
260 
261 // LCOV_EXCL_START
262 void XmlTools::PrintNode(const std::string& rMsg, xercesc::DOMNode* pNode, bool showChildren)
263 {
264  std::string prefix = X2C(pNode->getPrefix());
265  std::string name = X2C(pNode->getLocalName());
266  std::string nsuri = X2C(pNode->getNamespaceURI());
267  std::cout << rMsg << " " << pNode << " " << prefix << ":" << name << " in " << nsuri << std::endl;
268  if (showChildren)
269  {
270  for (xercesc::DOMNode* p_node = pNode->getFirstChild();
271  p_node != NULL;
272  p_node = p_node->getNextSibling())
273  {
274  std::cout << " child type " << p_node->getNodeType();
275  PrintNode("", p_node, false);
276  }
277  xercesc::DOMNamedNodeMap* p_attrs = pNode->getAttributes();
278  if (p_attrs)
279  {
280  for (XMLSize_t i=0; i<p_attrs->getLength(); i++)
281  {
282  xercesc::DOMNode* p_attr = p_attrs->item(i);
283  std::string value = X2C(p_attr->getNodeValue());
284  PrintNode(" attr (" + value + ")", p_attr, false);
285  }
286  }
287  }
288 }
289 // LCOV_EXCL_STOP
290 
291 xercesc::DOMElement* XmlTools::SetNamespace(xercesc::DOMDocument* pDocument,
292  xercesc::DOMElement* pElement,
293  const XMLCh* pNamespace)
294 {
295  using namespace xercesc;
296 
297  //PrintNode("Renaming", pElement, true);
298  DOMNamedNodeMap* p_orig_attrs = pElement->getAttributes();
299  std::vector<std::string> attr_values;
300  if (p_orig_attrs)
301  {
302  for (XMLSize_t i=0; i<p_orig_attrs->getLength(); i++)
303  {
304  DOMNode* p_attr = p_orig_attrs->item(i);
305  attr_values.push_back(X2C(p_attr->getNodeValue()));
306  }
307  }
308  DOMElement* p_new_elt = static_cast<DOMElement*>(
309  pDocument->renameNode(pElement, pNamespace, pElement->getLocalName()));
310  //PrintNode(" to", p_new_elt, true);
311  // Fix attributes - some get broken by the rename!
312  if (p_orig_attrs)
313  {
314  DOMNamedNodeMap* p_new_attrs = p_new_elt->getAttributes();
315  assert(p_new_attrs);
316  assert(p_new_attrs == p_orig_attrs);
317  assert(p_new_attrs->getLength() == attr_values.size());
318  for (XMLSize_t i=0; i<p_new_attrs->getLength(); i++)
319  {
320  DOMNode* p_attr = p_new_attrs->item(i);
321  p_attr->setNodeValue(X(attr_values[i]));
322  }
323  }
324  //PrintNode(" after attr fix", p_new_elt, true);
325 
326  std::vector<DOMElement*> children = GetChildElements(p_new_elt);
327  for (std::vector<DOMElement*>::iterator it = children.begin(); it != children.end(); ++it)
328  {
329  SetNamespace(pDocument, *it, pNamespace);
330  }
331 
332  return p_new_elt;
333 }
334 
335 xercesc::DOMElement* XmlTools::SetNamespace(xercesc::DOMDocument* pDocument,
336  xercesc::DOMElement* pElement,
337  const std::string& rNamespace)
338 {
339  return SetNamespace(pDocument, pElement, X(rNamespace));
340 }
341 
342 
343 std::vector<xercesc::DOMElement*> XmlTools::GetChildElements(const xercesc::DOMElement* pElement)
344 {
345  std::vector<xercesc::DOMElement*> children;
346  for (xercesc::DOMNode* p_node = pElement->getFirstChild();
347  p_node != NULL;
348  p_node = p_node->getNextSibling())
349  {
350  if (p_node->getNodeType() == xercesc::DOMNode::ELEMENT_NODE)
351  {
352  children.push_back(static_cast<xercesc::DOMElement*>(p_node));
353  }
354  }
355  return children;
356 }
357 
358 
359 void XmlTools::FindElements(const xercesc::DOMElement* pContextElement,
360  const std::vector<std::string>& rNames,
361  std::vector<xercesc::DOMElement*>& rResults,
362  unsigned depth)
363 {
364  for (xercesc::DOMNode* p_node = pContextElement->getFirstChild();
365  p_node != NULL;
366  p_node = p_node->getNextSibling())
367  {
368  if (p_node->getNodeType() == xercesc::DOMNode::ELEMENT_NODE &&
369  X2C(p_node->getLocalName()) == rNames[depth])
370  {
371  xercesc::DOMElement* p_child_elt = static_cast<xercesc::DOMElement*>(p_node);
372  if (depth == rNames.size() - 1)
373  {
374  rResults.push_back(p_child_elt);
375  }
376  else
377  {
378  FindElements(p_child_elt, rNames, rResults, depth+1);
379  }
380  }
381  }
382 }
383 
384 std::vector<xercesc::DOMElement*> XmlTools::FindElements(const xercesc::DOMElement* pContextElement,
385  const std::string& rPath)
386 {
387  std::vector<xercesc::DOMElement*> results;
388  std::vector<std::string> path;
389  size_t start_pos = 0;
390  size_t slash_pos = 0;
391  while (slash_pos != std::string::npos)
392  {
393  slash_pos = rPath.find('/', start_pos);
394  if (slash_pos == std::string::npos)
395  {
396  path.push_back(rPath.substr(start_pos));
397  }
398  else
399  {
400  path.push_back(rPath.substr(start_pos, slash_pos-start_pos));
401  }
402  start_pos = slash_pos + 1;
403  }
404  FindElements(pContextElement, path, results);
405  return results;
406 }
407 
408 
409 void XmlTools::WrapContentInElement(xercesc::DOMDocument* pDocument,
410  xercesc::DOMElement* pElement,
411  const XMLCh* pNewElementLocalName)
412 {
413  const XMLCh* p_namespace_uri = pElement->getNamespaceURI();
414  const XMLCh* p_prefix = pElement->getPrefix();
415  const XMLCh* p_qualified_name;
416  if (p_prefix)
417  {
418 // LCOV_EXCL_START
419  // We can't actually cover this code, since versions of the parameters file which need this
420  // transform didn't use a namespace, so can't have a namespace prefix!
421  xercesc::QName qname(p_prefix, pNewElementLocalName, 0);
422  p_qualified_name = qname.getRawName();
423 // LCOV_EXCL_STOP
424  }
425  else
426  {
427  p_qualified_name = pNewElementLocalName;
428  }
429  xercesc::DOMElement* p_wrapper_elt = pDocument->createElementNS(p_namespace_uri, p_qualified_name);
430  // Move all child nodes of pElement to be children of p_wrapper_elt
431  xercesc::DOMNodeList* p_children = pElement->getChildNodes();
432  for (unsigned i=0; i<p_children->getLength(); i++)
433  {
434  xercesc::DOMNode* p_child = pElement->removeChild(p_children->item(i));
435  p_wrapper_elt->appendChild(p_child);
436  }
437  // Add the wrapper as the sole child of pElement
438  pElement->appendChild(p_wrapper_elt);
439 }
440 
441 
442 std::string XmlTools::EscapeSpaces(const std::string& rPath)
443 {
444  std::string escaped_path;
445  for (std::string::const_iterator it = rPath.begin(); it != rPath.end(); ++it)
446  {
447  if (*it == ' ')
448  {
449  escaped_path += "%20";
450  }
451  else
452  {
453  escaped_path += *it;
454  }
455  }
456  return escaped_path;
457 }
static std::string EscapeSpaces(const std::string &rPath)
Definition: XmlTools.cpp:442
static void PrintNode(const std::string &rMsg, xercesc::DOMNode *pNode, bool showChildren=false)
Definition: XmlTools.cpp:262
static xercesc::DOMElement * SetNamespace(xercesc::DOMDocument *pDocument, xercesc::DOMElement *pElement, const std::string &rNamespace)
Definition: XmlTools.cpp:335
#define EXCEPTION(message)
Definition: Exception.hpp:143
static std::vector< xercesc::DOMElement * > FindElements(const xercesc::DOMElement *pContextElement, const std::string &rPath)
Definition: XmlTools.cpp:384
Finalizer(bool init)
Definition: XmlTools.cpp:108
static xsd::cxx::xml::dom::auto_ptr< xercesc::DOMDocument > ReadFileToDomDocument(const std::string &rFileName,::xsd::cxx::xml::error_handler< char > &rErrorHandler, const ::xsd::cxx::tree::properties< char > &rProps, bool validate=true)
Definition: XmlTools.cpp:125
static std::vector< xercesc::DOMElement * > GetChildElements(const xercesc::DOMElement *pElement)
Definition: XmlTools.cpp:343
static void WrapContentInElement(xercesc::DOMDocument *pDocument, xercesc::DOMElement *pElement, const XMLCh *pNewElementLocalName)
Definition: XmlTools.cpp:409
static xsd::cxx::xml::dom::auto_ptr< xercesc::DOMDocument > ReadXmlFile(const std::string &rFileName, const ::xsd::cxx::tree::properties< char > &rProps, bool validate=true)
Definition: XmlTools.cpp:53
static void Finalize()
Definition: XmlTools.cpp:103