Chaste  Release::3.4
XmlTools.cpp
1 /*
2 
3 Copyright (c) 2005-2016, University of Oxford.
4 All rights reserved.
5 
6 University of Oxford means the Chancellor, Masters and Scholars of the
7 University of Oxford, having an administrative office at Wellington
8 Square, Oxford OX1 2JD, UK.
9 
10 This file is part of Chaste.
11 
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14  * Redistributions of source code must retain the above copyright notice,
15  this list of conditions and the following disclaimer.
16  * Redistributions in binary form must reproduce the above copyright notice,
17  this list of conditions and the following disclaimer in the documentation
18  and/or other materials provided with the distribution.
19  * Neither the name of the University of Oxford nor the names of its
20  contributors may be used to endorse or promote products derived from this
21  software without specific prior written permission.
22 
23 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
24 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
27 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
29 GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32 OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 
34 */
35 
36 #include "XmlTools.hpp"
37 
38 #include <iostream>
39 
40 #include <xercesc/dom/DOM.hpp>
41 #include <xercesc/util/PlatformUtils.hpp>
42 #include <xercesc/util/QName.hpp>
43 #include <xercesc/util/XMLUniDefs.hpp> // chLatin_*
44 #include <xercesc/framework/Wrapper4InputSource.hpp>
45 #include <xercesc/validators/common/Grammar.hpp>
46 
47 #include <xsd/cxx/xml/sax/std-input-source.hxx>
48 #include <xsd/cxx/xml/dom/bits/error-handler-proxy.hxx>
49 #include <xsd/cxx/tree/exceptions.hxx>
50 
51 #include "Exception.hpp"
52 
53 
54 
55 xsd::cxx::xml::dom::auto_ptr<xercesc::DOMDocument> XmlTools::ReadXmlFile(
56  const std::string& rFileName,
57  const ::xsd::cxx::tree::properties<char>& rProps,
58  bool validate)
59 {
60  xsd::cxx::xml::dom::auto_ptr<xercesc::DOMDocument> p_doc;
61  try
62  {
63  // Initialise Xerces
64  xercesc::XMLPlatformUtils::Initialize();
65  // Set up an error handler
66  ::xsd::cxx::tree::error_handler<char> error_handler;
67  // Parse XML to DOM
68  p_doc = XmlTools::ReadFileToDomDocument(rFileName, error_handler, rProps, validate);
69  // Any errors?
70  error_handler.throw_if_failed< ::xsd::cxx::tree::parsing<char> >();
71  }
72  catch (const ::xsd::cxx::tree::parsing<char>& e)
73  {
74  Finalize();
75  // Test for missing schema/xml file
76 #if (XSD_INT_VERSION >= 3000000L)
77  const ::xsd::cxx::tree::diagnostics<char>& diags = e.diagnostics();
78  const ::xsd::cxx::tree::error<char>& first_error = diags[0];
79 #else
80  const ::xsd::cxx::tree::errors<char>& errors = e.errors();
81  const ::xsd::cxx::tree::error<char>& first_error = errors[0];
82 #endif
83  if (first_error.line() == 0u)
84  {
85  std::cerr << first_error << std::endl;
86  EXCEPTION("Missing file parsing configuration file: " + rFileName);
87  }
88  else
89  {
90  std::cerr << e << std::endl;
91  EXCEPTION("XML parsing error in configuration file: " + rFileName);
92  }
93  }
94 #define COVERAGE_IGNORE
95  catch (...)
96  { // This shouldn't happen, but just in case...
97  Finalize();
98  throw;
99  }
100 #undef COVERAGE_IGNORE
101  return p_doc;
102 }
103 
104 
106 {
107  xercesc::XMLPlatformUtils::Terminate();
108 }
109 
111 {
112  // The init=true case will very rarely be used, but a parameter to the constructor is needed
113  // to stop some compilers complaining about an unused variable!
114  if (init)
115  {
116 #define COVERAGE_IGNORE
117  xercesc::XMLPlatformUtils::Initialize();
118 #undef COVERAGE_IGNORE
119  }
120 }
121 
123 {
125 }
126 
127 xsd::cxx::xml::dom::auto_ptr<xercesc::DOMDocument> XmlTools::ReadFileToDomDocument(
128  const std::string& rFileName,
129  ::xsd::cxx::xml::error_handler<char>& rErrorHandler,
130  const ::xsd::cxx::tree::properties<char>& rProps,
131  bool validate)
132 {
133  using namespace xercesc;
134  namespace xml = xsd::cxx::xml;
135 
136  // Get an implementation of the Load-Store (LS) interface.
137  const XMLCh ls_id [] = {chLatin_L, chLatin_S, chNull};
138  DOMImplementation* p_impl(DOMImplementationRegistry::getDOMImplementation(ls_id));
139 
140 #if _XERCES_VERSION >= 30000
141  // Xerces-C++ 3.0.0 and later.
142  xml::dom::auto_ptr<DOMLSParser> p_parser(p_impl->createLSParser(DOMImplementationLS::MODE_SYNCHRONOUS, 0));
143  DOMConfiguration* p_conf(p_parser->getDomConfig());
144 
145  // Discard comment nodes in the document.
146  p_conf->setParameter(XMLUni::fgDOMComments, false);
147 
148  // Enable datatype normalization.
149  p_conf->setParameter(XMLUni::fgDOMDatatypeNormalization, true);
150 
151  // Do not create EntityReference nodes in the DOM tree. No
152  // EntityReference nodes will be created, only the nodes
153  // corresponding to their fully expanded substitution text
154  // will be created.
155  p_conf->setParameter(XMLUni::fgDOMEntities, false);
156 
157  // Perform namespace processing.
158  p_conf->setParameter(XMLUni::fgDOMNamespaces, true);
159 
160  // Do not include ignorable whitespace in the DOM tree.
161  p_conf->setParameter(XMLUni::fgDOMElementContentWhitespace, false);
162 
163  // Enable validation.
164  if (validate)
165  {
166  p_conf->setParameter(XMLUni::fgDOMValidate, true);
167  p_conf->setParameter(XMLUni::fgXercesSchema, true);
168  p_conf->setParameter(XMLUni::fgXercesSchemaFullChecking, false);
169  // Code taken from xsd/cxx/xml/dom/parsing-source.txx
170  if (!rProps.schema_location().empty())
171  {
172  xml::string locn(rProps.schema_location());
173  const void* p_locn(locn.c_str());
174  p_conf->setParameter(XMLUni::fgXercesSchemaExternalSchemaLocation,
175  const_cast<void*>(p_locn));
176  }
177  if (!rProps.no_namespace_schema_location().empty())
178  {
179  xml::string locn(rProps.no_namespace_schema_location());
180  const void* p_locn(locn.c_str());
181 
182  p_conf->setParameter(XMLUni::fgXercesSchemaExternalNoNameSpaceSchemaLocation,
183  const_cast<void*>(p_locn));
184  }
185  }
186  else
187  {
188  // This branch is only used by projects
189 #define COVERAGE_IGNORE
190  p_conf->setParameter(XMLUni::fgDOMValidate, false);
191  p_conf->setParameter(XMLUni::fgXercesSchema, false);
192  p_conf->setParameter(XMLUni::fgXercesSchemaFullChecking, false);
193 #undef COVERAGE_IGNORE
194  }
195 
196  // We will release the DOM document ourselves.
197  p_conf->setParameter(XMLUni::fgXercesUserAdoptsDOMDocument, true);
198 
199  // Set error handler.
200  xml::dom::bits::error_handler_proxy<char> ehp(rErrorHandler);
201  p_conf->setParameter(XMLUni::fgDOMErrorHandler, &ehp);
202 
203 #else // _XERCES_VERSION >= 30000
204  // Same as above but for Xerces-C++ 2 series.
205  xml::dom::auto_ptr<DOMBuilder> p_parser(p_impl->createDOMBuilder(DOMImplementationLS::MODE_SYNCHRONOUS, 0));
206 
207  p_parser->setFeature(XMLUni::fgDOMComments, false);
208  p_parser->setFeature(XMLUni::fgDOMDatatypeNormalization, true);
209  p_parser->setFeature(XMLUni::fgDOMEntities, false);
210  p_parser->setFeature(XMLUni::fgDOMNamespaces, true);
211  p_parser->setFeature(XMLUni::fgDOMWhitespaceInElementContent, false);
212  p_parser->setFeature(XMLUni::fgXercesUserAdoptsDOMDocument, true);
213 
214  // Code taken from xsd/cxx/xml/dom/parsing-source.txx
215  if (validate)
216  {
217  p_parser->setFeature(XMLUni::fgDOMValidation, true);
218  p_parser->setFeature(XMLUni::fgXercesSchema, true);
219  p_parser->setFeature(XMLUni::fgXercesSchemaFullChecking, false);
220  if (!rProps.schema_location().empty())
221  {
222  xml::string locn(rProps.schema_location());
223  const void* p_locn(locn.c_str());
224  p_parser->setProperty(XMLUni::fgXercesSchemaExternalSchemaLocation,
225  const_cast<void*>(p_locn));
226  }
227 
228  if (!rProps.no_namespace_schema_location().empty())
229  {
230  xml::string locn(rProps.no_namespace_schema_location());
231  const void* p_locn(locn.c_str());
232 
233  p_parser->setProperty(XMLUni::fgXercesSchemaExternalNoNameSpaceSchemaLocation,
234  const_cast<void*>(p_locn));
235  }
236  }
237  else
238  {
239  // This branch is only used by projects
240 #define COVERAGE_IGNORE
241  p_parser->setFeature(XMLUni::fgDOMValidation, false);
242  p_parser->setFeature(XMLUni::fgXercesSchema, false);
243  p_parser->setFeature(XMLUni::fgXercesSchemaFullChecking, false);
244 #undef COVERAGE_IGNORE
245  }
246 
247  xml::dom::bits::error_handler_proxy<char> ehp(rErrorHandler);
248  p_parser->setErrorHandler(&ehp);
249 
250 #endif // _XERCES_VERSION >= 30000
251 
252  // Do the parse
253  xml::dom::auto_ptr<DOMDocument> p_doc(p_parser->parseURI(rFileName.c_str()));
254 
255  if (ehp.failed())
256  {
257  p_doc.reset();
258  }
259 
260  return p_doc;
261 }
262 
263 #define COVERAGE_IGNORE
264 void XmlTools::PrintNode(const std::string& rMsg, xercesc::DOMNode* pNode, bool showChildren)
265 {
266  std::string prefix = X2C(pNode->getPrefix());
267  std::string name = X2C(pNode->getLocalName());
268  std::string nsuri = X2C(pNode->getNamespaceURI());
269  std::cout << rMsg << " " << pNode << " " << prefix << ":" << name << " in " << nsuri << std::endl;
270  if (showChildren)
271  {
272  for (xercesc::DOMNode* p_node = pNode->getFirstChild();
273  p_node != NULL;
274  p_node = p_node->getNextSibling())
275  {
276  std::cout << " child type " << p_node->getNodeType();
277  PrintNode("", p_node, false);
278  }
279  xercesc::DOMNamedNodeMap* p_attrs = pNode->getAttributes();
280  if (p_attrs)
281  {
282  for (XMLSize_t i=0; i<p_attrs->getLength(); i++)
283  {
284  xercesc::DOMNode* p_attr = p_attrs->item(i);
285  std::string value = X2C(p_attr->getNodeValue());
286  PrintNode(" attr (" + value + ")", p_attr, false);
287  }
288  }
289  }
290 }
291 #undef COVERAGE_IGNORE
292 
293 xercesc::DOMElement* XmlTools::SetNamespace(xercesc::DOMDocument* pDocument,
294  xercesc::DOMElement* pElement,
295  const XMLCh* pNamespace)
296 {
297  using namespace xercesc;
298 
299  //PrintNode("Renaming", pElement, true);
300  DOMNamedNodeMap* p_orig_attrs = pElement->getAttributes();
301  std::vector<std::string> attr_values;
302  if (p_orig_attrs)
303  {
304  for (XMLSize_t i=0; i<p_orig_attrs->getLength(); i++)
305  {
306  DOMNode* p_attr = p_orig_attrs->item(i);
307  attr_values.push_back(X2C(p_attr->getNodeValue()));
308  }
309  }
310  DOMElement* p_new_elt = static_cast<DOMElement*>(
311  pDocument->renameNode(pElement, pNamespace, pElement->getLocalName()));
312  //PrintNode(" to", p_new_elt, true);
313  // Fix attributes - some get broken by the rename!
314  if (p_orig_attrs)
315  {
316  DOMNamedNodeMap* p_new_attrs = p_new_elt->getAttributes();
317  assert(p_new_attrs);
318  assert(p_new_attrs == p_orig_attrs);
319  assert(p_new_attrs->getLength() == attr_values.size());
320  for (XMLSize_t i=0; i<p_new_attrs->getLength(); i++)
321  {
322  DOMNode* p_attr = p_new_attrs->item(i);
323  p_attr->setNodeValue(X(attr_values[i]));
324  }
325  }
326  //PrintNode(" after attr fix", p_new_elt, true);
327 
328  std::vector<DOMElement*> children = GetChildElements(p_new_elt);
329  for (std::vector<DOMElement*>::iterator it = children.begin(); it != children.end(); ++it)
330  {
331  SetNamespace(pDocument, *it, pNamespace);
332  }
333 
334  return p_new_elt;
335 }
336 
337 xercesc::DOMElement* XmlTools::SetNamespace(xercesc::DOMDocument* pDocument,
338  xercesc::DOMElement* pElement,
339  const std::string& rNamespace)
340 {
341  return SetNamespace(pDocument, pElement, X(rNamespace));
342 }
343 
344 
345 std::vector<xercesc::DOMElement*> XmlTools::GetChildElements(const xercesc::DOMElement* pElement)
346 {
347  std::vector<xercesc::DOMElement*> children;
348  for (xercesc::DOMNode* p_node = pElement->getFirstChild();
349  p_node != NULL;
350  p_node = p_node->getNextSibling())
351  {
352  if (p_node->getNodeType() == xercesc::DOMNode::ELEMENT_NODE)
353  {
354  children.push_back(static_cast<xercesc::DOMElement*>(p_node));
355  }
356  }
357  return children;
358 }
359 
360 
361 void XmlTools::FindElements(const xercesc::DOMElement* pContextElement,
362  const std::vector<std::string>& rNames,
363  std::vector<xercesc::DOMElement*>& rResults,
364  unsigned depth)
365 {
366  for (xercesc::DOMNode* p_node = pContextElement->getFirstChild();
367  p_node != NULL;
368  p_node = p_node->getNextSibling())
369  {
370  if (p_node->getNodeType() == xercesc::DOMNode::ELEMENT_NODE &&
371  X2C(p_node->getLocalName()) == rNames[depth])
372  {
373  xercesc::DOMElement* p_child_elt = static_cast<xercesc::DOMElement*>(p_node);
374  if (depth == rNames.size() - 1)
375  {
376  rResults.push_back(p_child_elt);
377  }
378  else
379  {
380  FindElements(p_child_elt, rNames, rResults, depth+1);
381  }
382  }
383  }
384 }
385 
386 std::vector<xercesc::DOMElement*> XmlTools::FindElements(const xercesc::DOMElement* pContextElement,
387  const std::string& rPath)
388 {
389  std::vector<xercesc::DOMElement*> results;
390  std::vector<std::string> path;
391  size_t start_pos = 0;
392  size_t slash_pos = 0;
393  while (slash_pos != std::string::npos)
394  {
395  slash_pos = rPath.find('/', start_pos);
396  if (slash_pos == std::string::npos)
397  {
398  path.push_back(rPath.substr(start_pos));
399  }
400  else
401  {
402  path.push_back(rPath.substr(start_pos, slash_pos-start_pos));
403  }
404  start_pos = slash_pos + 1;
405  }
406  FindElements(pContextElement, path, results);
407  return results;
408 }
409 
410 
411 void XmlTools::WrapContentInElement(xercesc::DOMDocument* pDocument,
412  xercesc::DOMElement* pElement,
413  const XMLCh* pNewElementLocalName)
414 {
415  const XMLCh* p_namespace_uri = pElement->getNamespaceURI();
416  const XMLCh* p_prefix = pElement->getPrefix();
417  const XMLCh* p_qualified_name;
418  if (p_prefix)
419  {
420 #define COVERAGE_IGNORE
421  // We can't actually cover this code, since versions of the parameters file which need this
422  // transform didn't use a namespace, so can't have a namespace prefix!
423  xercesc::QName qname(p_prefix, pNewElementLocalName, 0);
424  p_qualified_name = qname.getRawName();
425 #undef COVERAGE_IGNORE
426  }
427  else
428  {
429  p_qualified_name = pNewElementLocalName;
430  }
431  xercesc::DOMElement* p_wrapper_elt = pDocument->createElementNS(p_namespace_uri, p_qualified_name);
432  // Move all child nodes of pElement to be children of p_wrapper_elt
433  xercesc::DOMNodeList* p_children = pElement->getChildNodes();
434  for (unsigned i=0; i<p_children->getLength(); i++)
435  {
436  xercesc::DOMNode* p_child = pElement->removeChild(p_children->item(i));
437  p_wrapper_elt->appendChild(p_child);
438  }
439  // Add the wrapper as the sole child of pElement
440  pElement->appendChild(p_wrapper_elt);
441 }
442 
443 
444 std::string XmlTools::EscapeSpaces(const std::string& rPath)
445 {
446  std::string escaped_path;
447  for (std::string::const_iterator it = rPath.begin(); it != rPath.end(); ++it)
448  {
449  if (*it == ' ')
450  {
451  escaped_path += "%20";
452  }
453  else
454  {
455  escaped_path += *it;
456  }
457  }
458  return escaped_path;
459 }
static std::string EscapeSpaces(const std::string &rPath)
Definition: XmlTools.cpp:444
static void PrintNode(const std::string &rMsg, xercesc::DOMNode *pNode, bool showChildren=false)
Definition: XmlTools.cpp:264
static xercesc::DOMElement * SetNamespace(xercesc::DOMDocument *pDocument, xercesc::DOMElement *pElement, const std::string &rNamespace)
Definition: XmlTools.cpp:337
#define EXCEPTION(message)
Definition: Exception.hpp:143
static std::vector< xercesc::DOMElement * > FindElements(const xercesc::DOMElement *pContextElement, const std::string &rPath)
Definition: XmlTools.cpp:386
Finalizer(bool init)
Definition: XmlTools.cpp:110
static xsd::cxx::xml::dom::auto_ptr< xercesc::DOMDocument > ReadFileToDomDocument(const std::string &rFileName,::xsd::cxx::xml::error_handler< char > &rErrorHandler, const ::xsd::cxx::tree::properties< char > &rProps, bool validate=true)
Definition: XmlTools.cpp:127
static std::vector< xercesc::DOMElement * > GetChildElements(const xercesc::DOMElement *pElement)
Definition: XmlTools.cpp:345
static void WrapContentInElement(xercesc::DOMDocument *pDocument, xercesc::DOMElement *pElement, const XMLCh *pNewElementLocalName)
Definition: XmlTools.cpp:411
static xsd::cxx::xml::dom::auto_ptr< xercesc::DOMDocument > ReadXmlFile(const std::string &rFileName, const ::xsd::cxx::tree::properties< char > &rProps, bool validate=true)
Definition: XmlTools.cpp:55
static void Finalize()
Definition: XmlTools.cpp:105