#ifndef __RapidXmlElement_h_ #define __RapidXmlElement_h_ #include #include "rapidxml.hpp" #include "MiscSupport.h" ///////////////////////////////////////////////////////////////////// // RapidXmlElement // // Convenience functions on top of ../shared/rapidxml.hpp. This is // the normal trick we've used in a lot of places. The primary // concern is that the programmer doesn't want to keep asking if a // pointer is null when he's traversing a tree. node["hi"][1] will // return NULL if (a) node is NULL, (b) we can't find a child named // "hi" in a, or (c) the child named "hi" has fewer than 2 children. // (0 is the first child, 1 is the second.) // // Notice that RapidXmlElement is a simple wrapper around a // rapidxml::xml_node<> pointer. You can't overload any operators // for a pointer, only a class. And you can't overload [] for any // class except that one that matches the first argument. There is // no overhead for this class. The object code would look exactly // the same if these functions returned a pointer. This trick just // makes the source code look cleaner. // // This also hides some of the template options. Just like I say // std::string, not basic_string< char >, I can just say // RapidXmlElement because I only deal with the char type. ///////////////////////////////////////////////////////////////////// class RapidXmlElement { private: typedef rapidxml::xml_node<> xml_node; typedef rapidxml::xml_attribute<> xml_attribute; xml_node *_node; RapidXmlElement(xml_node *node) : _node(node) { } public: // We have to start from a document because the document is responsible // for memory management. If you release the document, but you still have // RapidXmlElement objects based on that document, that's an error. // // We can't convert a string directly to a RapidXmlElement for this reason. // If we created a document in a temporary variable then used that to grab // the root RapidXmlElement, the element would be invalided as soon as we // returned it. // // Also see RapidXmlDocument, our wrapper around xml_document. typedef rapidxml::xml_document<> document; // The default constructor. The same as asking for the nth child of a // an element which has fewer than n children. That is to say this is the // same way we already handle errors in other places. This is helpful as a // placeholder. You might want to create a variable of this type before you // know it's value. RapidXmlElement() : _node(NULL) { } // Same as saying *this = RapidXmlElement(); void clear() { _node = NULL; } // Point to the root element in the document. Note: Some XML parsers // enforse the rule that there can only be one top level element in the // file and others don't. This code adds the rule. If there is more than // one child, or no children, we return NULL. // // Note in C# you have to explicitly ask for element 0 from the document. // In GWT, like this, you are directly given the root element. Be careful // when you're copying from C# and make sure you leave out the first request // for element 0. RapidXmlElement(document const &document) : _node((document.first_node() == document.last_node()) ?document.first_node():NULL) { } // Look for the element with the given name. It's not defined what happens // if there are multiple elements with that name. If the current element is // NULL or it has no children with the given name, return NULL. // // This works like node() in C#, but this is slightly shorter. RapidXmlElement operator[] (char const *childName) const { if (!_node) return NULL; return _node->first_node(childName); } // Look for the element with the given name. It's not defined what happens // if there are multiple elements with that name. If the current element is // NULL or it has no children with the given name, return NULL. // // This works like node() in C#, but this is slightly shorter. RapidXmlElement operator[] (std::string const &childName) const { if (!_node) return NULL; return _node->first_node(childName.c_str(), childName.size()); } // Look for the nth element. 0 is the first element. If the current element // is NULL, or the nth child doesn't exist, return NULL. Negative values for // which are explicitly allowed and they will always return NULL. // // This works like node() in C#, but this is slightly shorter. RapidXmlElement operator[] (int which) const { if (!_node) return NULL; if (which < 0) return NULL; xml_node *item = _node->first_node(); while (true) { if (which == 0) return item; which--; item = item->next_sibling(); if (!item) return NULL; } } // Look in the given element for a property with the given name. If either // doesn't exist, return the default value. // // Some other libraries call this a "property." That was my mistake. // "Attribute" is the correct terminology. std::string attribute(char const *name, std::string const &defaultValue = "") const { if (!_node) return defaultValue; xml_attribute *attribute = _node->first_attribute(name); if (!attribute) return defaultValue; return attribute->value(); } // Look in the given element for a property with the given name. If either // doesn't exist, return the default value. std::string attribute(std::string const &name, std::string const &defaultValue = "") const { if (!_node) return defaultValue; xml_attribute *attribute = _node->first_attribute(name.c_str(), name.size()); if (!attribute) return defaultValue; return attribute->value(); } // Look in the given element for a property with the given name. If either // doesn't exist, or the result can't be parsed, return the default value. int64_t attribute(char const *name, int64_t defaultValue) const { return strtolDefault(attribute(name), defaultValue); } // Look in the given element for a property with the given name. If either // doesn't exist, or the result can't be parsed, return the default value. int64_t attribute(std::string const &name, int64_t defaultValue) const { return strtolDefault(attribute(name), defaultValue); } PropertyList allAttributes() const { PropertyList result; if (_node) for (xml_attribute *attribute = _node->first_attribute(); attribute != NULL; attribute = attribute->next_attribute()) result[attribute->name()] = attribute->value(); return result; } std::string name(std::string const &defaultValue = "") const { return _node?_node->name():defaultValue; } // This is mostly aimed at a ranged based for loop. // If parent is of type RapidXmlElement, you can say // for (RapidXmlElement child : parent) class Iterator { private: xml_node *_node; friend RapidXmlElement; Iterator(xml_node *node) : _node(node) { } public: // These return void for simplicity. I think that's sufficient for most // things we do with iterators. // // These do not let you go backward from the end. Some containers let you // say something like myVector.end()-- to access the last item. ++ or -- // on the end will keep you at the end. Again, I did this for simplicity. void operator ++(int unused) { _node = _node?_node->next_sibling():NULL; } void operator ++() { _node = _node?_node->next_sibling():NULL; } void operator --(int unused) { _node = _node?_node->previous_sibling():NULL; } void operator --() { _node = _node?_node->previous_sibling():NULL; } bool operator ==(Iterator const &other) const { return _node == other._node; } bool operator !=(Iterator const &other) const { return _node != other._node; } RapidXmlElement operator *() const { return _node; } }; // If you try to iterate over a non-existant element, that's not a problem. // You just iterate 0 times. Otherwise you iterate over the direct children // of the current element. Iterator begin() const { return _node?_node->first_node():NULL; } Iterator end() const { return NULL; } // NULL is false, anything else is true. Could not find what you're looking // for is false. Found what you asked for is true. operator bool() const { return _node; } bool operator !() const { return !_node; } }; class RapidXmlDocument : NoCopy, NoAssign { private: // The XML parser needs a COPY of the input. The parser will modify this // data, which is one reason we often start by making a copy. The element // objects will contain pointers to this data, so we have to keep it around // as long as we keep the element objects. std::string _workingCopy; std::string _errorMessage; RapidXmlElement::document _document; RapidXmlElement _root; void uninitializedError() { _errorMessage = "Uninitialized"; } void loadFromWorkingCopy() { try { _document.parse<0>(&_workingCopy[0]); _root = RapidXmlElement(_document); if (!_root) _errorMessage = "An XML document should have exactly one root element."; else _errorMessage.clear(); } catch (rapidxml::parse_error &ex) { // ex.where() is a char * which points into _workingCopy. _errorMessage = ex.what(); _errorMessage += ": "; _errorMessage += ex.where(); } } public: RapidXmlDocument() { uninitializedError(); } void clear() { _workingCopy.clear(); _document.clear(); _root.clear(); } // The usual C++ style conversion. Instead of if (doc.valid()) say // if (doc). operator bool() const { return _root; } bool operator !() const { return !_root; } RapidXmlElement root() const { return _root; } void loadFromString(std::string const &source) { _workingCopy = source; loadFromWorkingCopy(); } void loadFromString(std::string &&source) { _workingCopy = std::move(source); loadFromWorkingCopy(); } void loadFromFile(char const *fileName) { // Inspired by // http://insanecoding.blogspot.in/2011/11/how-to-read-in-file-in-c.html clear(); std::ifstream in(fileName, std::ios::in | std::ios::binary); in.seekg(0, std::ios::end); if (in) { // If in is invalid (e.g. file not found) and we didn't check for that, // tellg() would return -1 and resize() would abort the program. (If you // call seekg(), tellg(), read(), etc., on an invalid stream, by default // that's just silently ignored.) _workingCopy.resize(in.tellg()); in.seekg(0, std::ios::beg); in.read(&_workingCopy[0], _workingCopy.size()); } if (in) loadFromWorkingCopy(); else { _workingCopy.clear(); _errorMessage = errorString() + ": " + fileName; } } void loadFromFile(std::string const &fileName) { loadFromFile(fileName.c_str()); } std::string getErrorMessage() const { return _errorMessage; } }; #endif