#ifndef __Marshal_h_
#define __Marshal_h_

// The C headers are used in their ".h" form on purpose: this file spells
// size_t, uint8_t and memcpy without the std:: prefix, and only the ".h"
// headers guarantee that those names exist in the global namespace.
//
// NOTE(review): mPop() further down calls ntoa(), which none of these
// standard headers provide.  Confirm that the project header declaring it is
// reachable before this file is included.
#include <stddef.h>
#include <stdint.h>
#include <string.h>

#include <exception>
#include <map>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>

// Unmarshalling data can always lead to an exception.  What if you are
// trying to read a 4 byte integer, but only 2 bytes are left in the source?
// With XML (and a lot of other cases) we try our best to make sense of what
// we have.  But XML gives us a lot of context.  We can't even start looking
// at the XML file if the end is cut off, or we accidentally try to interpret
// a GIF file as an XML file, or someone just left off a quotation mark.  Also
// consider a less extreme case.  We're expecting 3 integers but someone only
// supplied 2.  When reading XML we know exactly what data is missing so it's
// easy to supply a default value or otherwise handle the data without giving
// up.  If we had a missing field near the beginning of a binary marshaled
// message we probably wouldn't even catch it until much later; many fields
// would be filled with garbage.
//
// We might also throw this when marshaling data.  In those cases
// assert(false) would probably do just as well.  But since we have this,
// why not use it.
class MarshallingException : public std::exception
{
private:
  std::string _what;

public:
  // Deliberately not explicit, so existing code that relies on the implicit
  // conversion from a message string keeps compiling.
  MarshallingException(std::string const &what) : _what(what) { }
  MarshallingException() : _what("Error marshalling / unmarshalling data") { }

  // Returns the message given to the constructor.  The pointer stays valid
  // for as long as this exception object is alive.
  const char *what() const noexcept override { return _what.c_str(); }
};

// Appends string to destination in the marshaled format.
void marshal(std::string &destination, std::string const &string);

// Reads a marshaled string from source, starting at offset, into value and
// advances offset past it.
void unmarshal(std::string const &source, size_t &offset, std::string &value);

// Returns everything from offset to the end of source and moves offset to the
// end of source.
//
// If offset is already past the end of source the std::string constructor
// throws std::out_of_range.  In that case offset is left untouched, because
// the copy is made before offset is updated.
inline std::string unmarshalRemainder(std::string const &source,
                                      size_t &offset)
{
  std::string remainder(source, offset);
  offset = source.length();
  return remainder;
}

// You do need both of these, const and non const.  C++ will prefer a function
// that exactly matches the arguments (i.e. const or non const).  If that fails
// then C++ will look for a template.  If that fails, the third step is to try
// to convert char * to char const *.  Note that SFINAE is only used to help
// pick among a set of templates.  Once the compiler thinks that a template
// function is right, SFINAE might point it to a different template function,
// but it will never push it back to the conversion that you really wanted.
//
// In this case if we got rid of the char * version of marshal(), C++ would try
// to use our POD version of marshal, and would then give you a compiler error.
//
// Note that there is no unmarshal function with the type of char *.  Unmarshal
// to a std::string, instead.  This is mostly aimed at string literals, which
// decay to char const *, and which we always expect to be converted to
// std::string at the first convenient time.  Normally you get that conversion
// for free, but sometimes C++ gets confused.  In this case it would have tried
// to use the template for POD types.  In other cases I've seen it convert
// char * to bool, rather than std::string.
//
// A null pointer throws MarshallingException; constructing a std::string from
// a null pointer would be undefined behavior.
inline void marshal(std::string &destination, char const *string)
{
  if (!string)
    throw MarshallingException("Cannot marshal a null string pointer.");
  marshal(destination, std::string(string));
}

inline void marshal(std::string &destination, char *string)
{
  // Same behavior as the char const * overload; see the comment above for
  // why this overload has to exist at all.
  marshal(destination, static_cast< char const * >(string));
}

// Appends the raw bytes of a POD value to destination.  The bytes are copied
// exactly as they sit in memory, sizeof(T) of them.
template< class T >
void marshal(std::string &destination, T const &value)
{
  static_assert(std::is_pod< T >::value,
                "This function is aimed at POD types.");
  static_assert(!std::is_pointer< T >::value,
                "You cannot marshal a pointer.");
  // Just copy the bytes as is.
  destination.append(reinterpret_cast< char const * >(&value), sizeof(T));
}

// This will verify that we have enough data.  If you try to read off the
// end of the source, this will throw an exception.
//
// Next this will update offset to point to the next piece of data, right
// after what we're describing here.
//
// Finally, this will return a pointer to the beginning of the data.
void const *unmarshalBytes(std::string const &source, size_t &offset, size_t count); template< class T > void unmarshal(std::string const &source, size_t &offset, T &value) { static_assert(std::is_pod< T >::value, "This function is aimed at POD types."); static_assert(!std::is_pointer< T >::value, "You cannot marshal a pointer."); value = *static_cast< T const * >(unmarshalBytes(source, offset, sizeof(T))); } // This may be overkill. You could send the size directly to marshal() as it // is a POD type. But I like the idea the something small would only need 1 // byte, even if that added to the cost of something bigger. // // Also, we are somewhat arbitrarily setting the maximum possible size to // 2^32-1. If the size is longer than that, the actual data would be // ridiculously big, more than we plan to deal with in this library. We only // reserve enough space for a 32 bit number, to save space and still have // somewhat simple code. But the source of this will probably be a size_t // (e.g. std::string::length() or std::vector::size()) so the value might // be too big. This function gives us a single place to check for that and // throw an exception. void marshalSize(std::string &destination, size_t value); size_t unmarshalSize(std::string const &source, size_t &offset); template< class T > void marshal(std::string &destination, std::vector< T > const &value) { marshalSize(destination, value.size()); for (T const &item : value) marshal(item); } template< class T > void unmarshal(std::string const &source, size_t &offset, std::vector< T > &value) { try { size_t size = unmarshalSize(source, offset); value.resize(size); for (T &item : value) unmarshal(source, offset, item); } catch (...) { value.clear(); throw; } } template< class T, class U > void marshal(std::string &destination, std::pair< T, U > const &value) { // It would be tempting to let the POD version of marshal() have a crack // at this. 
The code in this fuction would only be a fallback, if one of the // types was not POD. That might save a little time becaue we could make one // call to std::string::append() rather than two. But there's a problem. // What if T or U has a specialized version of marshal()? Like a char *. // The implementation below will work for any valid input type and will // complain about the others. marshal(destination, value.first); marshal(destination, value.second); } template< class K, class V > void marshal(std::string &destination, std::map< K, V > const &value) { marshalSize(destination, value.size()); for (auto const &item : value) marshal(destination, item); } template< class K, class V > void unmarshal(std::string const &source, size_t &offset, std::map< K, V > &map) { try { map.clear(); const size_t count = unmarshalSize(source, offset); for (size_t i = 0; i < count; i++) { K key; unmarshal(source, offset, key); V value; unmarshal(source, offset, value); map[key] = value; } } catch (...) { map.clear(); throw; } } // Returns true if and only if we are exactly at the end of the data. This is // mostly intented to be used with an assertion. It's a bad policy to keep // reading until this is true. inline bool unmarshalFinished(std::string const &source, size_t offset) { return source.length() == offset; } void unmarshalFinishedOrThrow(std::string const &source, size_t offset); // This may be an easier way to unmarshal a single item. This assumes that // the entire source string came from marshalling a single item, and you are // trying to retrieve that same item here. template < class T > void unmarshal(std::string const &source, T &result) { size_t offset = 0; unmarshal(source, offset, result); if (!unmarshalFinished(source, offset)) throw MarshallingException("Extra data found after unmarshalling."); } // This may be an easier way to unmarshal a single item. 
This assumes that // the entire source string came from marshalling a single item, and you are // trying to retrieve that same item here. template < class T > T unmarshal(std::string const &source) { T result; unmarshal(source, result); return result; } // And an easier way to marshal a single item. template < class T > std::string marshal(T const &item) { std::string result; marshal(result, item); return result; } // Pack multiple Boolean values into the same byte then add that byte to the // destination. The first bool listed (a) is the least significant bit (1). // The second bool listed (b) is 2. // // We could and probably should add versions of this function for 2 to 8 bits. // It's tempting to have just one version with a lot of default arguments // all defaulting to false. That would cause a small but unnecessary // performance hit. inline void marshal(std::string &destination, bool a, bool b, bool c) { uint8_t byte = 0; if (a) byte |= 1; if (b) byte |= 2; if (c) byte |= 4; marshal(destination, byte); } // The arguments to unmarshal() should come in the same order as in marshal(), // i.e. least significant bit first. // // In case of an exception the values of a, b and c are undefined. (There is // not yet a standard for this. For aggregate data structures, we usually // clear out the entire data structure instead of returning a partially built // data structure. For individual values we normally leave the value // unchanged.) inline void unmarshal(std::string const &source, size_t &offset, bool &a, bool &b, bool &c) { uint8_t byte; unmarshal(source, offset, byte); a = byte & 1; b = byte & 2; c = byte & 4; } // Typically we marshal() and unmarshal() data in the exactly same order. // Sometimes, however, it makes more sense to push and pop from the same end. // In the case of strings, we modify our data format to put the size of the // string after the data when pushing. marshal() would put the size first. 
void mPush(std::string &destination, std::string const &source); std::string mPop(std::string &source); // We can marshal() a POD type onto the end of a string, then use mPop() to // get the value back. That's easy because the POD type has a fixed size. template< class T > void mPop(std::string &source, T const &value) { static_assert(std::is_pod< T >::value, "This function is aimed at POD types."); static_assert(!std::is_pointer< T >::value, "You cannot marshal a pointer."); // Just copy the bytes as is. if (source.length() < sizeof(T)) throw MarshallingException("Reading past end of data. data size = " + ntoa(source.length()) + ", requested size = " + ntoa(sizeof(T))); memcpy((char*)&value, &source[source.length() - sizeof(T)], sizeof(T)); source.resize(source.length() - sizeof(T)); } // Run some test code. The details are likely to change over time. This might // not be appropriate to run in production code. void testMarshal(); #endif