#include #include #include #include "DataFormat.h" ///////////////////////////////////////////////////////////////////// // RecordBuilder ///////////////////////////////////////////////////////////////////// void RecordBuilder::appendFieldHeader(FieldId id, StreamingDataType type) { FixedInfo header; header.fieldId = id; header.offset = _variableInfo.length(); header.type = type; _fixedInfo.push_back(header); } void RecordBuilder::appendFieldHeader(FieldId id, StreamingDataType type, int16_t value) { FixedInfo header; header.fieldId = id; header.offset = value; header.type = type; _fixedInfo.push_back(header); } void RecordBuilder::append(FieldId id, std::string const &value) { if (value.length() <= 255) { appendFieldHeader(id, StreamingDataType::STRING1B); const uint8_t length = value.length(); _variableInfo += length; _variableInfo += value; } else if (value.length() <= 0xffff) { appendFieldHeader(id, StreamingDataType::STRING2B); const uint16_t length = value.length(); _variableInfo.append((char *)&length, 2); _variableInfo += value; } else { // This seems pointless. This will probably cause the entire record to // be too big. We are truncating the input to fit. appendFieldHeader(id, StreamingDataType::STRING2B); _variableInfo += 0xff; _variableInfo += 0xff; _variableInfo.append(value, 0, 0xffff); } } void RecordBuilder::append(FieldId id, double value) { if (!std::isfinite(value)) // If we added this, the reader would properly interpret it as an EMPTY // value, rather than a number. But why waste the space? Just silently // ignore the request and don't write anything. The resulting record // will be smaller but equivalent. return; if ((value <= 3.2767) && (value >= -3.2768)) { // The magnitude is small enough that we wouldn't lose anything. // That test is just an optimization. The next test is sufficient. // Note that we have to convert this to an int now. If we round but // leave it as a double, that caused unexpected issues. const int16_t asInt = (int16_t)round(value * 10000); if (asInt/10000.0 == value) { // We didn't loose anything by rounding. appendFieldHeader(id, StreamingDataType::DOUBLE_10000, asInt); return; } } else if ((value <= 163.835) && (value >= -163.84)) { // The magnitude is small enough that we wouldn't lose anything. // That test is just an optimization. The next test is sufficient. // Note that we have to convert this to an int now. If we round but // leave it as a double, that caused unexpected issues. const int16_t asInt = (int16_t)round(value * 200); if (asInt/200.0 == value) { // We didn't loose anything by rounding. appendFieldHeader(id, StreamingDataType::DOUBLE_200, asInt); return; } } appendFieldHeader(id, StreamingDataType::DOUBLE); _variableInfo.append((char *)&value, sizeof(value)); } void RecordBuilder::append(FieldId id, float value) { if (!std::isfinite(value)) // If we added this, the reader would properly interpret it as an EMPTY // value, rather than a number. But why waste the space? Just silently // ignore the request and don't write anything. The resulting record // will be smaller but equivalent. return; if ((value <= 3.2767f) && (value >= -3.2768f)) { // The magnitude is small enough that we wouldn't lose anything. // That test is just an optimization. The next test is sufficient. // Note that we have to convert this to an int now. If we round but // leave it as a double, that caused unexpected issues. const int16_t asInt = (int16_t)roundf(value * 10000); if (asInt/10000.0f == value) { // We didn't loose anything by rounding. appendFieldHeader(id, StreamingDataType::DOUBLE_10000, asInt); return; } } if ((value <= 163.835f) && (value >= -163.84f)) { // The magnitude is small enough that we wouldn't lose anything. // That test is just an optimization. The next test is sufficient. // Note that we have to convert this to an int now. If we round but // leave it as a double, that caused unexpected issues. const int16_t asInt = (int16_t)roundf(value * 200); if (asInt/200.0f == value) { // We didn't loose anything by rounding. appendFieldHeader(id, StreamingDataType::DOUBLE_200, asInt); return; } } appendFieldHeader(id, StreamingDataType::FLOAT); _variableInfo.append((char *)&value, sizeof(value)); } void RecordBuilder::append(FieldId id, int16_t value) { appendFieldHeader(id, StreamingDataType::INT16_new, value); } void RecordBuilder::append(FieldId id, int32_t value) { if ((value >= std::numeric_limits< int16_t >::min()) && (value <= std::numeric_limits< int16_t >::max())) append(id, (int16_t)value); else { appendFieldHeader(id, StreamingDataType::INT32); _variableInfo.append((char*)&value, 4); } } void RecordBuilder::append(FieldId id, int64_t value) { if ((value >= std::numeric_limits< int32_t >::min()) && (value <= std::numeric_limits< int32_t >::max())) append(id, (int32_t)value); else reserveInt(id, value); } void RecordBuilder::reserveInt(FieldId id, int64_t toAdd) { appendFieldHeader(id, StreamingDataType::INT64); _variableInfo.append((char*)&toAdd, 8); } void RecordBuilder::exportAndAppend(std::string &result) { Offset fieldCount = _fixedInfo.size(); result.append((char *)&fieldCount, sizeof(fieldCount)); std::sort(_fixedInfo.begin(), _fixedInfo.end()); result.append((char const *)&_fixedInfo[0], sizeof(FixedInfo) * fieldCount); result += _variableInfo; } std::string RecordBuilder::exportAsString() { std::string result; exportAndAppend(result); return result; } ///////////////////////////////////////////////////////////////////// // ValueBox ///////////////////////////////////////////////////////////////////// // We're doing some low level stuff, including some type punning. I'm not // expecting any problems, but it never hurts to explicitly check. If these // are not right the code could read and write random bits of memory. static_assert((sizeof(InternalType) == 1) && (sizeof(ValueBox) == 16), "Serious internal error."); static_assert(sizeof(RecordRef) == sizeof(char *), "Serious internal error."); void ValueBox::releaseDynamicMemory() { switch (_type) { case InternalType::STRING: delete [] _value.asDynamicCharStar; break; case InternalType::RECORD: asRecord().~RecordRef(); break; default: assert(false); break; } _type = InternalType::EMPTY; } void ValueBox::assign(std::string const &value) { assign(value.c_str(), value.length()); } void ValueBox::assign(char const *value) { assign(value, strlen(value)); } void ValueBox::assign(char const *value, size_t length) { if (length <= 14) { _type = InternalType::CHAR_STAR; memcpy(_asCharStar, value, length); _asCharStar[length] = 0; } else { _type = InternalType::STRING; _value.asDynamicCharStar = new char[length+1]; memcpy(_value.asDynamicCharStar, value, length); _value.asDynamicCharStar[length] = 0; } } void ValueBox::assign(Record::Ref const &value) { _type = InternalType::RECORD; new (&asRecord()) Record::Ref(value); } void ValueBox::assignDynamicMemory(ValueBox const &other) { switch (other._type) { case InternalType::STRING: assign(other.asCharStar()); break; case InternalType::RECORD: assign(other.asRecord()); break; default: assert(false); break; } } void ValueBox::getString(bool &valid, char const *&value) const { switch (_type) { case InternalType::CHAR_STAR: valid = true; value = _asCharStar; break; case InternalType::STRING: valid = true; value = _value.asDynamicCharStar; break; case InternalType::CACHE_EMPTY: assert(false); // fall through default: valid = false; } } void ValueBox::getString(bool &valid, std::string &value) const { char const *asCharStar; getString(valid, asCharStar); if (valid) value = asCharStar; } int64_t ValueBox::getInt() const { bool valid; int64_t value; getInt(valid, value); assert(valid); return value; } double ValueBox::getDouble() const { bool valid; double value; getDouble(valid, value); assert(valid); return value; } ValueBox ValueBox::lookUpValue(FieldId fieldId) const { if (_type == InternalType::RECORD) return asRecord()->lookUpValue(fieldId); return ValueBox(); } bool ValueBox::operator ==(ValueBox const &other) const { if (_type == other._type) { switch (_type) { case InternalType::EMPTY: return true; case InternalType::INTEGER: return _value.asInt == other._value.asInt; case InternalType::DOUBLE: return _value.asDouble == other._value.asDouble; case InternalType::CHAR_STAR: return !strcmp(_asCharStar, other._asCharStar); case InternalType::STRING: return !strcmp(_value.asDynamicCharStar, other._value.asDynamicCharStar); case InternalType::RECORD: return asRecord() == other.asRecord(); case InternalType::CACHE_EMPTY: // Should this case throw an exception? As far as I know, this function // is only used in our test program, so it probably doesn't matter. return true; } assert(false); } /* These were required by a previous implementation. They are no longer * required. Now the length of a string tells you for certain which internal * type we will use. else if ((_type == InternalType::CHAR_STAR) && (other._type == InternalType::STRING)) return !other.asString().compare(_value.asCharStar); else if ((_type == InternalType::STRING) && (other._type == InternalType::CHAR_STAR)) return !asString().compare(other._value.asCharStar); */ else return false; } void ValueBox::writeToClient(PropertyList &destination, std::string const &name) const { switch (_type) { case InternalType::INTEGER: destination[name] = ntoa(_value.asInt); break; case InternalType::DOUBLE: // Looks like this was 6 digits in the old code. But that mostly used // floats instead of doubles. Try 8 and see what we get. destination[name] = dtoa(_value.asDouble, 8); break; case InternalType::CHAR_STAR: case InternalType::STRING: { char const *const value = asCharStar(); // Checking for an empty string is an optimization. We expect that the // reader will treat a missing key the same way as a key that maps to the // empty string. We can't say that for sure, but it seems reasonable. if (*value) destination[name] = value; break; } case InternalType::CACHE_EMPTY: assert(false); // fall through default: // Default case, do nothing. This is appropriate for null values. This is // probably good for records, which were never meant to be written like // this. break; } } std::string ValueBox::shortDebug() const { switch (_type) { case InternalType::INTEGER: return ntoa(_value.asInt); case InternalType::DOUBLE: return ntoa(_value.asDouble); case InternalType::CHAR_STAR: case InternalType::STRING: { // We used to use « and » around a short string. But now that it's // obvious which form is in use, it's no longer important here. std::string result = "“"; result += asCharStar(); result += "”"; return result; } case InternalType::RECORD: return asRecord()->shortDebug(); case InternalType::EMPTY: return "EMPTY"; case InternalType::CACHE_EMPTY: return "CACHE_EMPTY"; } assert(false); } ValueBox::ValueBox(std::string const &value) { assign(value); } ValueBox::ValueBox(char const *value) { assign(value); } ValueBox::ValueBox(RecordRef const &value) { assign(value); } void ValueBox::appendTo(RecordBuilder &destination, FieldId fieldId) const { switch (_type) { case InternalType::INTEGER: destination.append(fieldId, _value.asInt); break; case InternalType::DOUBLE: destination.append(fieldId, _value.asDouble); break; case InternalType::STRING: destination.append(fieldId, _value.asDynamicCharStar); break; case InternalType::CHAR_STAR: destination.append(fieldId, &_asCharStar); break; default: // For a NULL, explicitly do nothing. Other types might be an error or // ignored. break; } } ///////////////////////////////////////////////////////////////////// // Record ///////////////////////////////////////////////////////////////////// RecordBuilder::FixedInfo const *Record::findField(FieldId id) const { int startIndex = 0; // Lowest one we're looking at. int endIndex = _fieldCount; // The highest plus one. while (true) { const int length = endIndex - startIndex; if (length <= 0) return NULL; const int middle = startIndex + length / 2; FixedInfo const *const result = &fixedInfo()[middle]; const FieldId middleId = result->fieldId; if (middleId == id) return result; if (id < middleId) // Look at the left half. endIndex = middle; else // Look at the right half. startIndex = middle + 1; } }; /* // An interesting attempt at making things faster. In my tests, I only saw // an improvement of about 5.5%. (That was looking at all the time in // findField() and lookUpValue(). The profiler made it hard for me to seperate // those two. RecordBuilder::FixedInfo const *Record::findField(FieldId id) const { bool magicLeft = true; bool magicRight = true; int startIndex = 0; // Lowest one we're looking at. int endIndex = _fieldCount; // The highest plus one. while (true) { if (magicLeft) { if (startIndex >= endIndex) return NULL; magicLeft = false; FixedInfo const *const first = &_fixedInfo[startIndex]; const FieldId firstId = first->fieldId; const int greatestPossibleOffset = id - (int)firstId; if (greatestPossibleOffset < 0) // Requested item comes before the first item. return NULL; if (greatestPossibleOffset == 0) return first; startIndex++; const int lastPossibleIndex = startIndex + greatestPossibleOffset; if (lastPossibleIndex < endIndex) { endIndex = lastPossibleIndex; magicRight = true; } } if (magicRight) { if (startIndex >=endIndex) return NULL; magicRight = false; FixedInfo const *const last = &_fixedInfo[endIndex-1]; const FieldId lastId = last->fieldId; const int greatestPossibleOffset = lastId - (int)id; if (greatestPossibleOffset < 0) // Requested item comes after the first item. return NULL; if (greatestPossibleOffset == 0) return last; endIndex--; const int firstPossibleIndex = endIndex - greatestPossibleOffset; if (firstPossibleIndex > startIndex) { startIndex = firstPossibleIndex; magicLeft = true; } } const int length = endIndex - startIndex; if (length <= 0) return NULL; const int middle = startIndex + length / 2; FixedInfo const *const result = &_fixedInfo[middle]; const FieldId middleId = result->fieldId; if (middleId == id) return result; if (id < middleId) { endIndex = middle; magicRight = true; } else { startIndex = middle + 1; magicLeft = true; } } }; */ /* // This is a slight variation on the attempt above. This can make // Record::lookUpValue 9.5% faster. // // The previous version had two seperate phases. One would check the first // and last to see if we could limit the results. The other was a standard // binary search. This does both at once. Whenever I move the middle, I // check if there are new limits on where the output could be. That's cheap // because I was already doing most of that work anyway as part of the normal // binary search. // // Just adding that extra check to the middle was not sufficient. By itself // it actually made things slower in my tests. If the thing we were looking // for was near the middle, it should have worked well. If the thing // we were looking for was near one end of the other, the new code would have // done nothing helpful. So I added one single check to the left and right // extremes. It seems that all of the other checks were replaced by the new // code when we update the middle. // // I tried another version. It only did the two checks at the top. After that // it became a normal binary search. That seemed slightly slower than the // version listed below, but the difference was small compared to the errors // in my measurements. RecordBuilder::FixedInfo const *Record::findField(FieldId id) const { int startIndex = 0; // Lowest one we're looking at. int endIndex = _fieldCount; // The highest plus one. { if (startIndex >= endIndex) return NULL; FixedInfo const *const first = &_fixedInfo[0]; const FieldId firstId = first->fieldId; const int difference = id - (int)firstId; if (difference < 0) return NULL; if (difference == 0) return first; startIndex++; const int lastPossibleIndex = startIndex + difference; if (lastPossibleIndex < endIndex) endIndex = lastPossibleIndex; } { if (startIndex >= endIndex) return NULL; FixedInfo const *const last = &_fixedInfo[endIndex-1]; const FieldId lastId = last->fieldId; const int difference = lastId - (int)id; if (difference < 0) return NULL; if (difference == 0) return last; endIndex--; const int firstPossibleIndex = endIndex - difference; if (firstPossibleIndex > startIndex) startIndex = firstPossibleIndex; } while (true) { const int length = endIndex - startIndex; if (length <= 0) return NULL; const int middle = startIndex + length / 2; FixedInfo const *const result = &_fixedInfo[middle]; const FieldId middleId = result->fieldId; const int difference = id - (int)middleId; if (difference == 0) return result; if (difference < 0) { // Look at the left half. endIndex = middle; const int limit = endIndex + difference; if (limit > startIndex) startIndex = limit; } else { // Look at the right half. startIndex = middle + 1; const int limit = startIndex + difference; if (limit < endIndex) endIndex = limit; } } }; */ Record::Record(char *encodedStart, size_t encodedLength, bool needToDeleteEncoded) : _encodedStart(encodedStart), _encodedLength(encodedLength), _needToDeleteEncoded(needToDeleteEncoded) { // The first Offset is the number of fields. Followed by n Offset values, // which point to the n fields. Followed by the fields. if (encodedLength < sizeof(Offset)) { // Error! _fieldCount = 0; _encodedLength = 0; return; } _fieldCount = *(Offset const *)encodedStart; _variableInfoStart = _fieldCount * sizeof(FixedInfo) + sizeof(Offset); if (_variableInfoStart > (int)encodedLength) { // Error! _fieldCount = 0; _encodedLength = 0; return; } // Now we know it's safe to read all of the members of the _fixedInfo array. // So it's safe to do a binary search. Once we get a pointer to the // variable part of the record, we should do additional checks. } SmarterP< Record > Record::create(std::string const &encoded) { return createCopy(encoded.c_str(), encoded.length()); } SmarterP< Record > Record::createCopy(char const *start, size_t length) { char *ourCopy = new char[length]; memcpy(ourCopy, start, length); SmarterP< Record > result(NULL, ourCopy, length, true); if (!result->success()) result.clear(); return result; } Record::Ref Record::createShare(char const *start, size_t length) { // We are not going to modify the data. We are returning a const smart // pointer to a Record, and the Record will not modify this data. This // was more obvious when we stored this in a std::string instead of a // char *, but the result is the same. If the Record is const, we don't // modify the contents of start / p / encodedStart. char *p = const_cast< char * >(start); Ref result(NULL, p, length, false); if (!result->success()) result.clear(); return result; } ValueBox Record::lookUpValue(FieldId fieldId) const { FixedInfo const *const fixedInfo = findField(fieldId); if (!fixedInfo) return ValueBox(); char const *p = fixedInfo->offset + _encodedStart + _variableInfoStart; char const *const end = _encodedStart + _encodedLength; switch (fixedInfo->type) { case StreamingDataType::UINT8_obsolete: if (p + 1 <= end) return (int64_t)*(uint8_t const *)p; break; case StreamingDataType::INT16_obsolete: if (p + 2 <= end) return (int64_t)*(int16_t const *)p; break; case StreamingDataType::INT32: if (p + 4 <= end) return (int64_t)*(int32_t const *)p; break; case StreamingDataType::INT64: if (p + 8 <= end) return *(int64_t const *)p; break; case StreamingDataType::FLOAT: if (p + sizeof(float) <= end) return (double)*(float const *)p; break; case StreamingDataType::DOUBLE: if (p + sizeof(double) <= end) return *(double const *)p; break; case StreamingDataType::DOUBLE0_obsolete: return 0.0; case StreamingDataType::DOUBLE3_obsolete: if (p + 3 <= end) { int32_t result; char *const byte = (char *)&result; byte[0] = p[0]; byte[1] = p[1]; byte[2] = p[2]; // Sign extend from 3 bytes to 4. if (byte[2] & 0x80) byte[3] = 0xff; else byte[3] = 0; return result / 10000.0; } break; case StreamingDataType::INT16_new: return (int16_t)fixedInfo->offset; case StreamingDataType::DOUBLE_10000: return ((int16_t)fixedInfo->offset)/10000.0; case StreamingDataType::DOUBLE_200: return ((int16_t)fixedInfo->offset)/200.0; case StreamingDataType::STRING1B: if (p + 1 <= end) { uint8_t size = *(uint8_t const *)p; p++; if (p + size <= end) return ValueBox(p, size); } break; case StreamingDataType::STRING2B: if (p + 2 <= end) { uint16_t size = *(uint16_t const *)p; p += 2; if (p + size <= end) return ValueBox(p, size); } break; } return ValueBox(); } bool Record::update(FieldId fieldId, int64_t newValue) { FixedInfo const *const fixedInfo = findField(fieldId); if (!fixedInfo) // The record must already contain this field. return false; if (fixedInfo->type != StreamingDataType::INT64) // The field must already have the right size and type. return false; const int offset = fixedInfo->offset + _variableInfoStart; if (offset + sizeof(int64_t) > _encodedLength) // Problem with the record. The header is pointing to something outside // of the body of the record. return false; char *start = _encodedStart + offset; *(int64_t *)start = newValue; return true; // Success! } std::string Record::debugDump() const { TclList result; for (int i = 0; i < _fieldCount; i++) { TclList row; row< Record::debugGetFieldInfo() const { std::vector< RecordBuilder::FixedInfo > result; result.reserve(_fieldCount); for (int i = 0; i < _fieldCount; i++) result.push_back(fixedInfo()[i]); return result; } std::string const &Record::asString(StreamingDataType type) { static const std::string values[] = { "UINT8_obsolete", "INT16_obsolete", "INT32", "INT64", "FLOAT", "DOUBLE", "STRING1B", "STRING2B", "DOUBLE0_obsolete", "DOUBLE3_obsolete", "INT16_new", "DOUBLE_10000", "DOUBLE_200" }; return values[(int)type]; } std::vector< FieldId > Record::getFields() const { std::vector< FieldId > result; result.reserve(_fieldCount); for (int i = 0; i < _fieldCount; i++) result.push_back(fixedInfo()[i].fieldId); return result; } const Record::Ref Record::EMPTY(NULL, (char *)NULL, 0, false);