#ifndef __GridReaderBase_h_ #define __GridReaderBase_h_ #include #include #include #include #include "../shared/ThreadSafeRefCount.h" #include "../shared/MiscSupport.h" #include "CandleTimer.h" #include "EpochCounter.h" #include "GridInstanceData.h" // This should really say archetype, not prototype. // http://english.stackexchange.com/questions/23918/what-is-the-difference-between-archetype-and-prototype class GridPrototype { private: const int _rowCount; const int _colCount; const CandleTimer::Ref _candleTimer; const bool _packed; std::map< std::string, int > _columns; std::vector< std::string > _formulae; public: GridPrototype(int rowCount, int colCount, CandleTimer::Ref candleTimer, bool packed); GridPrototype(GridPrototype const &original, CandleTimer::Ref candleTimer); CandleTimer::Ref const &getCandleTimer() const { return _candleTimer; } // returns -1 if the column is not found. int findColumn(std::string const &name) const; int getWidth() const { return _colCount; } std::string getFormula(int row, int col) const; void getColumns(std::vector< std::string > &out) const; void setColumnName(const std::string &name, int col); void setFormula(int row, int col, std::string formula, bool safe = true); void setFormula(int row, const std::string &col, const std::string &formula); // This copies the formula to the first row and to every row below it. void setFormulaDown(int firstRow, const std::string &col, const std::string &formula); bool getPacked() const { return _packed; } std::string debugDump() const; }; typedef TSRefCount< GridPrototype > GridPrototypeRef; // GridDataProvider will need some context to give data. You might make a // request like "show me the opening price". We need to know what candle // you are talking about. That means that you might get a different answer // for each row. And you need to know how to translate a row number into a // time. And you'll also have to know what symbol we are talking about. // // More than likely we will pass a pointer to the grid to // GridDataProvider::getValue(). GridDataProviderContext is an interface // which limits which data we can look at. This is just to make the code a // little cleaner. class GridDataProviderContext { public: virtual int getCalculationRowPacked() const =0; virtual int getCalculationRowPossible(int packedOffset) const =0; virtual GridPrototypeRef const &getPrototype() const =0; }; // This is used to implement the current() function available within the grid. // For example, the user might say current("open") to ask for the opening price // for the current period. This interface is meant to be very generic. It // currently works for daily and intraday day prices, which are similar but are // handled by different classes. It could be used for many other things, // including references to other grids. That's not available yet, but it is // planned. class GridDataProvider { public: // The input is the epoch which was current the last time this grid updated. // The result result says how far back to delete old data. Anything before // or at that time should be thrown away. virtual time_t restartAt(EpochCounter::Epoch epoch) const =0; // Data type is something like "high", "close", or "volume". This is // flexible so different providers can offer other types of data. // Offset says how many rows back to look. 0 means the data associated with // the current row. Negative numbers are not legal. virtual double getValue(std::string const &dataType, GridDataProviderContext *context, int offset, std::string &errorMsg) const =0; // If there are no trades during the entire period covered by a candle, we // mark that candle as invalid. virtual bool getValid(GridDataProviderContext *context) const =0; // This does a type of caching. You tell the GridDataProvider which rows // you are interested in. It will grab that data all at once and hold on // to it until you are done. This is a type of caching: if we request // data that is not preloaded, that will be loaded each time it is requested. // // Unlike a general purpose cache (like your disk cache) this is usually // aimed at a specific operation. For example, you might clear all the old // data from a grid, call preloadToEnd() to get the data needed to fill the // grid, fill the grid, then call releasePreload(). This is all done in one // event handler with no interruptions. If someone else needs similar data, // we can't help them. // // There are two states. Initially nothing is preloaded. You can call // preloadToEnd() to move to the preloaded state. You can call // releasePreload() to return to the empty state. Calling one of these // functions at another time (loading when it's already loaded or clearing // when it's already cleared) is an error. The exact results are undefined, // but don't do it. // // This cache is specific to a thread. Several different threads can (and // probably will) each cache a different grid at the same time. Multiple // threads can cache the same grid at once, although they probably won't. // We only expect to be cached while someone owns the write lock for a grid. // That's not strictly required, but that's the likely case. // // Each thread can only cache one thread at a time. Again, the exact results // if you try to break that rule are not clearly defined. But don't do it. // // This call will probably block. // // Currently we only have an option to read all the data that's missing from // the grid. That's reasonable because that's the common case. There are // options to fill only part of the grid, but that's only for debugging. // There are reasons why it we wouldn't switch to that for normal use. And, // of course, this is just a cache. Nothing will break if you just skip the // preload step when it doesn't do exactly what you need. // // You should check the current epoch before calling preloadToEnd(), // and not again. Presumably you will read this right after clearing the // old data from the grid. virtual void preloadToEnd(GridDataProviderContext *context) const =0; virtual void releasePreload() const =0; virtual std::string getSymbol() const =0; virtual std::string debugDump() const =0; virtual ~GridDataProvider() {} }; typedef TSRefCount< GridDataProvider > GridDataProviderRef; // All const public members of GridInstance are thread safe. A normal // mutex prevents readers and writers from conflicting. These operations // are typically fast, so no thread should have to wait very long. // // If you want to modify a GridInstance, you should start by calling // writeLock() to try to acquire the write lock. This is separate from // the normal mutexes. You can hold the write lock for as long as you need // to. Typically you might want to mix a lot of reads and writes, all within // the write lock. // // Grabbing the write lock starts a transaction. No one sees your changes // until you release it. A reader has only a very limited sense of a // transaction. He can't say that he wants to do three different reads and he // wants them all to be consistent. Individual operations, like give me a sum, // are all consistent because they are done while holding the mutex. The // important part of a transaction is that we avoid missing data. A writer is // free to delete the last few rows then rebuild them from scratch. A consumer // might see part of the old and part of the new. But the consumer would not // see an empty row. // // If two threads both try to grab the write lock at the same time, one of // them will sleep until the other calls releaseWriteLock(). // // Packed vs possible: There are two ways that we can store data. When data // is present, we always add it. When the data provider says that there is // no data for a row, we have two options. (a) We can store all NaNs for the // values from the data provider. And we can let the user formulas deal with // these as they wish. Or (b) we can skip the entire row. If _packed is // true, then we are doing (b). Either way, when someone asked for a value // three rows up, they are talking about 3 rows that we decided to store. // In the case of (a) that's the same for every stock, but in the case of // (b) that's not necessarily true. When a function says it works with // packed rows, it's talking about the rows that we actually store. If a // function talks about possible rows, it's talking about the ideal picture // from the CandleTimer. It is possible to convert between the two if you // are looking at a particular GridInstance. If _packed is false, then the // packed and possible functions act the same. class GridInstance : public GridDataProviderContext { private: const GridPrototypeRef _prototype; const GridDataProviderRef _dataProvider; mutable pthread_mutex_t _mutex; GridInstanceData::PackableValues _values; EpochCounter::Epoch _epoch; // _writerThread says which thread owns the writeLock. We handle reads // differently depending whether or not the current thread holds the write // lock. The locked thread can see the most recently written values // immediately. Other threads see the last committed values. _writerThread // is meaningless if _writeLocked is false. bool _writeLocked; pthread_t _writerThread; mutable pthread_cond_t _writerCondition; bool useCommitted() const; bool holdingWriteLock() const; // Several functions will do the same thing. If this thread holds the // write lock: Grab the inProgress grid, read from it, do work, return a // result. Otherwise: Grab mutex, grab the committed grid, read from it, // do work, save a value, release the mutex, return the value. The read and // do work steps are identical either way. This object encapsulates the // beginning and end parts so you can focus on the read and work steps. // This way you don't have to duplicate the read and work code. This also // means that you don't have to store the result in a temporary variable. // Just create this object in a variable on the stack, and the code will // automatically release the mutex, if required, when the function exits. class WhichData : NoCopy, NoAssign { private: pthread_mutex_t *const _mutex; const GridInstanceData::Grid _grid; public: WhichData(GridInstance const *gridInstance); ~WhichData(); GridInstanceData::Grid const &grid() const { return _grid; } bool holdingWriteLock() const { return !_mutex; } bool inProgress() const { return holdingWriteLock(); } bool committed() const { return !holdingWriteLock(); } GridInstanceData::Use use() const; }; //GridInstanceData::Grid getGrid() const;; //GridInstanceData::Use use() const; // The output from EpochCounter::restartAt() would be the perfect input for // deleteStartingAt(). void deleteStartingAt(time_t time); public: static double invalid(); GridInstance(GridPrototypeRef const &prototype, GridDataProviderRef const &dataProvider); virtual GridPrototypeRef const &getPrototype() const { return _prototype; } GridDataProviderRef const &getDataProvider() const { return _dataProvider; } double referencePacked(int row, int column) const; double referencePacked(int row, const std::string &column) const; double sum(int firstRow, int lastRow, int column, bool skipBadValues) const; double sum(int firstRow, int lastRow, const std::string &column, bool skipBadValues) const; double count(int firstRow, int lastRow, int column) const; double count(int firstRow, int lastRow, const std::string &column) const; double average(int firstRow, int lastRow, int column, bool skipBadValues) const; double average(int firstRow, int lastRow, const std::string &column, bool skipBadValues) const; double max(int firstRow, int lastRow, int column, bool skipBadValues) const; double max(int firstRow, int lastRow, const std::string &column, bool skipBadValues) const; double min(int firstRow, int lastRow, int column, bool skipBadValues) const; double min(int firstRow, int lastRow, const std::string &column, bool skipBadValues) const; // If all values are legal, return them in order, oldest first. If there are // any bad value, return an empty list. We hold a lock the entire time, so // the values will be consistent. void getValuesAll(std::vector< double > &result, int firstRow, int lastRow, int column) const; void getValuesAll(std::vector< double > &result, int firstRow, int lastRow, const std::string &column) const; // The row of the next item we will insert. // I purposely made this non-const. It only makes sense when you are // modifying the data. It always applies to the data in progress, not the // committed data. int nextRow(); // The row we're looking at for the current computation. sum(), ref(), etc., // are all relative to this row. This is packed. All calculations are based // on the the packed format. In fact, the entire point of the packed format // is for calculations. Any memory we save is a freebie. virtual int getCalculationRowPacked() const; // Start from getCalculationRowPacked(). Subtract the offset. Then // convert from packed to possible. virtual int getCalculationRowPossible(int packedOffset) const; // The number of rows that are finished. // // If this grid is not packed, you can compare that to the // CandleTimer, which tells you how many rows should be finished. This // does not report incomplete rows. We might do progress one cell at a // time, rather than one row at a time. int completedRowCountPossible() const; int completedRowCountPacked() const; // The column of the next item we will insert. int nextCol(); // The column for the value need to calculate. int getCalculationCol() { return nextCol(); } std::string getNextFormula() const; void removeOldData(); void store(double value); void skipRow(); GridInstance *writeLock() const; void releaseWriteLock(); virtual ~GridInstance(); std::string debugDump() const; bool isPacked() const { return _values.isPacked(); } int getPackedToPossible(int packed) const; typedef GridInstanceData::Round Round; static const int NOT_FOUND = GridInstanceData::NOT_FOUND; int getPossibleToPacked(int packed, Round round = Round::Up) const; double referencePossible(int row, int column, Round round = Round::Up) const; double referencePossible(int row, const std::string &column, Round round = Round::Up) const; }; typedef TSRefCount< GridInstance > GridInstanceRef; #endif