#ifndef __Strategy_h_ #define __Strategy_h_ #include "../shared/ThreadSafeRefCount.h" #include "../ax_alert_server/AlertConfig.h" #include "../ax_alert_server/TopListConfig.h" #include "Semantics.h" #include "Execution.h" namespace Parse { // Naming the data seems a lot more flexible than just numbering it from // 0. (I.e. a map is better than a vector.) This is more like the old sql // version. typedef std::map< std::string, Tree > ColumnsByName; // It's convenient to return the results the same way. This is more or // less what we had with MySql. Note that an EMPTY value will probably not // be placed in the map at all. See SerializeValuesByName.h for some related // support routines. typedef std::map< std::string, ValueBox > ValuesByName; // This stores several different formulas. When you parse a strategy, you // will need a where condition, several columns, and possibly a sort key. // You want to prepare these all at once. They will all apply to the same // user. They might share a lot of code; if you use the same formula in // the where condition, a column, and the sort key, you don't want to compute // it three separate times. These probably won't all be computed at once. // Start with the where condition to see if you need to go any further. Then // the sort condition to see if this is one of the top n records. // // This class contains a lot of the code that's shared between alerts and // top lists. // // This class is a nice place to split the processing so a developer can see // what's going on. Originally that was the only reason Strategy.C exported // this class. But that's not longer the only reason. See ITopListConfig. // // Copying works like std::string. The copy is completely independent of the // original. Modifying one does not affect the other. You can access the // copies in different threads at the same time. But the copy operation is // still reasonably fast. The effort is proportional to the number of // columns, and has nothing to do with how complicated the formulas are. class StrategyTrees { private: Tree _where; Tree _sort; ColumnsByName _columns; int _cacheSize; enum { OPT_NONE, OPT_PARTIAL, OPT_FULL } _optimized; static Tree usePrice(bool current, Tree orig); void usePrice(bool current); enum Count { count0, count1, countMore }; typedef std::map< Tree, Count, TreeCompare > Counts; static void countDuplicates(Tree tree, Counts &counts); typedef std::map< Tree, Tree, TreeCompare > Replacements; static Tree replaceTopDown(Tree original, Replacements const &replacements); public: StrategyTrees(Tree where, Tree sort, ColumnsByName columns) : _where(where), _sort(sort), _columns(columns), _cacheSize(0), _optimized(OPT_NONE) { } StrategyTrees() : _where(NullValue::noToken()), _sort(NullValue::noToken()), _cacheSize(0), _optimized(OPT_NONE) { } void clear() { // Effectively, *this = StrategyTrees(); _where = NullValue::noToken(); _sort = NullValue::noToken(); _cacheSize = 0; _optimized = OPT_NONE; } void useCurrentPrice(); void useLastPrice(); int getCacheSize() const { return _cacheSize; } // This step is optional. If you call optimizeFull(), and you haven't // called this yet, it will call optimizePartial() for you. Calling this // more than once doesn't do anything. Calling this after calling // optimizeFull() is an error. I.e. the requested state must be the same // as or ahead of the current state; you can't go back. // // Explicitly calling optimizePartial() can save you some time. Imagine // you plan to run a group of strategies at the same time. Imagine the // group constantly changes. You don't want to repeat any more work than // you have to. In this case create a StrategyTrees object and call // optimizePartial(). Do that once for each strategy. call optimizeFull() // on a copy of these strategies each time the list of strategies changes. void optimizePartial(); // This will automatically call optimizePartial() if necessary. Do not // call optimizePartial() or optimizeFull() on a strategy after calling // optimizeFull() once. // // (optimizeFull() is meant to work on a group of strategies all working // together. If you want to use a strategy in more than one group, make // copies of the original and optimize each of them separately. If you // tried to optimize a strategy for multiple lists, you'd cause some // strange errors, possibly a core dump, so we're extra careful here.) // // This will find all duplicated code and create cache objects. When // the code executes, you won't have to call the duplicated code more than // once. Although not strictly necessary, this will help performance // a lot. static void optimizeFull(std::vector< StrategyTrees * > const &trees); void optimizeFull() { optimizeFull({ this }); } Tree getWhere() const { return _where; } Tree getSort() const { return _sort; } ColumnsByName const &getColumns() const { return _columns; } bool operator ==(StrategyTrees const &other) const; std::string shortDebug() const; }; // This is read only and thread safe. class CompiledStrategyTrees : NoAssign, NoCopy { private: typedef std::map< std::string, Execution::Executable * > Columns; Execution::Executable *_where; Execution::Executable *_sort; Columns _columns; // Create a new executable which always returns the EMPTY value. static Execution::Executable *empty(); // This will return something reasonable even if the input is NULL or we // get a parse exception. static Execution::Executable *create(Tree tree); public: CompiledStrategyTrees(StrategyTrees const &trees); ~CompiledStrategyTrees(); bool evaluateWhere(Execution::RecordInfo &recordInfo) const; double evaluateSort(Execution::RecordInfo &recordInfo) const; ValuesByName evaluateColumns(Execution::RecordInfo &recordInfo) const; void cacheInfoDebug(std::map< int, int > &used) const; std::string describeDebug() const; typedef SmarterCP< CompiledStrategyTrees > Ref; }; /* This allows us to satisfy alert requests. This lets you compile the * strategy starting from a collaborate string. Then you can use this * object to evalute alert records to see if they match the query or not. * if they do, you can use this object to get values for the columns. */ class AlertStrategy { private: StrategyTrees _halfOptimized; CompiledStrategyTrees::Ref _compiled; int _cacheSize; // Does the actual work of compiling. compile() adds error checking and // reporting. static void compileImpl(std::vector< AlertStrategy * > const &strategies); // Check that the compiled strategies look reasonable. This is basically // an assertion; if this finds a problem it's a problem in our code, not // the user's request. This will return an empty list if all is good. // Otherwise the list will have some information about the failure that // might help a programmer. static TclList compileVerify(std::vector< AlertStrategy * > const & strategies); static void dumpCacheToLog(std::vector< AlertStrategy * > const & strategies); public: // Initially there is nothing in the strategy. If you try to execute // the where condition or to view the columns, that will probably // cause a core dump. Previous revisions of this file said that these // functions would do something safe and uninteresting, but that was // never true. AlertStrategy(); // Parse and compile the request. void load(std::string const &collaborate, UserId userId, DatabaseWithRetry &database, // You can skip the final optimization step. This will // leave the object in a state where you can't execute anything // or you'll probably get a core dump. The preferred way of // using this object is that you always find a group of // strategies and optimize them all at once. So it would be // wasteful (but otherwise correct) to do the optimization now. // By default we do the optimization here because of various // older programs (most test programs) which run strategies // one at a time. bool compileNow = true, TclList *debugDump = NULL); // This has been replace by isRunnable() and isLoaded(). I purposely // changed the name so old code using this won't compile any more. You // should look at any old code and decide which of the new options to // use. //bool loaded() const { return _compiled; } // If this is false, calling evaluateWhere() or evaluateColumns() would // be a bad idea. The program would program would probably dump core, // but we don't promise anything specific. bool isRunnable() const { return _compiled; } // Do we need this? This goes back to the old days when a strategy was // always a pointer. Some test code would create a pointer variable and // check if it was null. Since we don't use pointers any more, we needed // a replacement way to say if the strategy had be set or not. bool isLoaded() const { return _halfOptimized.getWhere(); } // Returns the object to its initial state. void clear() { _halfOptimized.clear(); _compiled = NULL; _cacheSize = 0; } // Compile and optimize this strategy to be run by itself. This is offered // as a convenience. Calling compile() on a list including only this item // will have the same effect. void compile(); // Compile all of these strategies. They are optimized to share the same // cache. So you only call init() once. Call it before the first strategy // in the list. Continue using the RecordInfo for all of the strategies. // It's safe to call this multiple times. Each time you will keep the // same strategy loaded, but change the optimization / cache usage. // It is safe and reasonably efficient skip a strategy. If one strategy // has been temporarily silenced because it has already reported enough // events, or a strategy has been completely deleted, there's no immediate // need to recompile. The order of the arguments doesn't matter. // // Do not call this unless isLoaded() is true for every strategy! static void compile(std::vector< AlertStrategy * > const &strategies); // Set up the cache in the record info. This is required before evaluating // the where condition or the columns for a record. void init(Execution::RecordInfo &recordInfo) const; bool evaluateWhere(Execution::RecordInfo &recordInfo) const; ValuesByName evaluateColumns(Execution::RecordInfo &recordInfo) const; // This is mostly aimed at ParserTest.C. For the most part AlertStrategy // is self contained. It knows how to create and use the strategy trees. // Aside from debugging, StrategyTrees should be considered an // implementation detail. StrategyTrees const &debugGetStrategyTrees() const { return _halfOptimized; } }; // This is enough information for us to run a top list query. It could have // come from multiple places. class ITopListConfig { public: virtual int getCount() const =0; virtual std::string getSingleSymbol() const =0; virtual StrategyTrees getStrategyTrees() const =0; virtual bool outsideMarketHours() const =0; virtual ~ITopListConfig() { } }; // We know that if we start from a collaborate string and load a normal // top list strategy, it should be able to do more for us. There's no // need to start from scratch when we already have a good TopListConfig // object hiding under the hood, waiting to help us. class ITopListConfigFromCollaborate : public ITopListConfig { public: // This is for the metadata message. virtual void getInitialDescription(XmlNode &node) const =0; }; class TopListStrategyBase { public: typedef std::vector< ValuesByName > Values; typedef std::map< std::string, Record::Ref > Records; struct Result { Values values; time_t start; time_t end; void addAsChild(XmlNode &parent, std::string const &windowId) const; Result() { } // Add the default back. The move constructor hid this. Result(Result const &other) : // Ditto values(other.values), start(other.start), end(other.end) { } Result(Result && other); void operator =(Result const &other); void operator =(Result && other); }; protected: int _resultCount; std::string _singleSymbol; struct StrategyInProgress { Result result; TopListStrategyBase const *strategy; CompiledStrategyTrees::Ref executables; // May be NULL. StrategyInProgress(TopListStrategyBase const *strategy, CompiledStrategyTrees::Ref const &executables) : strategy(strategy), executables(executables) { } void operator =(StrategyInProgress const &other) { result = other.result; strategy = other.strategy; executables = other.executables; } void operator =(StrategyInProgress &&other) { result = std::move(other.result); strategy = other.strategy; executables = other.executables; } StrategyInProgress(StrategyInProgress &&other) { result = std::move(other.result); strategy = other.strategy; executables = other.executables; } }; typedef std::vector< StrategyInProgress > StrategiesInProgress; static void run(Records const &records, StrategiesInProgress &strategies, int cacheSize); static void run(Records const &records, StrategiesInProgress &strategies, int cacheSize, time_t endTime, int seconds); struct PossibleResult { double sortOn; Execution::RecordInfo *recordInfo; bool operator <(PossibleResult const &other) const { return sortOn < other.sortOn; } bool operator ==(PossibleResult const &other) const { return sortOn == other.sortOn; } }; TopListStrategyBase(); public: int getResultCount() const { return _resultCount; } std::string const &getSingleSymbol() const { return _singleSymbol; } bool hasSingleSymbol() const { return !_singleSymbol.empty(); } }; class TopListStrategy : public TopListStrategyBase { private: int _cacheSize; int getCacheSize() const { return _cacheSize; } CompiledStrategyTrees::Ref _streaming; CompiledStrategyTrees::Ref _frozen; public: Result run(Records const &records, time_t endTime, int seconds) const; Result run(Records const &records, time_t endTime, Records const &frozenRecords, time_t frozenEndTime, int seconds) const; void init(Execution::RecordInfo &recordInfo) const; TopListStrategy(); void load(std::string collaborate, UserId userId, DatabaseWithRetry &database, TclList *debugDump = NULL); void load(ITopListConfig &config, TclList *debugDump); // Create a config object. This could be loaded directly via load(). // Or you can create another ITopListConfig which is based on this one. // // This function creates a new object and the caller is responsible for // deleting this object. // // The result may (and probably does) maintain a pointer to the database. // You should only access this object in the same thread as you created // it in. // // TODO The fact that we store a pointer to the database is a sign of // a terrible API. I've started adding some direct calls to // DatabaseForThread to help. Ideally this unit should only use // DatabaseForThread and should never ask for the database as an input. static ITopListConfigFromCollaborate *getConfig(std::string collaborate, UserId userId, DatabaseWithRetry & database, bool saveToMru, bool magicColumns, bool strategyColumns, TclList *debugDump); }; class MergableTopListStrategy : public TopListStrategyBase { private: StrategyTrees _liveTrees, _frozenTrees; public: MergableTopListStrategy() { } void load(std::string collaborate, UserId userId, DatabaseWithRetry &database); class List : NoCopy, NoAssign { private: const std::vector< MergableTopListStrategy > _strategies; int _liveCacheSize, _frozenCacheSize; StrategiesInProgress _liveStrategies, _frozenStrategies; // This doesn't just pick between the two. This will do some // initialization. We don't compile these until we need them. StrategiesInProgress &getStrategies(bool live, int &cacheSize); public: // This will copy the strategy objects. These will be needed later // for their metadata. In particular, the names of the columns. List(std::vector< MergableTopListStrategy > const &strategies); std::vector< Result > run(Records const &records, time_t endTime, int seconds); std::vector< Result > run(Records const &records, time_t endTime, Records const &frozenRecords, time_t frozenEndTime, int seconds); }; }; }; #endif