#include "ProcessData.h"
#include "CandleModels.h"
#include "GdCharts.h"
#include "FilenameEncode.h"
#include "../data_framework/MarketHours.h"
#include "../data_framework/VolumeWeightedData.h"
#include "../../shared/MiscSupport.h"
#include "../../shared/GlobalConfigFile.h"
#include "../../shared/TwoDLookup.h"
// NOTE(review): eight system headers followed here, but their names were lost
// (only bare "#include" tokens survived).  The list below is inferred from the
// standard names this file uses -- confirm against the original.
#include <assert.h>
#include <float.h>
#include <limits.h>
#include <math.h>
#include <algorithm>
#include <sstream>
#include <string>
#include <vector>

/** Returns num * num. */
double inline square(const double &num)
{
  return num * num;
}

/**
 * Sample standard deviation (n - 1 in the denominator) of values.
 *
 * Returns 0 when fewer than two values are supplied.  Previously that case
 * divided by zero (one value) or by a wrapped unsigned size (no values).
 */
double stdDev(const std::vector< double > &values)
{
  if (values.size() < 2)
  {
    return 0.0;
  }
  double mean = 0;
  double deviations = 0;
  for(unsigned int i = 0; i < values.size(); i++)
  {
    mean += values[i];
  }
  mean = mean / values.size();
  for(unsigned int i = 0; i < values.size(); i++)
  {
    deviations += square(values[i] - mean);
  }
  return sqrt(deviations/(values.size()-1));
}

/**
 * Splits baseName at its last '/'.
 *
 * path receives everything up to and including that '/', or becomes empty
 * when there is no '/'.  remainder receives what follows and must not be
 * empty (asserted).
 */
void splitPath(const std::string &baseName, std::string &path, std::string &remainder)
{
  size_t pos = baseName.rfind('/');
  if (pos != std::string::npos)
  {
    assert(pos + 1 < baseName.length());
    path = baseName.substr(0, pos + 1);
    remainder = baseName.substr(pos + 1);
  }
  else
  {
    // Do not leave a stale value in the out-parameter.
    path.clear();
    remainder = baseName;
  }
  assert(!remainder.empty());
}

// puts a vector of strings into a comma delimited string encased in double quotes
// would probably fail if you passed in something with a quote or a comma
std::string commaText(const std::vector< std::string > &strings)
{
  std::string output = "\"";
  for(std::vector< std::string >::const_iterator it = strings.begin(); it != strings.end(); ++it)
  {
    if (it != strings.begin())
    {
      output += ",";
    }
    output += *it;
  }
  output += "\"";
  return output;
}

/** Sets the numeric defaults and nulls every output-writer pointer. */
ProcessData::ProcessData() :
  periods(26),
  minAcceptableR2(0.25),
  dailyWeight(1.0083524548945949942911810645439), // more precision than a double can handle
  dumpCandles(NULL),
  outputData(NULL),
  volumeData(NULL),
  TCData(NULL),
  volumeBlocks(NULL),
  standardCandles(NULL),
  fundamentalData(NULL),
  pristineData(NULL),
  additionalData(NULL)
{
}

/** Flushes the output files, then releases dumpCandles. */
ProcessData::~ProcessData()
{
  flushOutputFiles();
  if (dumpCandles)
  {
    delete dumpCandles;
  }
  //
StandardCandlesInput.Free; } void removeZeroDays(BarList &data) { BarList newData; for (BarList::iterator it = data.begin(); it != data.end(); it++) { if (it->volume > 0) { newData.push_back(*it); } } // If there was a change, we use the changed data. If the new data // has a length of 0 then we have an instrument that doesn't // report volume ever, so this test doesn't prove anything. In // that case we just skip this step of the scrubbing. if (newData.size() != data.size() && newData.size() > 0) { data = newData; } } void trimData(BarList &data) { if (data.size() > 0) { BarList::iterator it = data.end(); while (true) { it--; if (it->open <= 0.0 || it->high <= 0.0 || it->low <= 0.0 || it->close <= 0.0) { if (it + 1 == data.end()) { data.clear(); } else { data.assign(it + 1, data.end()); } return; } if (it == data.begin()) { return; } if ((it->startTime - (it-1)->startTime) >= 14 * MARKET_HOURS_DAY) { data.assign(it, data.end()); return; } } } } BarList scrubDailyData(const BarList &data) { BarList result = data; removeZeroDays(result); trimData(result); return result; } int getPeriod(time_t start) { return (secondOfTheDay(start) - (15 * MARKET_HOURS_MINUTE + 6 * MARKET_HOURS_HOUR)) / MARKET_HOURS_PERIOD; } void ProcessData::processAllData(const std::string &symbol, BarList const &data1Min, BarList const &data1Day, CorporateActions const &corporateActions, const FundamentalData &dataFundamental) { unsigned int averageDailyVolume; BarList data1DayScrubbed = scrubDailyData(data1Day); processSymbolData1Day(symbol, data1DayScrubbed, averageDailyVolume); processSymbolData1Min(symbol, data1Min, averageDailyVolume); processSymbolFundamental(symbol, dataFundamental); processSymbolMisc(symbol, data1DayScrubbed, dataFundamental); if(!_chartsDirectory.empty()) buildCharts(symbol, data1Day); if (!skipDumpCandles()) { dumpCandles->addData(symbol, data1Min, data1DayScrubbed, corporateActions); } } void ProcessData::processSymbolData1Day(const std::string &symbol, const BarList 
&bars, unsigned int &averageDailyVolume)
{
  // Set a good default value, just in case.
  averageDailyVolume = 0;
  if (bars.size() > 0)
  {
    SMA(symbol, bars, 200);
    SMA(symbol, bars, 50);
    SMA(symbol, bars, 20);
    highsAndLows(symbol, bars);
    lastPrice(symbol, bars);
    // The only call that writes averageDailyVolume.
    averageVol(symbol, bars, averageDailyVolume);
    upDays(symbol, bars);
    correlation(symbol, bars);
    rangeContraction(symbol, bars);
    brightVolatility(symbol, bars);
    previousDay(symbol, bars);
    bollingerStdDev(symbol, bars, 20);
    consolidation(symbol, bars);
    averageTrueRange(symbol, bars, 14);
    nDayRange(symbol, bars, 5);
    nDayRange(symbol, bars, 10);
    nDayRange(symbol, bars, 20);
    RSI(symbol, bars, 14);
    pristineLists(symbol, bars);
  }
}

// Computes every statistic derived from one-minute bars: standard candles,
// the "Bunny" value, shares per print, the volume break, tick volatility and
// the average volume for each period of the day.
void ProcessData::processSymbolData1Min(const std::string &symbol, const BarList &bars, const unsigned int averageDailyVolume)
{
  saveStandardCandles(symbol, bars);
  findBunnies(symbol, bars, 130);
  sharesPerPrint(symbol, bars);
  // One slot per period; slot k holds period k + 1.
  std::vector< int > volumeByPeriod(periods, 0);
  // NOTE(review): realDate is assigned in the loop below but never read in
  // this function.
  std::string realDate;
  int totalDays = 0;
  int timeFrame;
  int volumeBreak;
  unsigned long marketVolume = 0;
  unsigned long preVolume = 0;
  unsigned long postVolume = 0;
  time_t previousTimeTag = 0;
  double sumOfVolumeWeightedChanges = 0.0;
  // First pass: split the volume into before / during / after the numbered
  // periods and count the distinct days that have in-period bars.
  for(BarList::const_iterator it = bars.begin(); it != bars.end(); it++)
  {
    realDate = ctimeString(it->startTime);
    timeFrame = getPeriod(it->startTime);
    if (timeFrame < 1)
    {
      preVolume += it->volume;
    }
    else if (timeFrame > periods)
    {
      postVolume += it->volume;
    }
    else
    {
      // A new day starts whenever the date differs from the previous
      // in-period bar.
      if (midnight(previousTimeTag) != midnight(it->startTime))
      {
        totalDays++;
      }
      marketVolume += it->volume;
      previousTimeTag = it->startTime;
      sumOfVolumeWeightedChanges += (it->high - it->low) * sqrt(it->volume);
    }
  }
  if (marketVolume > 0)
  {
    // Average in-period volume per period per day, rounded to nearest.
    volumeBreak = int(1.0 * marketVolume / totalDays / periods + 0.5);
    outputData->add("Volume Break", symbol, itoa(volumeBreak));
    outputData->add("Tick Volatility", symbol, dtoa(sumOfVolumeWeightedChanges * sqrt(volumeBreak) / marketVolume));
    // Second pass: accumulate the volume of each numbered period.
    for (unsigned int i = 0; i < bars.size(); i++)
    {
      timeFrame =
(secondOfTheDay(bars[i].startTime) - (6 * MARKET_HOURS_HOUR + 15 * MARKET_HOURS_MINUTE)) / MARKET_HOURS_PERIOD; if (timeFrame >= 1 && (unsigned) timeFrame <= volumeByPeriod.size()) { volumeByPeriod[timeFrame - 1] += bars[i].volume; } } for (unsigned int i = 0; i < volumeByPeriod.size(); i++) { volumeData->add(itoa(i + 1), symbol, itoa(int((double(volumeByPeriod[i])) / totalDays + 0.5))); } volumeData->add("Pre", symbol, itoa(int(0.5 + 1.0 * preVolume / totalDays))); volumeData->add("Post", symbol,itoa(int(0.5 + 1.0 * postVolume / totalDays))); if (averageDailyVolume >= 150000 && volumeBreak > 0) { makeVolumeCandles(symbol, volumeBreak, bars); } } } void copyField(TwoDArrayWriter *&writer, const std::string &name, const std::string &symbol, const std::string &value) { if (!value.empty()) { writer->add(name, symbol, value); } } void copyField(TwoDArrayWriter *&writer, const std::string &name, const std::string &symbol, const double value) { if (value > 0) { writer->add(name, symbol, dtoa(value)); } } void copyField(TwoDArrayWriter *&writer, const std::string &name, const std::string &symbol, const int value) { if (value > 0) { writer->add(name, symbol, itoa(value)); } } void copyField(TwoDArrayWriter *&writer, const std::string &name, const std::string &symbol, const long value) { if (value > 0) { writer->add(name, symbol, itoa(value)); } } void ProcessData::processSymbolFundamental(const std::string &symbol, const FundamentalData &dataFundamental) { copyField(fundamentalData, "Company Name", symbol, dataFundamental.companyName); copyField(outputData, "Listed Exchange", symbol, dataFundamental.listedExchange); copyField(fundamentalData, "Prev Put Volume", symbol, dataFundamental.putVolume); copyField(fundamentalData, "Prev Call Volume", symbol, dataFundamental.callVolume); copyField(fundamentalData, "Dividend", symbol, dataFundamental.dividend); copyField(fundamentalData, "Earnings", symbol, dataFundamental.earnings); copyField(fundamentalData, "P/E Ratio", symbol, 
dataFundamental.peRatio); copyField(fundamentalData, "Beta", symbol, dataFundamental.beta); copyField(fundamentalData, "Short Interest", symbol, dataFundamental.shortInterest); copyField(fundamentalData, "Market Cap", symbol, dataFundamental.marketCap); copyField(fundamentalData, "Shares Outstanding", symbol, dataFundamental.sharesOutstanding); copyField(fundamentalData, "EPS Net Income", symbol, dataFundamental.epsNetIncome); copyField(fundamentalData, "Income", symbol, dataFundamental.income); copyField(fundamentalData, "Revenues", symbol, dataFundamental.revenues); copyField(fundamentalData, "Assets", symbol, dataFundamental.assets); copyField(fundamentalData, "Debt", symbol, dataFundamental.debt); copyField(fundamentalData, "52 Week High", symbol, dataFundamental.high52Week); copyField(fundamentalData, "52 Week Low", symbol, dataFundamental.low52Week); } void ProcessData::processSymbolMisc(const std::string &symbol, BarList const &data1Day, const FundamentalData &dataFundamental) { if (data1Day.size() > 0) { double lifetimeHigh = data1Day.back().high; double lifetimeLow = data1Day.back().low; for (unsigned int i = 0; i < data1Day.size() - 1; i++) { lifetimeHigh = std::max(lifetimeHigh, data1Day[i].high); lifetimeLow = std::min(lifetimeLow, data1Day[i].low); } lifetimeHigh = std::max(lifetimeHigh, dataFundamental.high52Week); if (dataFundamental.low52Week > 0) lifetimeLow = std::min(lifetimeLow, dataFundamental.low52Week); copyField(fundamentalData, "Lifetime High", symbol, lifetimeHigh); copyField(fundamentalData, "Lifetime Low", symbol, lifetimeLow); } } inline int dayOfTheWeek(time_t time) { struct tm brokenDown; localtime_r(&time, &brokenDown); return brokenDown.tm_wday; } /* MinutesPerBar described the output. If this is two, then adjacent pairs of one minute bars are merged. We always start counting with the first bar at the open. If the number of minutes per bar does not divide evenly into the number of bars in a day, then the last bar gets shorted. 
We divide up the day based only on the MinutesPerBar input, not the actual bar data. If a minute is missing from the day, then we just have fewer minutes in the resulting bar in the output. For example, if MinutesPerBar=3 and there were no trades in the second minute, then we use the first two bars of input to create the first bar of output. The third bar of input will apply to the second bar of input. OneMinuteBars in the input. We always start from one minute bars for simplicity. Presumably we could use 5 minute bars to make 10 and 15 minute bars. But since we are only making one request, and that request is in a different piece of code, we always request one minute bars. The datasource will sometimes skip a bar by leaving the bar out, and other times it will skip the bar by setting volume to 0. Other times a symbol will have no volume at all. This function deals with all of these cases correctly. If a period has no data, we deal with that in the most naive way. If all of the input bars corresponding to this period had a volume of 0, we report a volume of 0 on the output bar. If all of the input bars corresponding to an output bar are missing, then the output bar will be missing. */ BarList ProcessData::extractFrom1MinuteBars(const int minutesPerBar, const BarList &oneMinuteBars) { BarList result; assert(minutesPerBar > 0); result.resize(oneMinuteBars.size()); time_t lastOutputDate = 0; time_t currentInputDate; int lastOutputPeriod = -1; int outputIndex = -1; int currentInputSecond, currentInputPeriod; for (unsigned int inputIndex = 0; inputIndex < oneMinuteBars.size(); inputIndex++) { currentInputDate = midnight(oneMinuteBars[inputIndex].startTime); currentInputSecond= secondOfTheDay(oneMinuteBars[inputIndex].startTime); /* Skip a bar that is not part of a normal trading day. This unfortunately misses some things, like when normal stocks are on holiday, but the futures are still trading. 
In that case we would ideally like to skip the day for the futures, since the data is so sparse. */ if(currentInputSecond < MARKET_HOURS_OPEN || currentInputSecond >= MARKET_HOURS_CLOSE || dayOfTheWeek(currentInputDate) < 2) //Saturday and Sunday { currentInputPeriod = -1; } else { currentInputPeriod = (currentInputSecond - MARKET_HOURS_OPEN) / (minutesPerBar * MARKET_HOURS_MINUTE); } if (currentInputPeriod < 0) { // Skip it. } else if (currentInputDate == lastOutputDate && currentInputPeriod == lastOutputPeriod) { // Add to this candle. result[outputIndex].high = std::max(result[outputIndex].high, oneMinuteBars[inputIndex].high); result[outputIndex].low = std::min(result[outputIndex].low, oneMinuteBars[inputIndex].low); result[outputIndex].close = oneMinuteBars[inputIndex].close; result[outputIndex].volume = result[outputIndex].volume + oneMinuteBars[inputIndex].volume; } else { // Start a new candle. outputIndex++; result[outputIndex] = oneMinuteBars[inputIndex]; result[outputIndex].startTime = (currentInputPeriod * minutesPerBar * MARKET_HOURS_MINUTE) + MARKET_HOURS_OPEN; } lastOutputDate = currentInputDate; lastOutputPeriod = currentInputPeriod; } result.resize(outputIndex + 1); return result; } /* This takes bars which came from the output of extractFrom1MinuteBars and it looks for missing bars. If there is a missing bar, then we throw away all the data before it. If there is more than one, then we throw away everything before the last one. 
*/ void ProcessData::breakAtMissingBar(BarList &bars, const int minutesPerCandle) { time_t previousDate = 0; time_t currentDate; int previousSeconds = 0; int currentSeconds; bool stockContainsVolume = false; for (BarList::iterator it = bars.begin(); it != bars.end(); it++) { if (it->volume > 0) { stockContainsVolume = true; break; } } // This loop logic is a little convoluted because vector.assign() doesn't // handle reverse iterators in 'normal' order for (BarList::iterator it = bars.end(); it != bars.begin(); ) { it--; currentDate = midnight(it->startTime); currentSeconds = secondOfTheDay(it->startTime); /* The bars are not consecutive. We work backwards from the most recent bar and stop when we get to a missing bar. The logic can miss some cases, like a stock which is perfect except that the first or last bar of a day is missing. But the logic should do a sufficient job most of the time. */ if (((currentDate == previousDate) && (currentSeconds + minutesPerCandle * MARKET_HOURS_MINUTE != previousSeconds)) || (stockContainsVolume && (it->volume == 0))) { bars.assign(it + 1, bars.end()); return; } previousDate = currentDate; previousSeconds = currentSeconds; } } inline int stringToIntDefault(const std::string &str, const int &defaultVal) { int retVal; std::stringstream ss(str); if ( (ss >> retVal).fail() ) { return defaultVal; } else return retVal; } void ProcessData::saveStandardCandles(const std::string &symbol, const BarList &bars) { std::string encodedBars, newBar; std::string minutesPerCandleKey; int minutesPerCandle, maxCandleCount, currentCandleCount; BarList currentCandleSet; std::vector< std::string > allCandleSets; allCandleSets = _standardCandlesInput.getRowHeaders(); for (unsigned int whichCandleSet = 0; whichCandleSet < allCandleSets.size(); whichCandleSet++) { minutesPerCandleKey = allCandleSets[whichCandleSet]; minutesPerCandle = stringToIntDefault(minutesPerCandleKey, -1); assert(minutesPerCandle > 0 && minutesPerCandle < 256); maxCandleCount = 
stringToIntDefault(_standardCandlesInput.get("Max Candle Count", minutesPerCandleKey), -1); if (maxCandleCount < 1) maxCandleCount = INT_MAX; currentCandleSet = extractFrom1MinuteBars(minutesPerCandle, bars); breakAtMissingBar(currentCandleSet, minutesPerCandle); currentCandleCount = 0; encodedBars = ""; for(BarList::const_reverse_iterator rit = currentCandleSet.rbegin(); rit != currentCandleSet.rend(); rit++) { newBar = dtoa(rit->open) + ':' + dtoa(rit->high) + ':' + dtoa(rit->low) + ':' + dtoa(rit->close) + ':' + itoa(rit->volume); if (currentCandleCount == 0) { encodedBars = newBar; } else { encodedBars = newBar + ';' + encodedBars; } currentCandleCount++; if (currentCandleCount >= maxCandleCount) { break; } } // save all of them, even the empties so that we get our header row correct standardCandles->add(minutesPerCandleKey, symbol, encodedBars); } } void ProcessData::makeVolumeCandles(const std::string &symbol, const int64_t volumeBreak, const BarList &bars) { CandleModelList candles; assert(volumeBreak > 0); VolumeBlockFactory volumeBlockFactory(volumeBreak); int volumeRemainingThisCandle = volumeBreak; Prints prints; for (BarList::const_iterator it = bars.begin(); it != bars.end(); it++) { if (it->startTime > time(NULL) - 7 * MARKET_HOURS_DAY) { CandleModel candle(it->open, it->high, it->low, it->close, it->volume, it->startTime, it->startTime + MARKET_HOURS_PERIOD); candles.addCandle(candle); } } volumeRemainingThisCandle = volumeBreak; while(!candles.done()) { if (volumeRemainingThisCandle == 0) { volumeRemainingThisCandle = volumeBreak; } candles.extractVolume(volumeRemainingThisCandle, prints); for(Prints::const_iterator it = prints.begin(); it != prints.end(); it++) { volumeBlockFactory.addPrint(it->price, it->volume, it->time); } } volumeBlocks->add("Vol", symbol + '_' + itoa(volumeBreak), volumeBlocksToString(volumeBlockFactory.getBlocks(), true)); } // not sure why this didn't work from inline double sqr(const double &x) { return x * x; } std::string 
floatToFixedStr(const double &x, const unsigned int &precision) { std::stringstream ss; std::string str; ss.precision(precision); ss << std::fixed; ss << x; ss >> str; return str; } void ProcessData::findBunnies(const std::string &symbol, const BarList &bars, const int &minutesPerBar) { const int periods = 23; std::vector< double > differences(periods); double stdDev, m, b; // X is time, Y is price. double sX = 0.0; double sXX = 0.0; double sY = 0.0; double sYY = 0.0; double sXY = 0.0; double sumOfSquares = 0.0; BarList currentCandleSet = extractFrom1MinuteBars(minutesPerBar, bars); std::string key = "Bunny " + itoa(minutesPerBar); fundamentalData->add(key, symbol, ""); // Reserve space in the header even if we print nothing. breakAtMissingBar(currentCandleSet, minutesPerBar); if (currentCandleSet.size() >= (unsigned) periods) { try { currentCandleSet.assign(currentCandleSet.end() - periods, currentCandleSet.end()); assert(currentCandleSet.size() == (unsigned) periods); for(unsigned int i = 0; i < differences.size(); i++) { sX += i; sXX += sqr(i); sY += currentCandleSet[i].close; sYY += sqr(currentCandleSet[i].close); sXY += i * currentCandleSet[i].close; } stdDev = sqrt((periods * sYY - sqr(sY)) / (periods * (periods - 1))); m = (sXY - sX * sY / periods) / (sXX - sX * sX / periods); b = (sY - m * sX) / periods; for(unsigned int i = 0; i < differences.size(); i++) { differences[i] = (currentCandleSet[i].close - (m * i + b)) / stdDev; sumOfSquares += sqr(differences[i]); } fundamentalData->add(key, symbol, floatToFixedStr(sqrt(sumOfSquares / periods), 6)); } catch (...) { // If everything is flat, std dev will be 0, and we'll have a // divided by 0 error. 
} } } void ProcessData::sharesPerPrint(const std::string &symbol, const BarList &bars) { double totalShares = 0.0; double totalPrints = 0.0; for(BarList::const_iterator it = bars.begin(); it != bars.end(); it++) { totalShares += it->volume; totalPrints += it->printCount; } if (totalShares > 0 && totalPrints > 0) outputData->add("Shares per Print", symbol, floatToFixedStr(totalShares / totalPrints, 6)); } void ProcessData::setOutputFiles(std::string baseName) { std::string path; std::string remainder; std::vector< std::string > cols; flushOutputFiles(); splitPath(baseName, path, remainder); // This was the original file, hence the simple name. This contains general // background data which could be useful at different times. cols.push_back("Volume Break"); cols.push_back("Tick Volatility"); cols.push_back("Bright Volatility"); cols.push_back("Last Price"); cols.push_back("Avg Daily Volume"); cols.push_back("Correlation Symbol"); cols.push_back("Correlation M"); cols.push_back("F Correlation M"); cols.push_back("Shares per Print"); cols.push_back("Listed Exchange"); outputData = new TwoDArrayWriter(baseName, cols); // This is the average volume for each time period. This was seperated // from OutputData primarily to make both files more readable. Most column // names are only numbers, similar to the StandardCandles file. The file // name is required to disambiguate. volumeData = new TwoDArrayWriter(path + "V_" + remainder); // This is similar to OutputData, except that it should expire at the end // of the day. Ideally, if the overnight data program crashed, this file // would be deleted, but the old values from OutputData would be used. We // don't do that, but the possibility exists. TC stands for "time // critical." 
cols.clear();
  // Columns of the time-critical (TC_) file.
  cols.push_back("Range Contraction");
  cols.push_back("20 Day SMA");
  cols.push_back("50 Day SMA");
  cols.push_back("200 Day SMA");
  cols.push_back("20 Day StdDev");
  cols.push_back("EarliestDate");
  cols.push_back("Highs");
  cols.push_back("Lows");
  cols.push_back("Up Days");
  cols.push_back("Previous High");
  cols.push_back("Previous Low");
  cols.push_back("5 Day Close");
  cols.push_back("5 Day High");
  cols.push_back("5 Day Low");
  cols.push_back("10 Day Close");
  cols.push_back("10 Day High");
  cols.push_back("10 Day Low");
  cols.push_back("20 Day Close");
  cols.push_back("20 Day High");
  cols.push_back("20 Day Low");
  TCData = new TwoDArrayWriter(path + "TC_" + remainder, cols);
  // This stores historical volume blocks.  These should look similar to what
  // it would look like if we were receiving the tick data in real time.  We
  // approximate this using 1 minute candles.  We massage that data to put it
  // into the same format as the realtime data, although we're always losing
  // something.  The row names are different in here than in most of the files.
  // The name of the row includes the symbol and the number of shares per
  // block.  Currently we only support one number of shares per block, so
  // this doesn't do much.  However, many parts of the code were written to
  // support other options, so this is consistent.
  cols.clear();
  cols.push_back("Vol");
  volumeBlocks = new TwoDArrayWriter(path + "VB_" + remainder, cols);
  // This takes the candlestick data and stores it in a local file with very
  // little massaging.  The row is the symbol name, and the column name is the
  // number of minutes per candle.  (VolumeBlocks should have been organized
  // this way.)
  standardCandles = new TwoDArrayWriter(path + "SC_" + remainder);
  // This is data which is used only by the database, not the software
  // which generates the alerts.  This was separated out to make sure that
  // software doesn't waste memory on things it doesn't need.
// Some of the files above are used only by the alerts generation software,
  // and some are used by both that software and the database.  The name
  // "Fundamental" comes from the history of this file.  It is no longer
  // accurate.
  cols.clear();
  cols.push_back("Company Name");
  cols.push_back("Lifetime High");
  cols.push_back("Lifetime Low");
  cols.push_back("52 Week High");
  cols.push_back("52 Week Low");
  cols.push_back("Previous Close");
  cols.push_back("Previous Open");
  cols.push_back("Correlation R2");
  cols.push_back("F Correlation R2");
  cols.push_back("Consolidation Days");
  cols.push_back("Consolidation Top");
  cols.push_back("Consolidation Bottom");
  cols.push_back("Average True Range");
  cols.push_back("Bunny 130");
  cols.push_back("Previous Volume");
  cols.push_back("Prev Put Volume");
  cols.push_back("Prev Call Volume");
  cols.push_back("Dividend");
  cols.push_back("Earnings");
  cols.push_back("P/E Ratio");
  cols.push_back("Beta");
  cols.push_back("Short Interest");
  cols.push_back("Market Cap");
  cols.push_back("Shares Outstanding");
  cols.push_back("EPS Net Income");
  cols.push_back("Income");
  cols.push_back("Revenues");
  cols.push_back("Assets");
  cols.push_back("Debt");
  cols.push_back("14 Day RSI");
  fundamentalData = new TwoDArrayWriter(path + "F_" + remainder, cols);
  // This file stores a 1 for each stock if it qualifies as that particular
  // Pristine strategy.  This data will be processed by another program which
  // will pull out a list of stocks for each category and add them to the
  // database under a specified account (currently Pristine_Dynamic_Test)
  cols.clear();
  cols.push_back("PBS");
  cols.push_back("PSS");
  cols.push_back("PBS+");
  cols.push_back("PSS+");
  cols.push_back("WRB");
  cols.push_back("CBS");
  cols.push_back("CSS");
  cols.push_back("D20+");
  cols.push_back("D20-");
  pristineData = new TwoDArrayWriter(path + "Pristine_" + remainder, cols);
  // This file is used for debugging.  You can put things into this file and
  // read them with notepad.  They are not used anywhere else.
cols.clear(); cols.push_back("Volatility 9 days"); additionalData = new TwoDArrayWriter(path + "Additional_" + remainder, cols); } void ProcessData::flushOutputFiles() { delete outputData; outputData = NULL; delete volumeData; volumeData = NULL; delete TCData; TCData = NULL; delete volumeBlocks; volumeBlocks = NULL; delete standardCandles; standardCandles = NULL; delete fundamentalData; fundamentalData = NULL; delete pristineData; pristineData = NULL; delete additionalData; additionalData = NULL; // FDumpCandles.Free; // FDumpCandles = TDumpCandles.Create } void ProcessData::loadStandardCandles(const std::string &filename) { _standardCandlesInput.loadFromCSV(filename); } void ProcessData::previousDay(const std::string &symbol, const BarList &bars) { if (bars.size() >= 1) { BarList::const_iterator it = bars.end() - 1; fundamentalData->add("Previous Volume", symbol, itoa(it->volume)); fundamentalData->add("Previous Open", symbol, dtoa(it->open)); TCData->add("Previous High", symbol, dtoa(it->high)); TCData->add("Previous Low", symbol, dtoa(it->low)); if (bars.size() >= 2) { it--; fundamentalData->add("Previous Close", symbol, dtoa(it->close)); } } } void ProcessData::volatility(const std::string &symbol, const BarList &bars, const unsigned int &days) { double value = 0; value = commonVolatility(bars, days); if (value > 0) { additionalData->add("Volatility " + itoa(days) + " days", symbol, dtoa(value)); } } void ProcessData::brightVolatility(const std::string &symbol, const BarList &bars) { double value1; double value3; double value6; double value12; value12 = commonVolatilityMonths(bars, 12); if (value12 > 0) { value6 = commonVolatilityMonths(bars, 6); value3 = commonVolatilityMonths(bars, 3); value1 = commonVolatilityMonths(bars, 1); if ( (value6 > 0) && (value3 > 0) && (value1 > 0) ) { additionalData->add("Volatility 12 months", symbol, dtoa(value12)); additionalData->add("Volatility 6 months", symbol, dtoa(value6)); additionalData->add("Volatility 3 months ", 
symbol, dtoa(value3)); additionalData->add("Volatility 1 months ", symbol, dtoa(value1)); // delphi had some exception handling which I'm omitting for now outputData->add("Bright Volatility", symbol, dtoa((value12 + value6 + value3 + value1 * 2) / 5 / 19.1)); } } } double ProcessData::commonVolatility(const BarList &bars, const unsigned int &days) { double retVal = 0; std::vector< double > values(days); double mean; double sum; // todo: exception handling? if (days < bars.size()) { for(unsigned int i = 1; i < days; i++) { values[i - 1] = log(bars[bars.size() - i].close / bars[bars.size() - 1 - i].close); } sum = 0; for(unsigned int i = 0; i < days; i++) { sum = sum + values[i]; } mean = sum / days; sum = 0; for(unsigned int i = 0; i < days; i++) { sum = sum + square(values[i] - mean); } retVal = sqrt(sum * 252 / days); } return retVal; } // TODO: Make sure this is correct double ProcessData::commonVolatilityMonths(const BarList &bars, const unsigned int &months) { if (bars.size() > 0) { unsigned int i = 0; struct tm *firstDate_tm; firstDate_tm = localtime(&(bars[bars.size() - 1].startTime)); firstDate_tm->tm_mon = firstDate_tm->tm_mon - months; firstDate_tm->tm_isdst = -1; time_t firstDate = mktime(firstDate_tm); while ( (i < bars.size() - 1) && bars[i].startTime <= firstDate ) i++; if (i > 0) // We must skip at least one day, or we can't be sure this is valid. // If we have only 5 days of data, and we use this algorithm to go // back a month, we will think that we have an entire month's data. // We need to see at least one value before the month started to prove // to ourselves that we have an entire months data. If we have exactly // the amount of data requested, we will fail. It's hard to do that // better. 
return commonVolatility(bars, bars.size() - i); } return 0.0; } double ProcessData::range(const BarList &bars, const unsigned int &daysFromEnd) { return bars[bars.size() - 1 - daysFromEnd].high - bars[bars.size() - 1 - daysFromEnd].low; } void ProcessData::rangeContraction(const std::string &symbol, const BarList &bars) { if (bars.size() > 2) { // range is contracting if(range(bars, 1) > range(bars, 0)) { for(unsigned int i = 1; i < bars.size(); i++) { // Report when we see the condition change. If we see the // range contract for three days, then we run out of data, we // don't know what the value is. So we report nothing. if (range(bars, i + 1) <= range(bars, i)) { TCData->add("Range Contraction", symbol, itoa(i)); break; } } } // range is expanding else if(range(bars, 1) < range(bars, 0)) { for(unsigned int i = 1; i < bars.size(); i++) { if (range(bars, i + 1) >= range(bars, i)) { TCData->add("Range Contraction", symbol, itoa(-i)); break; } } } } } void ProcessData::bollingerStdDev(const std::string &symbol, const BarList &bars, const unsigned int &days) { if (days <= bars.size()) { double value; unsigned int offset = bars.size() - days; std::vector< double > closes(days); for (unsigned int i = 0; i < days; i++) { closes[i] = bars[i+ offset].close; } value = stdDev(closes); if (value > 0) { TCData->add(itoa(days) + " Day StdDev", symbol, dtoa(value)); } } } void ProcessData::SMA(const std::string &symbol, const BarList &bars, const unsigned int &days) { if (days < bars.size()) { double total = 0; for (unsigned int i = bars.size() - days; i < bars.size(); i++) { total += bars[i].close; } TCData->add(itoa(days) + " Day SMA", symbol, dtoa(total/days)); } } void ProcessData::highsAndLows(const std::string &symbol, const BarList &bars) { if (bars.size() > 0) { double highest = -DBL_MAX; double lowest = DBL_MAX; std::vector< std::string > highList; std::vector< std::string > lowList; TCData->add("EarliestDate", symbol, itoa(bars[0].startTime)); for 
(BarList::const_iterator it = (bars.end() - 1); it >= bars.begin(); it--) { if(it->low < lowest) { lowest = it->low; lowList.push_back(dtoa(it->low)); lowList.push_back(itoa(it->startTime)); } if(it->high > highest) { highest = it->high; highList.push_back(dtoa(it->high)); highList.push_back(itoa(it->startTime)); } } TCData->add("Highs", symbol, commaText(highList)); TCData->add("Lows", symbol, commaText(lowList)); } } void ProcessData::lastPrice(const std::string &symbol, const BarList &bars) { if (bars.size() > 0) { outputData->add("Last Price", symbol, dtoa(bars.back().close)); } } void ProcessData::averageVol(const std::string &symbol, const BarList &bars, unsigned int &averageDailyVolume) { double remainder; unsigned long totalVolume = 0; unsigned int first = 0; if (bars.size() > 10) first = bars.size() - 10; unsigned int last = bars.size() - 1; unsigned int totalBars = 1 + last - first; if (first < last) { for (unsigned int i = first; i <= last; i++) { totalVolume += bars[i].volume; } averageDailyVolume = totalVolume / totalBars; // round up if necessary remainder = (totalVolume % totalBars) / ((double) totalBars); if (remainder >= 0.5) averageDailyVolume++; outputData->add("Avg Daily Volume", symbol, itoa(averageDailyVolume)); } } int ProcessData::trend(const std::string &symbol, const BarList &bars, TrendType trendType, const unsigned int &offset) { enum DirectionType { DIRECTION_NONE, DIRECTION_DOWN, DIRECTION_SAME, DIRECTION_UP }; DirectionType previousDirection = DIRECTION_NONE; DirectionType currentDirection = DIRECTION_NONE; double priceIncrease = 0; int count = 0; unsigned int minDay = 1; if (trendType == TREND_GREEN_BAR) minDay = 0; for (BarList::const_iterator it = bars.end() - 1; it > bars.begin() + minDay; it--) { switch (trendType) { case TREND_OPEN: priceIncrease = it->open - (it - 1)->open; break; case TREND_CLOSE: priceIncrease = it->close - (it - 1)->close; break; case TREND_HIGH: priceIncrease = it->high - (it - 1)->high; break; case 
TREND_LOW: priceIncrease = it->low - (it - 1)->low; break; case TREND_GREEN_BAR: priceIncrease = it->close - it->open; break; } if (priceIncrease > 0) { currentDirection = DIRECTION_UP; } else if (priceIncrease < 0) { currentDirection = DIRECTION_DOWN; } else { break; } if (previousDirection != DIRECTION_NONE && previousDirection != currentDirection) { break; } if (currentDirection == DIRECTION_UP) { count++; } else { count--; } previousDirection = currentDirection; } return count; } void ProcessData::upDays(const std::string &symbol, const BarList &bars) { TCData->add("Up Days", symbol, itoa(trend(symbol, bars, TREND_CLOSE, 0))); } void ProcessData::correlation(const std::string &symbol, const BarList &bars) { //if not symbol is future double M = 0.0, R2 = 0.0; std::string correlationSymbol; bool success; findBestCorrelation(symbol, bars, success, correlationSymbol, M, R2); if(success && R2 >= minAcceptableR2) { outputData->add("Correlation Symbol", symbol, correlationSymbol); outputData->add("Correlation M", symbol, dtoa(M)); fundamentalData->add("Correlation R2", symbol, dtoa(R2)); } doCloseCloseCorrelation(_futuresDailyData, bars, success, M, R2); if(success && R2 >= minAcceptableR2) { outputData->add("F Correlation M", symbol, dtoa(M)); fundamentalData->add("F Correlation R2", symbol, dtoa(R2)); } } void ProcessData::consolidation(const std::string &symbol, const BarList &bars) { const unsigned int chartWidth = 40; if (bars.size() >= chartWidth) { double chartTop = -DBL_MAX; double chartBottom = DBL_MAX; double top = -DBL_MAX; double bottom = DBL_MAX; double potentialTop; double potentialBottom; double maxSize; for (BarList::const_iterator it = bars.end() - 1; it >= bars.end() - chartWidth; it--) { chartTop = std::max(chartTop, it->high); chartBottom = std::min(chartBottom, it->low); } maxSize = (chartTop - chartBottom) * 0.09; unsigned int i = 1; while (true) { if (i > chartWidth) break; potentialTop = std::max(top, std::max(bars[bars.size() - i].open, 
bars[bars.size() - i].close)); potentialBottom = std::min(bottom, std::min(bars[bars.size() - i].open, bars[bars.size() - i].close)); if (potentialTop - potentialBottom > maxSize) break; top = potentialTop; bottom = potentialBottom; i++; } fundamentalData->add("Consolidation Days", symbol, itoa(i-1)); if (i > 1) { fundamentalData->add("Consolidation Top", symbol, dtoa(top)); fundamentalData->add("Consolidation Bottom", symbol, dtoa(bottom)); } } } void ProcessData::averageTrueRange(const std::string &symbol, const BarList &bars, const unsigned int &days) { if(bars.size() > days) { double total = 0.0; double high; double low; double previousClose; double trueRange; for (unsigned int i = bars.size() - days; i < bars.size(); i++) { high = bars[i].high; low = bars[i].low; previousClose = bars[i - 1].close; trueRange = std::max(high, previousClose) - std::min(low, previousClose); total = total + trueRange; } fundamentalData->add("Average True Range", symbol, dtoa(total/days)); } } void ProcessData::nDayRange(const std::string &symbol, const BarList &bars, const unsigned int &days) { std::string daysString = itoa(days) + " Day "; if (bars.size() > days) { TCData->add(daysString + "Close", symbol, dtoa(bars[bars.size() - 1 - days].close)); } if (bars.size() >= days) { double highest = -DBL_MAX; double lowest = DBL_MAX; for (unsigned int i = bars.size() - days; i < bars.size(); i++) { highest = std::max(highest, bars[i].high); lowest = std::min(lowest, bars[i].low); } TCData->add(daysString + "High", symbol, dtoa(highest)); TCData->add(daysString + "Low", symbol, dtoa(lowest)); } } void ProcessData::RSI(const std::string &symbol, const BarList &bars, const unsigned int &days) { if(days < bars.size()) { double upAverage = 0.0; double downAverage = 0.0; double change; // initial smoothed value for (unsigned int i = 1; i <= days; i++) { change = bars[i].close - bars[i - 1].close; if (change < 0) { downAverage -= change; } else { upAverage += change; } } upAverage = 
upAverage/days; downAverage = downAverage/days; // subsequent updates for(unsigned int i = days + 1; i < bars.size(); i++) { change = bars[i].close - bars[i - 1].close; if(change < 0) { downAverage = (downAverage * (days - 1) - change) / days; upAverage = (upAverage * (days - 1)) / days; } else { upAverage = (upAverage * (days - 1) + change) / days; downAverage = (downAverage * (days - 1)) / days; } } if (downAverage == 0) { fundamentalData->add(itoa(days) + " Day RSI", symbol, dtoa(100.0)); } else if(upAverage != 0) { fundamentalData->add(itoa(days) + " Day RSI", symbol, dtoa(100 - 100 / (1 + upAverage/downAverage))); } } } void ProcessData::pristineLists(const std::string &symbol, const BarList &bars) { if (bars.size() >= 3) { enum TailType { TAIL_BOTTOMING, TAIL_NONE, TAIL_TOPPING }; enum COGType { COG_MINUS, COG_NONE, COG_PLUS }; enum RangeBarType { RANGE_BAR_NARROW, RANGE_BAR_WIDE, RANGE_BAR_NONE }; TailType tail = TAIL_NONE; COGType COG = COG_NONE; RangeBarType rangeBar = RANGE_BAR_NONE; int highUpDays = trend(symbol, bars, TREND_HIGH, 0); int prevHighUpDays = trend(symbol, bars, TREND_HIGH, 1); int greenBars = trend(symbol, bars, TREND_GREEN_BAR,0); int prevGreenBars = trend(symbol, bars, TREND_GREEN_BAR, 1); unsigned int lastBar = bars.size() - 1; double range = bars[lastBar].high - bars[lastBar].low; double prevRange = bars[lastBar - 1].high - bars[lastBar - 1].low; double bodySize = fabs(bars[lastBar].close - bars[lastBar].open); double tailSize; // Changing of the guard if (greenBars == 1 && prevGreenBars <= -3) { COG = COG_PLUS; } else if ((greenBars == -1) && (prevGreenBars >= 3)) { COG = COG_MINUS; } // Topping/Bottoming Tail if (prevGreenBars >= 3) { tail = TAIL_TOPPING; tailSize = bars[lastBar].high - std::max(bars[lastBar].open, bars[lastBar].close); } else if (prevGreenBars <= -3) { tail = TAIL_BOTTOMING; tailSize = std::min(bars[lastBar].open, bars[lastBar].close) - bars[lastBar].low; } // If we have the proper direction, make sure that the tail 
is large enough to qualify if (tail != TAIL_NONE && (!((range >= prevRange * 0.5) && (tailSize >= bodySize * 0.6) && (tailSize >= range / 3.0)))) { tail = TAIL_NONE; } // Pristine Buy/Sell Setups if (highUpDays <= -3 || greenBars <= -3) { pristineData->add("PBS", symbol, itoa(1)); } else if (highUpDays >= 3 || greenBars <= 3) { pristineData->add("PSS", symbol, itoa(1)); } // Pristine Buy/Sell Setups+ if (COG == COG_PLUS || tail == TAIL_BOTTOMING) { pristineData->add("PBS+", symbol, itoa(1)); } else if (COG == COG_MINUS || tail == TAIL_TOPPING) { pristineData->add("PSS+", symbol, itoa(1)); } // For the rest we need 20 days of previous history for SMA and average range if (bars.size() >= 21) { double price= bars[lastBar].close; double closeTotal = 0; double rangeTotal = 0; for (unsigned int i = bars.size() - 21; i < bars.size() - 1; i++) { closeTotal += bars[i].close; rangeTotal += bars[i].high - bars[i].low; } double SMA = closeTotal/20; double avgRange = rangeTotal/20; // Wide Range Bar, this is both its own list, and a criterion for some other lists if (range <= 0.65 * avgRange) { rangeBar = RANGE_BAR_NARROW; } else if (range >= 1.7 * avgRange) { rangeBar = RANGE_BAR_WIDE; pristineData->add("WRB", symbol, itoa(1)); } else { rangeBar = RANGE_BAR_NONE; } // Climatic Buy/Sell if (((prevGreenBars <= -3) || (prevHighUpDays <= -3)) && (price <= 0.92 * SMA) && ((tail == TAIL_BOTTOMING) || (rangeBar == RANGE_BAR_NARROW) || (COG == COG_PLUS))) { pristineData->add("CBS", symbol, itoa(1)); } else if (((prevGreenBars >= 3) || (prevHighUpDays >= 3)) && (price >= 1.08 * SMA) && ((tail == TAIL_TOPPING) || (rangeBar == RANGE_BAR_NARROW) || (COG == COG_PLUS))) { pristineData->add("CSS", symbol, itoa(1)); } // Daily Bullish/Bearish 20/20 double closePositionInRange = (bars[lastBar].close - bars[lastBar].low)/ range; double openPositionInRange = (bars[lastBar].open - bars[lastBar].low)/ range; if ((openPositionInRange <= 0.20) && (closePositionInRange >= 0.80) && (prevGreenBars >= 
1) && (rangeBar == RANGE_BAR_WIDE)) { pristineData->add("D20+", symbol, itoa(1)); } else if ((closePositionInRange <= 0.20) && (openPositionInRange >= 0.80) && (prevGreenBars <= -1) && (rangeBar == RANGE_BAR_WIDE)) { pristineData->add("D20-", symbol, itoa(1)); } } } } ///////////////////////////////////////////////////////////////////////// // Correlation Routines ///////////////////////////////////////////////////////////////////////// /* For each record we find the % change between the open and the close. For each date with a record for both lists, we add that point to the correlation. */ void ProcessData::doOpenCloseCorrelation(const BarList &listX, const BarList &listY, bool &success, double &M, double &R2) { double x, y; double weight = 1.0; double sumXY = 0.0; double sumXX = 0.0; double sumYY = 0.0; unsigned int indexX = 0; unsigned int indexY = 0; int n = 0; success = false; while (true) { if (indexX >= listX.size() || indexY >= listY.size()) { break; } if (listX[indexX].startTime > listY[indexY].startTime) { indexY++; } else if (listX[indexX].startTime < listY[indexY].startTime) { indexX++; } else { x = (listX[indexX].close - listX[indexX].open) / listX[indexX].open; y = (listY[indexY].close - listY[indexY].open) / listY[indexY].open; sumXY += x * y * weight; sumXX += x * x * weight; sumYY += y * y * weight; n++; indexY++; indexX++; weight = weight * dailyWeight; } } if (n >= 7) { M = sumXY / sumXX; R2 = sumXY * sumXY / sumXX / sumYY; success = true; } } /* For each pair of adjacent records we find the % change between the first close and the second close. We only look at pairs if a pair with the same dates exists in both lists. If two lists have the same dates, except exactly one day is missing in the middle, two pairs will be ignored from the longer list, and one pair from the shorter list. 
*/ void ProcessData::doCloseCloseCorrelation(const BarList &listX, const BarList &listY, bool &success, double &M, double &R2) { double x, y; double weight = 1.0; double sumXY = 0.0; double sumXX = 0.0; double sumYY = 0.0; unsigned int indexX1 = 0; unsigned int indexY1 = 0; unsigned int indexX2 = 0; unsigned int indexY2 = 0; int n = 0; success = false; while (true) { indexX2 = indexX1 +1; indexY2 = indexY1 +1; if (indexX2 >= listX.size() || indexY2 >= listY.size()) { break; } if (listX[indexX1].startTime > listY[indexY1].startTime) { indexY1++; } else if (listX[indexX1].startTime < listY[indexY1].startTime) { indexX1++; } else if (listX[indexX2].startTime != listY[indexY2].startTime) { indexX1++; indexY1++; } else { x = (listX[indexX2].close - listX[indexX1].close) / listX[indexX1].close; y = (listY[indexY2].close - listY[indexY1].close) / listY[indexY1].close; sumXY += x * y * weight; sumXX += x * x * weight; sumYY += y * y * weight; n++; indexY1++; indexX1++; weight = weight * dailyWeight; } } if (n >= 7) { success = true; M = sumXY / sumXX; R2 = sumXY * sumXY / sumXX / sumYY; } } void ProcessData::findBestCorrelation(const std::string &symbol, const BarList &data1Day, bool &success, std::string &matchesSymbol, double &M, double &R2) { success = false; for (unsigned int i = 0; i < possibleCorrelations.size(); i++) { if (possibleCorrelations[i].first != symbol) { bool currentSuccess = false; double currentM = 0.0; double currentR2 = 0.0; doOpenCloseCorrelation(possibleCorrelations[i].second, data1Day, currentSuccess, currentM, currentR2); if (currentSuccess) { if (!success) { success = true; matchesSymbol = possibleCorrelations[i].first; M = currentM; R2 = currentR2; } else if (currentR2 > R2) { matchesSymbol = possibleCorrelations[i].first; M = currentM; R2 = currentR2; } } } } } void ProcessData::addFuturesCorrelation(const BarList &data1Day) { _futuresDailyData = data1Day; } void ProcessData::addPossibleCorrelation(const std::string &symbol, const BarList 
&data1Day) { possibleCorrelations.push_back(std::make_pair(symbol, data1Day)); } time_t incMonth(time_t time, int monthsToAdd) { struct tm brokenDown; localtime_r(&time, &brokenDown); brokenDown.tm_isdst = -1; brokenDown.tm_mon += monthsToAdd; return mktime(&brokenDown); } time_t incDay(time_t time, int daysToAdd) { struct tm brokenDown; localtime_r(&time, &brokenDown); brokenDown.tm_isdst = -1; brokenDown.tm_mday += daysToAdd; return mktime(&brokenDown); } time_t getYear(time_t time) { struct tm brokenDown; localtime_r(&time, &brokenDown); return brokenDown.tm_year; } // Doesn't conform to any of the standards, but it should be good enough for the // charts which only care if the week has changed. This is based on the END of the // week. For example, if Jan 1st is a Saturday, then all the preceeding days of // the week from the end of Dec in the previous year will be week 0 int weekOfTheYear(time_t time) { struct tm brokenDown; localtime_r(&time, &brokenDown); if (getYear(incDay(time, (6 - brokenDown.tm_wday))) != getYear(time)) { return 0; } return (brokenDown.tm_yday + (6 - brokenDown.tm_wday)) / 7; } void ProcessData::buildCharts(const std::string &symbol, const BarList &data1Day) { if (data1Day.size() > 0) { // Daily Charts ChartPrices prices; time_t dateCutoff = (incMonth(data1Day.back().startTime, -3)); for(BarList::const_reverse_iterator rit = data1Day.rbegin(); rit != data1Day.rend(); rit++) { if (rit->startTime <= dateCutoff) break; if (rit->close <= 0) break; prices.push_back(rit->close); } createChart(_chartsDirectory + "/D_" + encodeFilename(symbol) + ".gif", prices, 66); // Weekly Charts prices.clear(); int previousWeek; int currentWeek = INT_MAX; dateCutoff = (incMonth(data1Day.back().startTime, -12)); int days = 0, weeks = 0; for(BarList::const_reverse_iterator rit = data1Day.rbegin(); rit != data1Day.rend(); rit++) { days++; if (rit->startTime <= dateCutoff) break; if (rit->close <= 0) break; previousWeek = currentWeek; currentWeek = 
weekOfTheYear(rit->startTime); if (currentWeek != previousWeek){ prices.push_back(rit->close);weeks++;} } createChart(_chartsDirectory + "/W_" + encodeFilename(symbol) + ".gif", prices, 53); } }