/* * This program is responsible for reading stock data from CSV files and * copying them to the database. For simplicity the program which handles * the overnight formulas writes most of it's output to CSV files. Those * can be read by a number of sources, including the realtime alerts server * and this program. This program copies selected fields into various database * tables. * * There are several database tables for historical reasons. symbol_info_t and * symbol_info_f predate alerts_daily. They should be avoided in new * development. * * This program replaces UpdateStockData.dpr. It serves the same purpose, * although it works a different way. This new program is mostly table driven. * That makes it easier to add new fields. Also, this program will merge all * of the CVS files into one big table. We assume that the same field won't * appear in two different fields. That assumption may be enforced in some * future version. * * Command Line Inputs: * o database_RW - the name of the database which will receive the data. * o fields_file - This is the name of a file containing the structure of the * data. What fields to copy where. The default should work if you've set * up the standard directory structure. * o input_files - This is a list of file names, seperated by commas. These * files contain the actual data that we want to copy into the database. The * default should work if you've set up the standard directory structure. * o date - This is the date that we'll use for the alerts_daily field table. * This is typically tomorow, because we do the overnight analysis in the * late afternoon to go with the alerts that we will generate tomorrow. This * should be a format appropriate for mysql, including quotes if this is a * literal. The default is a formula which guesses an appropriate date and * should usually work. * * This process has very little output. The database sends some standard * messages to the standard out. If you are running a cron job, you should * probably pipe standard out to null, or at least somewhere. Error messages * from the main program will appear on standard error, so cron should mail them * to you. The database will retry forever, so cron couldn't mail those errors * to you. Those would also be on the standard output. There is not a lot * of error checking in the program. In particular, trying to read from a file * which doesn't exist is like trying to read from any empty file. */ #include #include #include "../../shared/GlobalConfigFile.h" #include "../../shared/TwoDLookup.h" #include "../../shared/MiscSupport.h" #include "../../shared/DatabaseSupport.h" #include "../../shared/DatabaseWithRetry.h" #include "../misc_framework/AccumulateInsert.h" std::string quoteOrNull(std::string s) { if (s.empty()) return "NULL"; return '"' + mysqlEscapeString(s) + '"'; } int main(int argc, char *argv[]) { if (!addConfigItemsFromCommandLine(argv+1)) { std::cerr<<"Error parsing command line.\n"; exit(1); } configItemsComplete(); // This is the structure. This is the part which is only changed by the // programmer and used to be part of the Pascal code. The structure is // as follows: // o The first column, the row headers, contain id numbers. These have no // intrinsic meaning. They are just unique identifiers. That's required // to use a TwoDArray. I used consecutive integers only because the // spreadsheet program makes it easy to do that. // o The second column, "Instructions", says what type of data we're talking // about. This is a rule that helps us interpret the third column, // "Source Field". The values in the second column can be: // . literal - That means to copy the value in the next column as is (no // quotes or anything) to the sql. // . file - That means to copy the value from the allValues table. The // column header in that table will match the source field in this // table. // . special - those are special values that we compute in the code. // The source field says which value to select. "symbol" and "date" are // two examples. // o The third column, "Dest Table", is name name of the mysql table to // receive this data. // o The fourth column, "Dest Column", is the name of the column in the mysql // table. We assume that the table name and the column name are both // valid without needing backquotes. Any database column which is not // named here will get the default value from mysql. TwoDArray allFields; allFields.loadFromCSV(getConfigItem("fields_file", "data/FieldsToCopy.csv")); // This is a collection of all values that we read from the overnight // analysis program. The row headings are all symbols. The column headings // all match the "Source Field" described above. There will be extra columns // because the CSV files are used by multiple programs. Some fields we don't // care about, but other programs do. TwoDArray allValues; { // We read in a list of files, and we put them all together. Some types of // data are split into different files for reasons that are not relevant // to this program. (For example, F_OvernightData, was designed for data // only used by this program, to save memory in the other programs that // don't need this data.) Also, we sometimes have the same files in // different directories, each with different symbols. This program has // the option of merging those together. const std::string inputFileList = getConfigItem("input_files", "data/F_OvernightData.csv," "data/OvernightData.csv,data/TC_OvernightData.csv"); const std::vector< std::string > inputFileVector = explode(",", inputFileList); for (std::vector< std::string >::const_iterator it = inputFileVector.begin(); it != inputFileVector.end(); it++) allValues.appendFromCSV(*it); } // This is specifically aimed at the date column in the alerts_daily table. std::string dateString = getConfigItem("date"); if (dateString.empty()) { // Add 12 hours. Anything in the afternoon will apply to tomorrow. // Anything in the morning will apply to today. const time_t base = time(NULL) + 12*60*60; tm brokenDown; localtime_r(&base, &brokenDown); char buffer[20]; sprintf(buffer, "'%04d-%02i-%02i'", brokenDown.tm_year+1900, brokenDown.tm_mon+1, brokenDown.tm_mday); dateString = buffer; switch (brokenDown.tm_wday) { case 0: // Sunday. Apply this to Monday. dateString += " + INTERVAL 1 DAY"; break; case 6: // Saturday. Apply this to Monday. dateString += " + INTERVAL 2 DAY"; break; } } // The destination. This should be the master database. DatabaseWithRetry database(false, "database"); // First, go though the list of fields and group them by the dedtination // table. std::set< std::string > tables; for (TwoDArray::StringList::const_iterator it = allFields.getRowHeaders().begin(); it != allFields.getRowHeaders().end(); it++) tables.insert(allFields.get("Dest Table", *it)); // Iterate over the list of tables, first. for (std::set< std::string >::const_iterator it = tables.begin(); it != tables.end(); it++) { std::string const &tableName = *it; if (tableName.empty()) continue; // Create the AccumulateInsert for this table by iterating over the list // of fields. std::string common; for (TwoDArray::StringList::const_iterator fieldIt = allFields.getRowHeaders().begin(); fieldIt != allFields.getRowHeaders().end(); fieldIt++) if (allFields.get("Dest Table", *fieldIt) == tableName) { if (common.empty()) common = "REPLACE INTO " + tableName + "("; else common += ", "; common += allFields.get("Dest Column", *fieldIt); } common += ")"; AccumulateInsert accumulator(common); // Iterate over the list of symbols. For each combination of symbol and // mysql table, we will add one mysql row. for (TwoDArray::StringList::const_iterator symbolIt = allValues.getRowHeaders().begin(); symbolIt != allValues.getRowHeaders().end(); symbolIt++) { std::string const &symbol = *symbolIt; if (symbol.empty()) continue; std::string values; // Find all of the fields. These are in the same order as when we // built the acumulator. for (TwoDArray::StringList::const_iterator fieldIt = allFields.getRowHeaders().begin(); fieldIt != allFields.getRowHeaders().end(); fieldIt++) if (allFields.get("Dest Table", *fieldIt) == tableName) { if (values.empty()) values = "("; else values += ", "; std::string const &instructions = allFields.get("Instructions", *fieldIt); std::string const &sourceField = allFields.get("Source Field", *fieldIt); if (instructions == "file") values += quoteOrNull(allValues.get(sourceField, symbol)); else if (instructions == "literal") values += sourceField; else if ((instructions == "special") && (sourceField == "symbol")) values += quoteOrNull(symbol); else if ((instructions == "special") && (sourceField == "date")) values += dateString; else { std::cerr<<"Invalid field. Instructions=“" <