#include #include #include #include #include #include #include #include #include #include #define RAPIDXML_NO_EXCEPTIONS #include "rapidxml.hpp" //#include "../other_parsers/pugixml/pugixml.hpp" //#include "../other_parsers/tinyxml/tinyxml.h" #define PUGAPI_VARIANT 0x58475560 #define PUGAPI_VERSION_MAJOR 1 #define PUGAPI_VERSION_MINOR 2 //#include "../other_parsers/pugxml/pugxml.h" using namespace std; #define ulong unsigned long long // Buffer used for parsing vector buffer; namespace rapidxml { void parse_error_handler(const char *what, void *where_void) { // Print data around error char *where = reinterpret_cast(where_void); const char *begin = (std::max)(where - 50, &buffer.front()); const char *end = (std::min)(where + 50, &buffer.back() + 1); int ch = *where; std::cout << ch << "\n"; *where = '#'; std::cout << "rapidxml::parse_error: " << what << " at \n*** BEGIN (error position marked by # character)\n" << string(begin, end) << "\n*** END\n"; std::exit(1); } } /////////////////////////////////////////////////////////////////////////////// // Timing, only works on x86, MSVC or gcc #if defined(_MSC_VER) typedef __int64 tick_t; inline tick_t ticks() { __asm __emit 0fh __asm __emit 031h // RDTSC, result conveniently in EDX:EAX } #elif defined(__GNUC__) typedef long long tick_t; inline tick_t ticks() { tick_t result; __asm__ __volatile__ ("rdtsc" : "=A"(result)); return result; } #else #error "This test is only supported on MSVC or gcc, under x86" #endif /////////////////////////////////////////////////////////////////////////////// // Parsers template struct rapidxml_parser { rapidxml::xml_document doc; void parse(char *data) { doc.parse(data); } }; /* struct tinyxml_parser { TiXmlDocument doc; void parse(char *data) { doc.Parse(data, 0, TIXML_ENCODING_UTF8); } }; template struct pugixml_parser { pugi::xml_document doc; void parse(char *data) { doc.parse(data, Flags); } }; template struct pugxml_parser { pug::xml_parser parser; void parse(char *data) { parser.parse(data, Flags); } }; */ struct strlen_parser { static size_t length; // static to prevent it from being optimized away along with the timed strlen() call void parse(char *data) { length = strlen(data); } }; size_t strlen_parser::length; /////////////////////////////////////////////////////////////////////////////// // Test template void test(const char *filename, const char *description) { // Load data ifstream stream(filename, ios::binary); if (!stream) throw runtime_error(string("cannot open file ") + filename); stream.unsetf(ios::skipws); stream.seekg(0, ios::end); size_t size = stream.tellg(); stream.seekg(0); vector data(size + 1); stream.read(&data.front(), static_cast(size)); data[size] = '\0'; // Determine minimum CPU cycles it takes to parse test data // A very large number of mesurements is taken over several seconds. // The smallest one wins. // If parsing time is less than average time between interrupts, // there is a good chance that there will be at least one measurement // undisturbed by context switching and other OS activity. // A file of 50kB at 10 cycles/char takes 500,000 cycles to parse. // On 2 GHz CPU this is 1/4000 of a second. // During 2 seconds (taking into account restoring of the data), this file is parsed // several thousands of times. tick_t min = 0; clock_t start = std::clock(); while (std::clock() < start + 2 * CLOCKS_PER_SEC) // 2 seconds { buffer = data; // Make a copy of data (this must be done every time because parsing destroys the data) Parser parser; // Creation and destruction of parser not timed char *xml = &buffer.front(); tick_t t1 = ticks(); // 1st timing parser.parse(xml); tick_t t2 = ticks(); // 2nd timing if (min == 0 || t2 - t1 < min) min = t2 - t1; } // Return minimum cycles/character cout << " " << fixed << setprecision(1) << double(min) / size << " cycles/char " << description << "\n"; } ulong timer() { timeval tv; if( gettimeofday( &tv, NULL) ) throw new std::exception; return ((ulong) tv.tv_sec * 1000000) + tv.tv_usec; } const double multiplier = 1.0 / 1000000.0; struct Timer { private: ulong started; public: Timer() : started(0) {} void start() { started = timer(); } double stop() { return multiplier * (timer() - started); } }; vector load(const char* filename) { ifstream stream(filename, ios::binary); if (!stream) throw runtime_error(string("cannot open file ") + filename); stream.unsetf(ios::skipws); stream.seekg(0, ios::end); size_t size = stream.tellg(); assert(size); printf("Document Length: %d bytes\n", size); stream.seekg(0); vector data(size + 1); stream.read(&data.front(), static_cast(size)); data[size] = '\0'; printf("Data Length: %d bytes\n", data.size()); return data; } double rapidXmlFastest(const char* data, int iterations) { rapidxml_parser parser; size_t len = strlen(data) + 1; char* xml = (char*)malloc(len); strncpy(xml, data, len); Timer t; t.start(); for(int i = 0; i < iterations; ++i) { //strncpy(xml, data, len); parser.parse(xml); } double time = t.stop(); free(xml); return time; } double rapidXmlDefault(const char* data, int iterations) { rapidxml_parser parser; size_t len = strlen(data) + 1; char* xml = (char*)malloc(len); strncpy(xml, data, len); Timer t; t.start(); for(int i = 0; i < iterations; ++i) { strncpy(xml, data, len); parser.parse(xml); } double time = t.stop(); free(xml); return time; } double testStrlen(char* data, int iterations) { strlen_parser parser; Timer t; t.start(); for(int i = 0; i < iterations; ++i) { parser.parse(data); } double time = t.stop(); return time; } void test_all(const char *filename) { using namespace rapidxml; printf("\nFile %s\n", filename); printf(" strlen:\n"); test(filename, "strlen() on XML data"); printf(" rapidxml:\n"); test >(filename, "mode=fastest"); test >(filename, "mode=default"); //printf(" pugixml:\n"); //test >(filename, "mode=fastest"); // test >(filename, "mode=default"); // printf(" pugxml:\n"); // test >(filename, "mode=fastest"); //printf(" tinyxml:\n"); //test(filename, "mode=default"); } int main() { try { vector data = load("speed_test.xml"); char* xml; size_t len = data.size(); xml = (char*)malloc(len); strncpy(xml, (const char*)&data.front(), len); int iterations = 1000; //test_all("../xml_files/hamlet.xml"); for(int i = 0; i < 10; ++i) { double time = rapidXmlFastest(xml, iterations); printf("Fastest:%f MB/s\n", (len * iterations) / (time * (1024 * 1024) ) ); } for(int i = 0; i < 10; ++i) { double time = rapidXmlDefault(xml, iterations); printf("Default:%f MB/s\n", (len * iterations) / (time * (1024 * 1024) ) ); } for(int i = 0; i < 10; ++i) { double time = testStrlen(xml, iterations); printf("strlen:%f MB/s\n", (len * iterations) / (time * (1024 * 1024) ) ); } //for(int i = 0; i < 10; ++i) // printf("Fastest:%d\n", rapidXmlDefault(data, 1)); return 0; } catch (exception &e) { std::cout << "Error: " << e.what() << "\n"; return 1; } }