#include #include #include #include #include #define DUMP_MATCHES_TO_FILE class REBenchmark : public QObject { Q_OBJECT QString data; QByteArray utf8Data; private: void provideData() { QTest::addColumn("regexp"); QTest::newRow("URI") << "([a-zA-Z][a-zA-Z0-9]*)://([^ /]+)(/[^ ]*)?"; QTest::newRow("Email") << "([^ @]+)@([^ @]+)"; QTest::newRow("Date") << "([0-9][0-9]?)/([0-9][0-9]?)/([0-9][0-9]([0-9][0-9])?)"; QTest::newRow("URI|Email") << "([a-zA-Z][a-zA-Z0-9]*)://([^ /]+)(/[^ ]*)?|([^ @]+)@([^ @]+)"; } private slots: void initTestCase() { QFile file("howto"); QVERIFY(file.open(QIODevice::ReadOnly)); QByteArray contents = file.readAll(); data = QString::fromLatin1(contents.constData()); utf8Data = data.toUtf8(); file.close(); } void PCREBenchmark_data() { provideData(); } void PCREBenchmark() { QFETCH(QString, regexp); QByteArray utf8Regexp = regexp.toUtf8(); const char *errorPtr; int errOffset; int captureCount; QVector ovector; int offset; int result; pcre *code = 0; pcre_extra *extra = 0; #ifdef DUMP_MATCHES_TO_FILE QString filename = QString("%1-%2.txt").arg(QTest::currentTestFunction(), QTest::currentDataTag()); QFile dump(filename); QVERIFY(dump.open(QIODevice::WriteOnly | QIODevice::Truncate)); QTextStream ts(&dump); #endif QBENCHMARK { code = pcre_compile(utf8Regexp.constData(), PCRE_UTF8 | PCRE_UCP | PCRE_NO_UTF8_CHECK, &errorPtr, &errOffset, NULL); QVERIFY(code); extra = pcre_study(code, PCRE_STUDY_JIT_COMPILE, &errorPtr); QVERIFY(!errorPtr); pcre_fullinfo(code, extra, PCRE_INFO_CAPTURECOUNT, &captureCount); ovector.resize((captureCount + 1) * 3); offset = 0; do { result = pcre_exec(code, extra, utf8Data.constData(), utf8Data.size(), offset, PCRE_NO_UTF8_CHECK, ovector.data(), ovector.size()); QVERIFY(result); // either match or error is fine -- but bail out in case of error offset = ovector[1]; #ifdef DUMP_MATCHES_TO_FILE if (result > 0) ts << QString::fromUtf8(utf8Data.constData() + ovector[0], ovector[1] - ovector[0]) << endl; #endif } while (result > 0); QVERIFY(result == PCRE_ERROR_NOMATCH); pcre_free_study(extra); pcre_free(code); } } void ICUBenchmark_data() { provideData(); } void ICUBenchmark() { QFETCH(QString, regexp); // avoid deep copies icu::UnicodeString rx(true, regexp.utf16(), regexp.length()); icu::UnicodeString subject(true, data.utf16(), data.length()); UErrorCode status = U_ZERO_ERROR; UParseError pe; icu::RegexPattern *pattern = 0; icu::RegexMatcher *matcher = 0; #ifdef DUMP_MATCHES_TO_FILE QString filename = QString("%1-%2.txt").arg(QTest::currentTestFunction(), QTest::currentDataTag()); QFile dump(filename); QVERIFY(dump.open(QIODevice::WriteOnly | QIODevice::Truncate)); QTextStream ts(&dump); #endif QBENCHMARK { pattern = icu::RegexPattern::compile(rx, pe, status); QVERIFY(pattern); QVERIFY(!status); matcher = pattern->matcher(subject, status); QVERIFY(matcher); QVERIFY(!status); while (matcher->find()) { #ifdef DUMP_MATCHES_TO_FILE int start = matcher->start(status); QVERIFY(!status); int end = matcher->end(status); QVERIFY(!status); ts << data.mid(start, end - start) << endl; #endif } delete matcher; delete pattern; } } void QRegExpBenchmark_data() { provideData(); } void QRegExpBenchmark() { QFETCH(QString, regexp); #ifdef DUMP_MATCHES_TO_FILE QString filename = QString("%1-%2.txt").arg(QTest::currentTestFunction(), QTest::currentDataTag()); QFile dump(filename); QVERIFY(dump.open(QIODevice::WriteOnly | QIODevice::Truncate)); QTextStream ts(&dump); #endif QBENCHMARK { QRegExp rx(regexp); QVERIFY(rx.isValid()); int pos = 0; while ((pos = rx.indexIn(data, pos)) != -1) { #ifdef DUMP_MATCHES_TO_FILE ts << rx.cap(0) << endl; #endif pos += rx.matchedLength(); } } } }; QTEST_MAIN(REBenchmark) #include "main.moc"