From 3a11ccb85480158428e8e271f596d4f44c95bdcf Mon Sep 17 00:00:00 2001 From: RickyRister <42636155+RickyRister@users.noreply.github.com> Date: Fri, 14 Mar 2025 18:44:13 -0700 Subject: [PATCH] Update cipt parsing (#5712) * refactor * move thing out * write unit tests * get thing to work * optimization? * fix build failure --- oracle/CMakeLists.txt | 1 + oracle/src/oracleimporter.cpp | 5 +- oracle/src/parsehelpers.cpp | 63 +++++++++ oracle/src/parsehelpers.h | 8 ++ tests/CMakeLists.txt | 1 + tests/oracle/CMakeLists.txt | 11 ++ tests/oracle/parse_cipt_test.cpp | 219 +++++++++++++++++++++++++++++++ 7 files changed, 305 insertions(+), 3 deletions(-) create mode 100644 oracle/src/parsehelpers.cpp create mode 100644 oracle/src/parsehelpers.h create mode 100644 tests/oracle/CMakeLists.txt create mode 100644 tests/oracle/parse_cipt_test.cpp diff --git a/oracle/CMakeLists.txt b/oracle/CMakeLists.txt index 6f8a7043a..130dfdaab 100644 --- a/oracle/CMakeLists.txt +++ b/oracle/CMakeLists.txt @@ -15,6 +15,7 @@ set(oracle_SOURCES src/oraclewizard.cpp src/oracleimporter.cpp src/pagetemplates.cpp + src/parsehelpers.cpp src/qt-json/json.cpp ../cockatrice/src/game/cards/card_database.cpp ../cockatrice/src/game/cards/card_database_manager.cpp diff --git a/oracle/src/oracleimporter.cpp b/oracle/src/oracleimporter.cpp index 07aad804d..31cf0e9a5 100644 --- a/oracle/src/oracleimporter.cpp +++ b/oracle/src/oracleimporter.cpp @@ -1,6 +1,7 @@ #include "oracleimporter.h" #include "game/cards/card_database_parser/cockatrice_xml_4.h" +#include "parsehelpers.h" #include "qt-json/json.h" #include @@ -157,9 +158,7 @@ CardInfoPtr OracleImporter::addCard(QString name, // DETECT CARD POSITIONING INFO // cards that enter the field tapped - QRegularExpression ciptRegex("( it|" + QRegularExpression::escape(name) + - ") enters( the battlefield)? tapped(?! unless)"); - bool cipt = ciptRegex.match(text).hasMatch(); + bool cipt = parseCipt(name, text); bool landscapeOrientation = properties.value("maintype").toString() == "Battle" || properties.value("layout").toString() == "split" || diff --git a/oracle/src/parsehelpers.cpp b/oracle/src/parsehelpers.cpp new file mode 100644 index 000000000..c7f340139 --- /dev/null +++ b/oracle/src/parsehelpers.cpp @@ -0,0 +1,63 @@ +#include "parsehelpers.h" + +#include +#include + +/** + * Parses the card text to determine if the card should have the cipt tag + * + * The parsing logic is able to handle the following cases: + * - " enters tapped" + * - " enters tapped", if the card name starts with the shortname + * - "This enters tapped" + * - "..., it enters tapped" + * - Any naming scheme that appends a non-alphanumeric character plus extra text to the end of the name. + * (e.g. name is "Card Name_SET" or "Card Name (Set)" and text contains "Card Name enters tapped") + * + * However, it will still miss on certain cases: + * - shortnames that aren't the at the beginning of the card name + * + * Note that "...enters tapped unless..." returns false. + * + * @param name The name of the card + * @param text The oracle text of the card + */ +bool parseCipt(const QString &name, const QString &text) +{ + // Try to split shortname on most non-alphanumeric characters (including _) + auto isShortnameDivider = [](const QChar &c) { + return c == '_' || (!c.isLetterOrNumber() && c != '\'' && c != '\"'); + }; + + // Try all possible shortnames. + // This also handles the case of extra text appended at end. + QStringList possibleNames; + bool inAlphanumericPart = true; + for (int i = 0; i < name.length(); ++i) { + if (isShortnameDivider(name.at(i))) { + if (inAlphanumericPart) { + // only add to names on a "falling edge", in order to reduce the amount of redundant splits + possibleNames.append(QRegularExpression::escape(name.left(i))); + inAlphanumericPart = false; + } + } else { + inAlphanumericPart = true; + } + } + + // and the full name + possibleNames.append(QRegularExpression::escape(name)); + + QString subject = "(it|" // "..., it enters tapped" + "(T|t)his [^ ]+|" // "This enters tapped" + + possibleNames.join("|") + ")"; + + auto ciptPattern = QRegularExpression( + // cipt phrase is either first sentence of line, or is after a punctuation mark + "(^|(, |\\. ))" + subject + + // support old wording, and exclude the "unless" case + " enters( the battlefield)? tapped(?! unless)", + QRegularExpression::MultilineOption); + + return ciptPattern.match(text).hasMatch(); +} diff --git a/oracle/src/parsehelpers.h b/oracle/src/parsehelpers.h new file mode 100644 index 000000000..40a36c753 --- /dev/null +++ b/oracle/src/parsehelpers.h @@ -0,0 +1,8 @@ +#ifndef PARSEHELPERS_H +#define PARSEHELPERS_H + +#include + +bool parseCipt(const QString &name, const QString &text); + +#endif // PARSEHELPERS_H diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index d9e6b6345..f6b2d12d2 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -51,3 +51,4 @@ target_link_libraries(password_hash_test cockatrice_common Threads::Threads ${GT add_subdirectory(carddatabase) add_subdirectory(loading_from_clipboard) +add_subdirectory(oracle) diff --git a/tests/oracle/CMakeLists.txt b/tests/oracle/CMakeLists.txt new file mode 100644 index 000000000..0c11eb751 --- /dev/null +++ b/tests/oracle/CMakeLists.txt @@ -0,0 +1,11 @@ +add_executable(parse_cipt_test ../../oracle/src/parsehelpers.cpp parse_cipt_test.cpp) + +if(NOT GTEST_FOUND) + add_dependencies(parse_cipt_test gtest) +endif() + +set(TEST_QT_MODULES ${COCKATRICE_QT_VERSION_NAME}::Widgets) + +target_link_libraries(parse_cipt_test cockatrice_common Threads::Threads ${GTEST_BOTH_LIBRARIES} ${TEST_QT_MODULES}) + +add_test(NAME parse_cipt_test COMMAND parse_cipt_test) diff --git a/tests/oracle/parse_cipt_test.cpp b/tests/oracle/parse_cipt_test.cpp new file mode 100644 index 000000000..f5036f962 --- /dev/null +++ b/tests/oracle/parse_cipt_test.cpp @@ -0,0 +1,219 @@ +#include "../../oracle/src/parsehelpers.h" + +#include "gtest/gtest.h" + +TEST(ParseCiptTest, parsesThisEntersTapped) +{ + auto name = "Boring Fields"; + auto text = "This land enters tapped."; + + ASSERT_TRUE(parseCipt(name, text)); +} + +TEST(ParseCiptTest, parsesThisEntersTheBattlefieldTapped) +{ + auto name = "Boring Fields"; + auto text = "This land enters the battlefield tapped.\n" + "{T}: Add {G}."; + + ASSERT_TRUE(parseCipt(name, text)); +} + +TEST(ParseCiptTest, parsesItEntersTappedAtEndOfSentence) +{ + auto name = "Shocking Fields"; + auto text = "As this land enters, you may pay 2 life. If you don't, it enters tapped.\n" + "{T}: Add {G}."; + + ASSERT_TRUE(parseCipt(name, text)); +} + +TEST(ParseCiptTest, parsesThisEntersTappedWhenNotOnFirstLine) +{ + auto name = "Boring Fields"; + auto text = "Flying\n" + "This land enters tapped.\n" + "{T}: Add {G}."; + + ASSERT_TRUE(parseCipt(name, text)); +} + +TEST(ParseCiptTest, parsesFullNameWithUnderscoreAppendedText) +{ + auto name = "Boring Fields_SL50"; + auto text = "Boring Fields enters tapped.\n" + "{T}: Add {G}."; + + ASSERT_TRUE(parseCipt(name, text)); +} + +TEST(ParseCiptTest, parsesFullNameWithBracketsAppendedText) +{ + auto name = "Boring Fields (SL50)"; + auto text = "Boring Fields enters tapped.\n" + "{T}: Add {G}."; + + ASSERT_TRUE(parseCipt(name, text)); +} + +TEST(ParseCiptTest, parsesFullNameWithComma) +{ + auto name = "Bob, the Legend"; + auto text = "Bob, the Legend enters tapped.\n" + "Whenever Bob attacks, you win the game."; + + ASSERT_TRUE(parseCipt(name, text)); +} + +TEST(ParseCiptTest, parsesFullNameWithCommaAtEndOfSentence) +{ + auto name = "Bob, the Legend"; + auto text = "As Bob, the Legend enters, you may pay 2 life. If you don't, Bob, the Legend enters tapped.\n" + "Whenever Bob attacks, you win the game."; + + ASSERT_TRUE(parseCipt(name, text)); +} + +TEST(ParseCiptTest, parsesFullNameWithApostropheAtEndOfSentence) +{ + auto name = "Bob's Bobber"; + auto text = "As Bob's Bobber enters, you may pay 2 life. If you don't, Bob's Bobber enters tapped.\n" + "Whenever Bob attacks, you win the game."; + + ASSERT_TRUE(parseCipt(name, text)); +} + +TEST(ParseCiptTest, parsesShortnameEndingWithComma) +{ + auto name = "Bob, the Legend"; + auto text = "Bob enters tapped.\n" + "Whenever Bob attacks, you win the game."; + + ASSERT_TRUE(parseCipt(name, text)); +} + +TEST(ParseCiptTest, parsesShortnameEndingWithSpace) +{ + auto name = "Bob the Legend"; + auto text = "Bob enters tapped.\n" + "Whenever Bob attacks, you win the game."; + + ASSERT_TRUE(parseCipt(name, text)); +} + +TEST(ParseCiptTest, parsesMultiWordShortnameEndingWithComma) +{ + auto name = "Bob Dod, the Legend"; + auto text = "Bob Dod enters tapped.\n" + "Whenever Bob Dod attacks, you win the game."; + + ASSERT_TRUE(parseCipt(name, text)); +} + +TEST(ParseCiptTest, parsesMultiWordShortnameEndingWithSpace) +{ + auto name = "Bob Dod the Legend"; + auto text = "Bob Dod enters tapped.\n" + "Whenever Bob Dod attacks, you win the game."; + + ASSERT_TRUE(parseCipt(name, text)); +} + +TEST(ParseCiptTest, parsesShortnameEndingWithSpaceWithUnderscoreAppendedText) +{ + auto name = "Bob the Legend_SL50"; + auto text = "Bob enters tapped.\n" + "Whenever Bob attacks, you win the game."; + + ASSERT_TRUE(parseCipt(name, text)); +} + +TEST(ParseCiptTest, parsesShortnameEndingWithSpaceWithBracketsAppendedText) +{ + auto name = "Bob the Legend (SL50)"; + auto text = "Bob enters tapped.\n" + "Whenever Bob attacks, you win the game."; + + ASSERT_TRUE(parseCipt(name, text)); +} + +TEST(ParseCiptTest, parsesMultiWordShortnameEndingWithSpaceWithUnderscoreAppendedText) +{ + auto name = "Bob Dod the Legend_SL50"; + auto text = "Bob Dod enters tapped.\n" + "Whenever Bob Dod attacks, you win the game."; + + ASSERT_TRUE(parseCipt(name, text)); +} + +TEST(ParseCiptTest, parsesMultiWordShortnameEndingWithSpaceWithBracketsAppendedText) +{ + auto name = "Bob Dod the Legend (SL50)"; + auto text = "Bob Dod enters tapped.\n" + "Whenever Bob Dod attacks, you win the game."; + + ASSERT_TRUE(parseCipt(name, text)); +} + +TEST(ParseCiptTest, rejectsEmptyText) +{ + auto name = "Vanilla Dude"; + auto text = ""; + + ASSERT_FALSE(parseCipt(name, text)); +} + +TEST(ParseCiptTest, rejectsEntersTappedUnless) +{ + auto name = "Fast Fields"; + auto text = "This land enters tapped unless you control another land."; + + ASSERT_FALSE(parseCipt(name, text)); +} + +TEST(ParseCiptTest, rejectsWhenNameIsDifferent) +{ + auto name = "Boring Fields"; + auto text = "Fast Fields enters tapped."; + + ASSERT_FALSE(parseCipt(name, text)); +} + +TEST(ParseCiptTest, rejectsOtherCreaturesEnterTapped) +{ + auto name = "Imposing Guy"; + auto text = "Other creatures enter tapped."; + + ASSERT_FALSE(parseCipt(name, text)); +} + +TEST(ParseCiptTest, rejectsAbilityGrantingEntersTapped) +{ + auto name = "Imposing Guy"; + auto text = "Other creatures have \"This creature enters tapped\"."; + + ASSERT_FALSE(parseCipt(name, text)); +} + +TEST(ParseCiptTest, parsesEntersTappedAndAbilityGrantingEntersTappedOnSameCard) +{ + auto name = "Imposing Guy"; + auto text = "This creature enters tapped." + "Other creatures have \"This creature enters tapped\"."; + + ASSERT_TRUE(parseCipt(name, text)); +} + +TEST(ParseCiptTest, rejectsItEntersTappedAndAttacking) +{ + auto name = "Token Maker"; + auto text = "When Token Maker attacks, create a token. It enters tapped and attacking."; + + ASSERT_FALSE(parseCipt(name, text)); +} + +int main(int argc, char **argv) +{ + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +}