// Compile-time test translation unit: every check below is a static_assert,
// so the tests "run" simply by compiling this file.
//
// NOTE(review): this file looks like it went through a lossy text extraction.
// The three #include targets, every template parameter list and several
// template argument lists (everything that was written between '<' and '>')
// appear to be missing, and non-ASCII string literals look mis-decoded.
// The tokens are left exactly as found; restore them from the original
// source before trying to build this file.
#include
#include
#include

// Empty function with external linkage.
// NOTE(review): presumably here so the object file contains at least one
// symbol -- confirm.
void empty_symbol() { }

// Declared but never defined in the visible code.
// NOTE(review): template parameter lists are missing; presumably debugging
// aids that force the compiler to print a type / a number in an
// "incomplete type" diagnostic -- confirm.
template struct identify;
template struct number_id;

// Two interchangeable implementations of the test helpers:
//   CTRE_CREATE(pattern) -> object on which .match(subject) is called
//   CTRE_SYNTAX(pattern) -> value usable directly as a static_assert condition
//   CTRE_GEN(pattern)    -> a type (both variants expand to decltype(...))
#if !CTRE_CNTTP_COMPILER_CHECK
// Variant 1: paste a suffix onto the pattern literal (pattern ## _ctre etc.).
// NOTE(review): the suffixes are presumably user-defined literal operators
// brought in by the using-directives below -- confirm.
#define CTRE_CREATE(pattern) (pattern ## _ctre)
#define CTRE_SYNTAX(pattern) (pattern ## _ctre_syntax)
#define CTRE_GEN(pattern) decltype(pattern ## _ctre_gen)
#else

// Variant 2: function templates.
// NOTE(review): the template parameter (named `input` in the bodies) and the
// template arguments of ctll::parser / output / correct_with are missing from
// the text below.

// Parses `input`, fails compilation with a readable message when the parser
// output converts to false, and returns ctre::regular_expression built from
// the front of the parser's resulting stack type.
template constexpr auto create() {
    // Copy the template parameter into a local constexpr object before use.
    constexpr auto _input = input;
    using tmp = typename ctll::parser::template output>;
    static_assert(tmp(), "Regular Expression contains syntax error.");
    using re = decltype(front(typename tmp::output_type::stack_type()));
    return ctre::regular_expression(re());
}

// Returns the parser's `correct_with` result for `input` as a bool, without
// triggering a static_assert on failure (so it can be negated in tests).
template constexpr bool syntax() {
    constexpr auto _input = input;
    return ctll::parser::template correct_with>;
}

// Like create(), but returns the whole parser stack type instead of a
// regular_expression wrapping its front element.
template constexpr auto gen() {
    constexpr auto _input = input;
    using tmp = typename ctll::parser::template output>;
    static_assert(tmp(), "Regular Expression contains syntax error.");
    return typename tmp::output_type::stack_type();
}

// NOTE(review): as written these never pass `pattern` on; the template
// argument list after gen / create / syntax appears to have been lost.
#define CTRE_GEN(pattern) decltype(gen())
#define CTRE_CREATE(pattern) create()
#define CTRE_SYNTAX(pattern) syntax()

#endif

using namespace ctre::literals;
using namespace ctre::test_literals;
using namespace std::string_view_literals;

// UTS #18 Level 1: RL1.1: Hex Notation
// NOTE(review): the subject literals below are mis-decoded -- each pattern
// names a single code point, but the subjects as written contain several
// characters. Restore the original UTF-8 text.
static_assert(CTRE_CREATE(U"\\u{1F92A}").match(U"๐Ÿคช"));
static_assert(CTRE_CREATE(U"\\u20AC").match(U"โ‚ฌ"));
// TODO multiple character inside \u{AA BB CC}
// TODO deal with normalization 1.1.1

// UTS #18 Level 1: RL1.2: Properties
// TODO only \p and \P is not supported
// \p{...} / \P{...} with short ("L") and long ("Letter") names, both bare
// and inside a character class, matched against narrow and UTF-32 subjects.
static_assert(CTRE_SYNTAX(U"\\p{L}"));
static_assert(CTRE_CREATE(U"[\\p{L}]").match("A"));
static_assert(CTRE_CREATE(U"[\\p{L}]+").match("ABC"));
static_assert(CTRE_CREATE(U"[\\P{L}]").match("1"));
static_assert(CTRE_CREATE(U"[\\P{L}]+").match("123"));
static_assert(CTRE_SYNTAX(U"\\p{Letter}"));
static_assert(CTRE_CREATE(U"\\P{Letter}").match(U"1"));
// NOTE(review): the two subjects below are mis-decoded; as written they are
// multi-character strings, while each pattern consumes exactly one character.
static_assert(CTRE_CREATE(U"\\P{latin}").match(U"ะ„"));
static_assert(CTRE_CREATE(U"[^\\p{latin}\\p{script=Greek}]").match(U"ืฉ"));
// Compile-time checks for Unicode property escapes, non-ASCII literals in
// patterns, and mixed character widths between pattern and subject.
// The negated assertions show that .match() must consume the entire subject:
// one trailing or embedded non-matching character makes it fail.
//
// NOTE(review): every non-ASCII literal in this section looks mis-decoded
// (one original code point shows up as a cluster of 2-4 unrelated characters).
// The literals are left exactly as found; restore them from the original
// UTF-8 source before relying on these tests.

// Property names inside \p{...}: long name and two-letter abbreviations.
static_assert(CTRE_CREATE(U"\\p{Letter}+").match(u8"abcDEF"));
static_assert(CTRE_CREATE(U"\\p{Letter}+").match(U"abcDEF"));
static_assert(CTRE_CREATE(U"\\p{Ll}+").match(U"abcdef"));
static_assert(CTRE_CREATE(U"\\p{Lu}+").match(U"ABCD"));
static_assert(!CTRE_CREATE(U"\\p{Lu}+").match(U"ABcD"));
static_assert(CTRE_CREATE(U"\\p{Nd}+").match(U"1234567890"));
static_assert(!CTRE_CREATE(U"\\p{Nd}+").match(U"1234567890h"));

// script=... : Latin and Greek must each accept their own text and reject
// the other's.
static_assert(CTRE_CREATE(U"\\p{script=Latin}+").match(U"abcd"));
static_assert(CTRE_CREATE(U"\\p{script=Greek}+").match(U"ฮฒฮฉ"));
static_assert(!CTRE_CREATE(U"\\p{script=Latin}+").match(U"ฮฒฮฉ"));
static_assert(!CTRE_CREATE(U"\\p{script=Greek}+").match(U"abcd"));

// Emoji property against u8 subjects; compiled only when the char8_t
// feature-test macro reports at least 201811.
#if __cpp_char8_t >= 201811
static_assert(CTRE_CREATE(U"\\p{emoji}+").match(u8"๐Ÿคช๐Ÿ˜"));
static_assert(CTRE_CREATE("\\p{emoji}+").match(u8"๐Ÿคช๐Ÿ˜"));
#endif
static_assert(CTRE_CREATE(U"\\p{emoji}+").match(U"๐Ÿคช๐Ÿ˜โœจ\U0001F3F3"));

// Abbreviated key "sc=", lower-case value, and lazy quantifiers (+?) mixed
// with a second property.
static_assert(CTRE_SYNTAX(U"\\p{sc=greek}+?\\p{Emoji}\\p{sc=greek}+?"));
static_assert(CTRE_CREATE(U"\\p{sc=greek}+?\\p{Emoji}").match(U"ฮฑฮฉ๐Ÿ˜"));
static_assert(CTRE_CREATE(U"\\p{sc=greek}+?\\p{Emoji}\\p{sc=greek}+?").match(U"ฮฑ๐Ÿ˜ฮฉ"));

// Other key=value properties: age, block, scx.
static_assert(CTRE_SYNTAX(U"\\p{age=10.0}"));
static_assert(CTRE_CREATE(U"\\p{age=10.0}").match(U"๐Ÿคฉ"));
static_assert(CTRE_CREATE(U"\\p{block=misc_pictographs}").match(U"๐ŸŽ‰"));
static_assert(CTRE_CREATE(U"\\p{scx=Hira}+").match(U"ใ‚–"));

// Disabled debugging lines.
// NOTE(review): the template arguments of `identify` appear to have been
// lost, so these cannot simply be re-enabled as written.
//identify a;
//identify i;
//identify a;
//identify b;

// Patterns written as u8 literals, matched against UTF-32 subjects.
#if __cpp_char8_t
static_assert(CTRE_SYNTAX(u8"a+"));
static_assert(CTRE_SYNTAX(u8"๐Ÿ˜+"));
static_assert(CTRE_CREATE(u8"๐Ÿ˜").match(U"๐Ÿ˜"));
static_assert(CTRE_CREATE(u8"๐Ÿ˜+").match(U"๐Ÿ˜"));
static_assert(CTRE_CREATE(u8"๐Ÿ˜+").match(U"๐Ÿ˜๐Ÿ˜๐Ÿ˜๐Ÿ˜"));
// Character class mixing a literal non-ASCII character, 'a', and a \x{...}
// escape; the trailing 'x' in the second subject must break the match.
static_assert(CTRE_CREATE(u8"[๐Ÿ˜a\\x{1F92A}]+").match(U"๐Ÿ˜a๐Ÿ˜aa๐Ÿ˜๐Ÿ˜a๐Ÿคช"));
static_assert(!CTRE_CREATE(u8"[๐Ÿ˜a\\x{1F92A}]+").match(U"๐Ÿ˜a๐Ÿ˜aa๐Ÿ˜๐Ÿ˜a๐Ÿคชx"));
// The match result is kept so the length of the matched view can be checked.
// NOTE(review): the expected length 8 only adds up if each garbled cluster
// in the subject was originally one code point (3 + 1 + 1 + 3); as written
// the subject has 12 characters.
constexpr auto m1 = CTRE_CREATE(u8"[๐Ÿ˜a-z\\x{1F92A}]+").match(U"abc๐Ÿ˜๐Ÿ˜xyz");
static_assert(m1.to_view().length() == 8);
#endif

// Same checks with the pattern written as a UTF-32 (U"") literal; these are
// not guarded by a char8_t feature test.
static_assert(CTRE_SYNTAX(U"a+"));
static_assert(CTRE_SYNTAX(U"๐Ÿ˜+"));
static_assert(CTRE_CREATE(U"๐Ÿ˜").match(U"๐Ÿ˜"));
static_assert(CTRE_CREATE(U"๐Ÿ˜+").match(U"๐Ÿ˜"));
static_assert(CTRE_CREATE(U"๐Ÿ˜+").match(U"๐Ÿ˜๐Ÿ˜๐Ÿ˜๐Ÿ˜"));
static_assert(CTRE_CREATE(U"[๐Ÿ˜a\\x{1F92A}]+").match(U"๐Ÿ˜a๐Ÿ˜aa๐Ÿ˜๐Ÿ˜a๐Ÿคช"));
static_assert(!CTRE_CREATE(U"[๐Ÿ˜a\\x{1F92A}]+").match(U"๐Ÿ˜a๐Ÿ˜aa๐Ÿ˜๐Ÿ˜a๐Ÿคชx"));
// NOTE(review): same length caveat as for m1 above the #endif.
constexpr auto m2 = CTRE_CREATE(U"[๐Ÿ˜a-z\\x{1F92A}]+").match(U"abc๐Ÿ˜๐Ÿ˜xyz");
static_assert(m2.to_view().length() == 8);

// Pattern literals of every width (u8, U, u, L) against u8 subjects.
#if __cpp_char8_t >= 201811
static_assert(CTRE_CREATE(u8"๐Ÿ˜+").match(u8"๐Ÿ˜๐Ÿ˜๐Ÿ˜"));
static_assert(CTRE_CREATE(U"[ฤ›ลกฤล™abc]+").match(U"ฤ›ฤ›cฤ›ฤ›aล™"));
static_assert(CTRE_CREATE(u"ฤ›ลกฤล™").match(u8"ฤ›ลกฤล™"));
static_assert(CTRE_CREATE(L"ฤ›ลกฤล™").match(u8"ฤ›ลกฤล™"));
static_assert(CTRE_CREATE(u8"ฤ›ลกฤล™").match(u8"ฤ›ลกฤล™"));
#endif

// Narrow (char) pattern literals: a bare value name is accepted as a
// property, an unknown name must be a syntax error, and subjects may be
// std::string_view ("..."sv).
static_assert(CTRE_SYNTAX("\\p{Latin}"));
static_assert(!CTRE_SYNTAX("\\p{Latin42}"));
static_assert(CTRE_CREATE("\\p{Latin}").match("a"sv));
static_assert(!CTRE_CREATE("\\p{Emoji}").match("a"sv));