echo "> Running prebuild.py... "
# Stop the build as soon as source generation fails. The exit status of the
# command is tested directly rather than read back from $? afterwards.
if ! python3 prebuild.py; then
    echo "prebuild.py failed."
    exit 1
fi
/// Parse `text` as an integer written in radix `base` and store it in `*out`.
///
/// Accepted form: an optional sign ('+' or '-'), then an optional radix prefix
/// ("0b"/"0B" for base 2, "0o"/"0O" for base 8, "0x"/"0X" for base 16), then
/// one or more digits that are valid for `base` (letters are case-insensitive).
/// Whitespace, digit separators and a bare prefix such as "0x" are rejected.
///
/// @param text  characters to parse.
/// @param out   receives the parsed value on success; holds 0 on failure.
/// @param base  radix in the range [2, 36]; any other value makes the call fail.
/// @return true on success; false when `text` has no digits, contains a
///         character that is not a digit of `base`, or the value does not fit
///         in an i64.
bool parse_int(std::string_view text, i64* out, int base){
    *out = 0;
    if(base < 2 || base > 36) return false;

    // Optional sign.
    bool negative = false;
    if(!text.empty() && (text[0] == '+' || text[0] == '-')){
        negative = (text[0] == '-');
        text.remove_prefix(1);
    }

    // Optional radix prefix; only bases 2, 8 and 16 have one.
    char tag = '\0';
    switch(base){
        case 2:  tag = 'b'; break;
        case 8:  tag = 'o'; break;
        case 16: tag = 'x'; break;
        default: break;
    }
    if(tag != '\0' && text.length() >= 2 && text[0] == '0'){
        const char second = text[1];
        if(second == tag || second == tag - ('a' - 'A')) text.remove_prefix(2);
    }
    if(text.empty()) return false;

    // Accumulate the magnitude in an unsigned type so that the range check
    // never relies on signed overflow. A negative number may reach 2^63,
    // a positive one only 2^63 - 1 (for a 64-bit i64).
    const unsigned long long max_positive = (1ULL << (sizeof(i64) * 8 - 1)) - 1;
    const unsigned long long limit = negative ? max_positive + 1 : max_positive;
    const unsigned long long radix = static_cast<unsigned long long>(base);

    unsigned long long magnitude = 0;
    for(char c : text){
        int digit = -1;
        if(c >= '0' && c <= '9')      digit = c - '0';
        else if(c >= 'a' && c <= 'z') digit = c - 'a' + 10;
        else if(c >= 'A' && c <= 'Z') digit = c - 'A' + 10;
        if(digit < 0 || digit >= base) return false;

        const unsigned long long d = static_cast<unsigned long long>(digit);
        // Equivalent to `magnitude * radix + d > limit`, evaluated without overflowing.
        if(magnitude > (limit - d) / radix) return false;
        magnitude = magnitude * radix + d;
    }

    if(!negative){
        *out = static_cast<i64>(magnitude);
    }else if(magnitude > max_positive){
        // Magnitude is exactly |min i64|; build it without negating an
        // unrepresentable positive value.
        *out = -static_cast<i64>(max_positive) - 1;
    }else{
        *out = -static_cast<i64>(magnitude);
    }
    return true;
}