From 57beebdfd90e7b7469a3c4a2d2b4130b4ce672f0 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Wed, 11 Oct 2023 13:05:07 +0800 Subject: [PATCH] ... --- build.sh | 5 ++ include/pocketpy/lexer.h | 3 +- prebuild.py | 3 +- src/lexer.cpp | 109 ++++++++++++++++++++++++++++++++------- src/pocketpy.cpp | 9 ++-- 5 files changed, 102 insertions(+), 27 deletions(-) diff --git a/build.sh b/build.sh index 473b1f7d..a545c611 100644 --- a/build.sh +++ b/build.sh @@ -21,6 +21,11 @@ echo "> Running prebuild.py... " python3 prebuild.py +if [ $? -ne 0 ]; then + echo "prebuild.py failed." + exit 1 +fi + SRC=$(find src/ -name "*.cpp") echo "> Compiling and linking source files... " diff --git a/include/pocketpy/lexer.h b/include/pocketpy/lexer.h index b2652f79..68b5f460 100644 --- a/include/pocketpy/lexer.h +++ b/include/pocketpy/lexer.h @@ -48,7 +48,6 @@ const std::map kTokenKwMap = [](){ return map; }(); - struct Token{ TokenIndex type; const char* start; @@ -139,4 +138,6 @@ struct Lexer { std::vector run(); }; +bool parse_int(std::string_view text, i64* out, int base=10); + } // namespace pkpy diff --git a/prebuild.py b/prebuild.py index b19723b4..013b3c6b 100644 --- a/prebuild.py +++ b/prebuild.py @@ -4,7 +4,8 @@ from datetime import datetime def generate_python_sources(): sources = {} for file in os.listdir("python"): - assert file.endswith(".py") + if not file.endswith(".py"): + continue key = file.split(".")[0] with open("python/" + file) as f: value = f.read() diff --git a/src/lexer.cpp b/src/lexer.cpp index b44eb602..8c5eae45 100644 --- a/src/lexer.cpp +++ b/src/lexer.cpp @@ -281,28 +281,35 @@ static bool is_unicode_Lo_char(uint32_t c) { SyntaxError("binary/hex/octal literal should not contain a dot"); } - try{ - int base = 10; + int base = 10; + if (m[1].matched) { + char tag = m[1].first.base()[1]; + switch (tag) { + case 'x': base = 16; break; + case 'o': base = 8; break; + case 'b': base = 2; break; + default: FATAL_ERROR(); + } + } + if (m[2].matched) { + // float point number + f64 out; size_t size; - if (m[1].matched) { - if (m[1].str() == "0b") base = 2; - else if (m[1].str() == "0o") base = 8; - else base = 16; + try{ + out = Number::stof(m[0], &size); + PK_ASSERT((int)size == (int)m[0].length()); + }catch(...){ + SyntaxError("invalid number literal"); } - if (m[2].matched) { - PK_ASSERT(base == 10); - add_token(TK("@num"), Number::stof(m[0], &size)); - } else { - // If we're base 8/2, chop off the "o" - std::string match = m[0].str(); - if (base == 8 || base == 2) match.erase(1, 1); - add_token(TK("@num"), (i64)std::stoll(match, &size, base)); + add_token(TK("@num"), out); + } else { + std::string_view text(m[0].first.base(), m[0].length()); + i64 out; + bool ok = parse_int(text, &out, base); + if(!ok){ + SyntaxError("invalid number literal for base " + std::to_string(base)); } - // HACK: We need to check length-1 for octal since python octals are "0o..." and c/c++ octals are "0..." - if (base == 8 || base == 2) {PK_ASSERT((int)size == (int)m.length()-1);} - else {PK_ASSERT((int)size == (int)m.length());} - }catch(...){ - SyntaxError("invalid number literal"); + add_token(TK("@num"), out); } } @@ -475,4 +482,68 @@ static bool is_unicode_Lo_char(uint32_t c) { return std::move(nexts); } +bool parse_int(std::string_view text, i64* out, int base){ + // TODO: detect overflow + *out = 0; + + const auto f_startswith_2 = [](std::string_view t, const char* prefix) -> bool{ + if(t.length() < 2) return false; + return t[0] == prefix[0] && t[1] == prefix[1]; + }; + + if(base == 10){ + // 10-base 12334 + if(text.length() == 0) return false; + for(char c : text){ + if(c >= '0' && c <= '9'){ + *out = (*out * 10) + (c - '0'); + }else{ + return false; + } + } + return true; + }else if(base == 2){ + // 2-base 0b101010 + if(f_startswith_2(text, "0b")) text.remove_prefix(2); + if(text.length() == 0) return false; + for(char c : text){ + if(c == '0' || c == '1'){ + *out = (*out << 1) | (c - '0'); + }else{ + return false; + } + } + return true; + }else if(base == 8){ + // 8-base 0o123 + if(f_startswith_2(text, "0o")) text.remove_prefix(2); + if(text.length() == 0) return false; + for(char c : text){ + if(c >= '0' && c <= '7'){ + *out = (*out << 3) | (c - '0'); + }else{ + return false; + } + } + return true; + }else if(base == 16){ + // 16-base 0x123 + if(f_startswith_2(text, "0x")) text.remove_prefix(2); + if(text.length() == 0) return false; + for(char c : text){ + if(c >= '0' && c <= '9'){ + *out = (*out << 4) | (c - '0'); + }else if(c >= 'a' && c <= 'f'){ + *out = (*out << 4) | (c - 'a' + 10); + }else if(c >= 'A' && c <= 'F'){ + *out = (*out << 4) | (c - 'A' + 10); + }else{ + return false; + } + } + return true; + } + return false; +} + } // namespace pkpy diff --git a/src/pocketpy.cpp b/src/pocketpy.cpp index c42ead36..5de99ff2 100644 --- a/src/pocketpy.cpp +++ b/src/pocketpy.cpp @@ -434,14 +434,11 @@ void init_builtins(VM* _vm) { int base = 10; if(args.size() == 1+2) base = CAST(i64, args[2]); const Str& s = CAST(Str&, args[1]); - try{ - size_t parsed = 0; - i64 val = std::stoll(s.str(), &parsed, base); - PK_ASSERT(parsed == s.length()); - return VAR(val); - }catch(...){ + i64 val; + if(!parse_int(s.sv(), &val, base)){ vm->ValueError("invalid literal for int(): " + s.escape()); } + return VAR(val); } vm->TypeError("invalid arguments for int()"); return vm->None;