diff --git a/.gitignore b/.gitignore index 340a340a..91ed038b 100644 --- a/.gitignore +++ b/.gitignore @@ -26,4 +26,5 @@ pocketpy.exe main.obj pocketpy.exp pocketpy.lib -APPS \ No newline at end of file +APPS +build \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 00000000..48b145c0 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,43 @@ +cmake_minimum_required(VERSION 3.10) + +project(pocketpy) + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +execute_process( + COMMAND python prebuild.py + WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR} + RESULT_VARIABLE PREBUILD_RESULT +) + +if(NOT ${PREBUILD_RESULT} EQUAL 0) + message(FATAL_ERROR "Prebuild failed with code ${PREBUILD_RESULT}") +endif() + +if(MSVC) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /GR- /EHsc /utf-8 /O2") +else() + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti -fexceptions -O2") +endif() + +find_program(CLANGPP clang++) +if(CLANGPP) + message(STATUS "Using clang with libc++") + set(CMAKE_CXX_COMPILER ${CLANGPP}) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++") +endif() + +include_directories(${CMAKE_CURRENT_LIST_DIR}/include) + +aux_source_directory(${CMAKE_CURRENT_LIST_DIR}/src POCKETPY_SRC) + +option(BUILD_EXE "Build executable" ON) + +if(BUILD_EXE) + message(STATUS "Building executable") + add_executable(${PROJECT_NAME} ${POCKETPY_SRC} src2/main.cpp) +else() + message(STATUS "Building library") + add_library(${PROJECT_NAME} SHARED ${POCKETPY_SRC} src2/lib.cpp) +endif() \ No newline at end of file diff --git a/amalgamate.py b/amalgamate.py index 9763af0e..17c49faa 100644 --- a/amalgamate.py +++ b/amalgamate.py @@ -1,14 +1,14 @@ import os -os.system("python3 preprocess.py") +os.system("python3 prebuild.py") -with open("src/opcodes.h", "rt", encoding='utf-8') as f: - OPCODES_TEXT = f.read() +with open("include/pocketpy/opcodes.h", "rt", encoding='utf-8') as f: + OPCODES_TEXT = '\n' + f.read() + '\n' pipeline = [ ["config.h", "common.h", "memory.h", "vector.h", "str.h", "tuplelist.h", "namedict.h", "error.h", "lexer.h"], ["obj.h", "dict.h", "codeobject.h", "frame.h"], - ["gc.h", "vm.h", "expr.h", "compiler.h", "repl.h"], + ["gc.h", "vm.h", "ceval.h", "expr.h", "compiler.h", "repl.h"], ["_generated.h", "cffi.h", "iter.h", "base64.h", "random.h", "re.h", "linalg.h", "easing.h", "io.h"], ["export.h", "pocketpy.h"] ] @@ -24,24 +24,41 @@ import time if os.path.exists("amalgamated"): shutil.rmtree("amalgamated") - time.sleep(0.6) + time.sleep(0.5) os.mkdir("amalgamated") def remove_copied_include(text): text = text.replace("#pragma once", "") + + def _replace(m): + key = m.group(1) + if key.startswith("pocketpy/"): + key = key[9:] + if key == "user_config.h": + return m.group(0) + if key == "opcodes.h": + return OPCODES_TEXT + assert key in copied, f"include {key} not found" + return "" + text = re.sub( r'#include\s+"(.+)"\s*', - lambda m: "" if m.group(1) in copied else m.group(0), + _replace, text ) - text = text.replace('#include "opcodes.h"', OPCODES_TEXT) return text for seq in pipeline: for j in seq: - with open("src/"+j, "rt", encoding='utf-8') as f: + print(j) + with open("include/pocketpy/"+j, "rt", encoding='utf-8') as f: text += remove_copied_include(f.read()) + '\n' copied.add(j) + j = j.replace(".h", ".cpp") + if os.path.exists("src/"+j): + with open("src/"+j, "rt", encoding='utf-8') as f: + text += remove_copied_include(f.read()) + '\n' + copied.add(j) with open("amalgamated/pocketpy.h", "wt", encoding='utf-8') as f: final_text = \ @@ -56,7 +73,12 @@ r'''/* ''' + text + '\n#endif // POCKETPY_H' f.write(final_text) -shutil.copy("src/main.cpp", "amalgamated/main.cpp") +shutil.copy("src2/main.cpp", "amalgamated/main.cpp") +with open("amalgamated/main.cpp", "rt", encoding='utf-8') as f: + text = f.read() +text = text.replace('#include "pocketpy/pocketpy.h"', '#include "pocketpy.h"') +with open("amalgamated/main.cpp", "wt", encoding='utf-8') as f: + f.write(text) if sys.platform == 'linux': ok = os.system("clang++ -o pocketpy amalgamated/main.cpp --std=c++17 -stdlib=libc++") @@ -67,7 +89,7 @@ if sys.platform == 'linux': print("amalgamated/pocketpy.h") content = [] -for i in ["src/export.h", "c_bindings/pocketpy_c.h", "c_bindings/pocketpy_c.cpp"]: +for i in ["include/pocketpy/export.h", "c_bindings/pocketpy_c.h", "c_bindings/pocketpy_c.cpp"]: with open(i, "rt", encoding='utf-8') as g: content.append(g.read()) @@ -90,11 +112,4 @@ if os.path.exists(unity_ios_root): shutil.copy("amalgamated/pocketpy.h", unity_ios_root) shutil.copy("amalgamated/pocketpy.cpp", unity_ios_root) -# my custom things... -if os.path.exists("/mnt/e/PainterEngine/project/pocketpy.h"): - shutil.copy("amalgamated/pocketpy.h", "/mnt/e/PainterEngine/project/pocketpy.h") - shutil.copy("src/easing.pyi", "/mnt/e/PainterEngine/game/pype/easing.pyi") - shutil.copy("src/linalg.pyi", "/mnt/e/PainterEngine/game/pype/linalg.pyi") - shutil.copy("src/c.pyi", "/mnt/e/PainterEngine/game/pype/c.pyi") - diff --git a/include/pocketpy/c.pyi b/include/c.pyi similarity index 100% rename from include/pocketpy/c.pyi rename to include/c.pyi diff --git a/include/pocketpy/easing.pyi b/include/easing.pyi similarity index 100% rename from include/pocketpy/easing.pyi rename to include/easing.pyi diff --git a/include/pocketpy/linalg.pyi b/include/linalg.pyi similarity index 100% rename from include/pocketpy/linalg.pyi rename to include/linalg.pyi diff --git a/include/pocketpy/base64.h b/include/pocketpy/base64.h index 9d6a1703..44f5f3e8 100644 --- a/include/pocketpy/base64.h +++ b/include/pocketpy/base64.h @@ -1,20 +1,9 @@ #pragma once -#include "common.h" - -#if PK_MODULE_BASE64 - #include "cffi.h" namespace pkpy { void add_module_base64(VM* vm); -} // namespace pkpy - - -#else - -ADD_MODULE_PLACEHOLDER(base64) - -#endif \ No newline at end of file +} // namespace pkpy \ No newline at end of file diff --git a/include/pocketpy/ceval.h b/include/pocketpy/ceval.h new file mode 100644 index 00000000..d7a10dce --- /dev/null +++ b/include/pocketpy/ceval.h @@ -0,0 +1,4 @@ +#pragma once + +#include "vm.h" +// dummy header for ceval.cpp \ No newline at end of file diff --git a/include/pocketpy/cffi.h b/include/pocketpy/cffi.h index c8f5d76d..09ce2794 100644 --- a/include/pocketpy/cffi.h +++ b/include/pocketpy/cffi.h @@ -29,7 +29,7 @@ namespace pkpy { #define VAR_T(T, ...) vm->heap.gcnew(T::_type(vm), T(__VA_ARGS__)) -static int c99_sizeof(VM*, const Str&); +int c99_sizeof(VM*, const Str&); inline PyObject* py_var(VM* vm, void* p); inline PyObject* py_var(VM* vm, char* p); @@ -60,126 +60,7 @@ struct VoidP{ return "0x" + ss.str(); } - static void _register(VM* vm, PyObject* mod, PyObject* type){ - vm->bind_default_constructor(type); - - vm->bind_func<1>(type, "from_hex", [](VM* vm, ArgsView args){ - std::string s = CAST(Str&, args[0]).str(); - size_t size; - intptr_t ptr = std::stoll(s, &size, 16); - if(size != s.size()) vm->ValueError("invalid literal for void_p(): " + s); - return VAR_T(VoidP, (void*)ptr); - }); - vm->bind_method<0>(type, "hex", [](VM* vm, ArgsView args){ - VoidP& self = _CAST(VoidP&, args[0]); - return VAR(self.hex()); - }); - - vm->bind__repr__(PK_OBJ_GET(Type, type), [](VM* vm, PyObject* obj){ - VoidP& self = _CAST(VoidP&, obj); - std::stringstream ss; - ss << ""; - return VAR(ss.str()); - }); - -#define BIND_CMP(name, op) \ - vm->bind##name(PK_OBJ_GET(Type, type), [](VM* vm, PyObject* lhs, PyObject* rhs){ \ - if(!is_non_tagged_type(rhs, VoidP::_type(vm))) return vm->NotImplemented; \ - return VAR(_CAST(VoidP&, lhs) op _CAST(VoidP&, rhs)); \ - }); - - BIND_CMP(__eq__, ==) - BIND_CMP(__lt__, <) - BIND_CMP(__le__, <=) - BIND_CMP(__gt__, >) - BIND_CMP(__ge__, >=) - -#undef BIND_CMP - - vm->bind__hash__(PK_OBJ_GET(Type, type), [](VM* vm, PyObject* obj){ - VoidP& self = _CAST(VoidP&, obj); - return reinterpret_cast(self.ptr); - }); - - vm->bind_method<1>(type, "set_base_offset", [](VM* vm, ArgsView args){ - VoidP& self = _CAST(VoidP&, args[0]); - if(is_non_tagged_type(args[1], vm->tp_str)){ - const Str& type = _CAST(Str&, args[1]); - self.base_offset = c99_sizeof(vm, type); - }else{ - self.base_offset = CAST(int, args[1]); - } - return vm->None; - }); - - vm->bind_method<0>(type, "get_base_offset", [](VM* vm, ArgsView args){ - VoidP& self = _CAST(VoidP&, args[0]); - return VAR(self.base_offset); - }); - - vm->bind_method<1>(type, "offset", [](VM* vm, ArgsView args){ - VoidP& self = _CAST(VoidP&, args[0]); - i64 offset = CAST(i64, args[1]); - return VAR_T(VoidP, (char*)self.ptr + offset * self.base_offset); - }); - - vm->bind__add__(PK_OBJ_GET(Type, type), [](VM* vm, PyObject* lhs, PyObject* rhs){ - VoidP& self = _CAST(VoidP&, lhs); - i64 offset = CAST(i64, rhs); - return VAR_T(VoidP, (char*)self.ptr + offset); - }); - - vm->bind__sub__(PK_OBJ_GET(Type, type), [](VM* vm, PyObject* lhs, PyObject* rhs){ - VoidP& self = _CAST(VoidP&, lhs); - i64 offset = CAST(i64, rhs); - return VAR_T(VoidP, (char*)self.ptr - offset); - }); - -#define BIND_SETGET(T, name) \ - vm->bind_method<0>(type, "read_" name, [](VM* vm, ArgsView args){ \ - VoidP& self = _CAST(VoidP&, args[0]); \ - return VAR(*(T*)self.ptr); \ - }); \ - vm->bind_method<1>(type, "write_" name, [](VM* vm, ArgsView args){ \ - VoidP& self = _CAST(VoidP&, args[0]); \ - *(T*)self.ptr = CAST(T, args[1]); \ - return vm->None; \ - }); - - BIND_SETGET(char, "char") - BIND_SETGET(unsigned char, "uchar") - BIND_SETGET(short, "short") - BIND_SETGET(unsigned short, "ushort") - BIND_SETGET(int, "int") - BIND_SETGET(unsigned int, "uint") - BIND_SETGET(long, "long") - BIND_SETGET(unsigned long, "ulong") - BIND_SETGET(long long, "longlong") - BIND_SETGET(unsigned long long, "ulonglong") - BIND_SETGET(float, "float") - BIND_SETGET(double, "double") - BIND_SETGET(bool, "bool") - BIND_SETGET(void*, "void_p") - - vm->bind_method<1>(type, "read_bytes", [](VM* vm, ArgsView args){ - VoidP& self = _CAST(VoidP&, args[0]); - i64 size = CAST(i64, args[1]); - std::vector buffer(size); - memcpy(buffer.data(), self.ptr, size); - return VAR(Bytes(std::move(buffer))); - }); - - vm->bind_method<1>(type, "write_bytes", [](VM* vm, ArgsView args){ - VoidP& self = _CAST(VoidP&, args[0]); - Bytes& bytes = CAST(Bytes&, args[1]); - memcpy(self.ptr, bytes.data(), bytes.size()); - return vm->None; - }); - } - -#undef BIND_SETGET + static void _register(VM* vm, PyObject* mod, PyObject* type); }; struct C99Struct{ @@ -215,116 +96,7 @@ struct C99Struct{ ~C99Struct(){ if(p!=_inlined) free(p); } - static void _register(VM* vm, PyObject* mod, PyObject* type){ - vm->bind_constructor<-1>(type, [](VM* vm, ArgsView args){ - if(args.size() == 1+1){ - if(is_int(args[1])){ - int size = _CAST(int, args[1]); - return VAR_T(C99Struct, size); - } - if(is_non_tagged_type(args[1], vm->tp_str)){ - const Str& s = _CAST(Str&, args[1]); - return VAR_T(C99Struct, (void*)s.data, s.size); - } - if(is_non_tagged_type(args[1], vm->tp_bytes)){ - const Bytes& b = _CAST(Bytes&, args[1]); - return VAR_T(C99Struct, (void*)b.data(), b.size()); - } - vm->TypeError("expected int, str or bytes"); - return vm->None; - } - if(args.size() == 1+2){ - void* p = CAST(void*, args[1]); - int size = CAST(int, args[2]); - return VAR_T(C99Struct, p, size); - } - vm->TypeError("expected 1 or 2 arguments"); - return vm->None; - }); - - vm->bind_method<0>(type, "addr", [](VM* vm, ArgsView args){ - C99Struct& self = _CAST(C99Struct&, args[0]); - return VAR_T(VoidP, self.p); - }); - - vm->bind_method<0>(type, "size", [](VM* vm, ArgsView args){ - C99Struct& self = _CAST(C99Struct&, args[0]); - return VAR(self.size); - }); - - vm->bind_method<0>(type, "copy", [](VM* vm, ArgsView args){ - const C99Struct& self = _CAST(C99Struct&, args[0]); - return VAR_T(C99Struct, self); - }); - - vm->bind_method<0>(type, "to_string", [](VM* vm, ArgsView args){ - C99Struct& self = _CAST(C99Struct&, args[0]); - return VAR(Str(self.p, self.size)); - }); - - vm->bind_method<0>(type, "to_bytes", [](VM* vm, ArgsView args){ - C99Struct& self = _CAST(C99Struct&, args[0]); - std::vector buffer(self.size); - memcpy(buffer.data(), self.p, self.size); - return VAR(Bytes(std::move(buffer))); - }); - - vm->bind__eq__(PK_OBJ_GET(Type, type), [](VM* vm, PyObject* lhs, PyObject* rhs){ - C99Struct& self = _CAST(C99Struct&, lhs); - if(!is_non_tagged_type(rhs, C99Struct::_type(vm))) return vm->NotImplemented; - C99Struct& other = _CAST(C99Struct&, rhs); - bool ok = self.size == other.size && memcmp(self.p, other.p, self.size) == 0; - return VAR(ok); - }); - -#define BIND_SETGET(T, name) \ - vm->bind(type, "read_" name "(self, offset=0)", [](VM* vm, ArgsView args){ \ - C99Struct& self = _CAST(C99Struct&, args[0]); \ - i64 offset = CAST(i64, args[1]); \ - void* ptr = self.p + offset; \ - return VAR(*(T*)ptr); \ - }); \ - vm->bind(type, "write_" name "(self, value, offset=0)", [](VM* vm, ArgsView args){ \ - C99Struct& self = _CAST(C99Struct&, args[0]); \ - i64 offset = CAST(i64, args[2]); \ - void* ptr = self.p + offset; \ - *(T*)ptr = CAST(T, args[1]); \ - return vm->None; \ - }); - - BIND_SETGET(char, "char") - BIND_SETGET(unsigned char, "uchar") - BIND_SETGET(short, "short") - BIND_SETGET(unsigned short, "ushort") - BIND_SETGET(int, "int") - BIND_SETGET(unsigned int, "uint") - BIND_SETGET(long, "long") - BIND_SETGET(unsigned long, "ulong") - BIND_SETGET(long long, "longlong") - BIND_SETGET(unsigned long long, "ulonglong") - BIND_SETGET(float, "float") - BIND_SETGET(double, "double") - BIND_SETGET(bool, "bool") - BIND_SETGET(void*, "void_p") -#undef BIND_SETGET - - // patch VoidP - type = vm->_t(VoidP::_type(vm)); - - vm->bind_method<1>(type, "read_struct", [](VM* vm, ArgsView args){ - VoidP& self = _CAST(VoidP&, args[0]); - const Str& type = CAST(Str&, args[1]); - int size = c99_sizeof(vm, type); - return VAR_T(C99Struct, self.ptr, size); - }); - - vm->bind_method<1>(type, "write_struct", [](VM* vm, ArgsView args){ - VoidP& self = _CAST(VoidP&, args[0]); - C99Struct& other = CAST(C99Struct&, args[1]); - memcpy(self.ptr, other.p, other.size); - return vm->None; - }); - } + static void _register(VM* vm, PyObject* mod, PyObject* type); }; struct ReflField{ @@ -351,13 +123,6 @@ inline void add_refl_type(std::string_view name, size_t size, std::vectorsecond.size; - vm->ValueError("not a valid c99 type"); - return 0; -} - struct C99ReflType final: ReflType{ PY_CLASS(C99ReflType, c, _refl) @@ -367,40 +132,7 @@ struct C99ReflType final: ReflType{ this->fields = type.fields; } - static void _register(VM* vm, PyObject* mod, PyObject* type){ - vm->bind_notimplemented_constructor(type); - - vm->bind_method<0>(type, "__call__", [](VM* vm, ArgsView args){ - C99ReflType& self = _CAST(C99ReflType&, args[0]); - return VAR_T(C99Struct, nullptr, self.size); - }); - - vm->bind_method<0>(type, "__repr__", [](VM* vm, ArgsView args){ - C99ReflType& self = _CAST(C99ReflType&, args[0]); - return VAR(""); - }); - - vm->bind_method<0>(type, "name", [](VM* vm, ArgsView args){ - C99ReflType& self = _CAST(C99ReflType&, args[0]); - return VAR(self.name); - }); - - vm->bind_method<0>(type, "size", [](VM* vm, ArgsView args){ - C99ReflType& self = _CAST(C99ReflType&, args[0]); - return VAR(self.size); - }); - - vm->bind__getitem__(PK_OBJ_GET(Type, type), [](VM* vm, PyObject* obj, PyObject* key){ - C99ReflType& self = _CAST(C99ReflType&, obj); - const Str& name = CAST(Str&, key); - auto it = std::lower_bound(self.fields.begin(), self.fields.end(), name.sv()); - if(it == self.fields.end() || it->name != name.sv()){ - vm->KeyError(key); - return vm->None; - } - return VAR(it->offset); - }); - } + static void _register(VM* vm, PyObject* mod, PyObject* type); }; static_assert(sizeof(Py_) <= 64); @@ -483,73 +215,6 @@ inline void bind_any_c_fp(VM* vm, PyObject* obj, Str name, T fp){ obj->attr().set(name, func); } -inline void add_module_c(VM* vm){ - PyObject* mod = vm->new_module("c"); - - vm->bind_func<1>(mod, "malloc", [](VM* vm, ArgsView args){ - i64 size = CAST(i64, args[0]); - return VAR(malloc(size)); - }); - - vm->bind_func<1>(mod, "free", [](VM* vm, ArgsView args){ - void* p = CAST(void*, args[0]); - free(p); - return vm->None; - }); - - vm->bind_func<1>(mod, "sizeof", [](VM* vm, ArgsView args){ - const Str& type = CAST(Str&, args[0]); - i64 size = c99_sizeof(vm, type); - return VAR(size); - }); - - vm->bind_func<1>(mod, "refl", [](VM* vm, ArgsView args){ - const Str& key = CAST(Str&, args[0]); - auto it = _refl_types.find(key.sv()); - if(it == _refl_types.end()) vm->ValueError("reflection type not found"); - const ReflType& rt = it->second; - return VAR_T(C99ReflType, rt); - }); - - vm->bind_func<3>(mod, "memset", [](VM* vm, ArgsView args){ - void* p = CAST(void*, args[0]); - memset(p, CAST(int, args[1]), CAST(size_t, args[2])); - return vm->None; - }); - - vm->bind_func<3>(mod, "memcpy", [](VM* vm, ArgsView args){ - void* dst = CAST(void*, args[0]); - void* src = CAST(void*, args[1]); - i64 size = CAST(i64, args[2]); - memcpy(dst, src, size); - return vm->None; - }); - - VoidP::register_class(vm, mod); - C99Struct::register_class(vm, mod); - C99ReflType::register_class(vm, mod); - mod->attr().set("NULL", VAR_T(VoidP, nullptr)); - - add_refl_type("char", sizeof(char), {}); - add_refl_type("uchar", sizeof(unsigned char), {}); - add_refl_type("short", sizeof(short), {}); - add_refl_type("ushort", sizeof(unsigned short), {}); - add_refl_type("int", sizeof(int), {}); - add_refl_type("uint", sizeof(unsigned int), {}); - add_refl_type("long", sizeof(long), {}); - add_refl_type("ulong", sizeof(unsigned long), {}); - add_refl_type("longlong", sizeof(long long), {}); - add_refl_type("ulonglong", sizeof(unsigned long long), {}); - add_refl_type("float", sizeof(float), {}); - add_refl_type("double", sizeof(double), {}); - add_refl_type("bool", sizeof(bool), {}); - add_refl_type("void_p", sizeof(void*), {}); - - PyObject* void_p_t = mod->attr("void_p"); - for(const char* t: {"char", "uchar", "short", "ushort", "int", "uint", "long", "ulong", "longlong", "ulonglong", "float", "double", "bool"}){ - mod->attr().set(Str(t) + "_", VAR_T(C99ReflType, _refl_types[t])); - mod->attr().set(Str(t) + "_p", void_p_t); - } -} +void add_module_c(VM* vm); } // namespace pkpy \ No newline at end of file diff --git a/include/pocketpy/codeobject.h b/include/pocketpy/codeobject.h index 67eaa817..79ce2b7e 100644 --- a/include/pocketpy/codeobject.h +++ b/include/pocketpy/codeobject.h @@ -33,8 +33,8 @@ enum CodeBlockType { TRY_EXCEPT, }; -#define BC_NOARG -1 -#define BC_KEEPLINE -1 +inline const int BC_NOARG = -1; +inline const int BC_KEEPLINE = -1; struct CodeBlock { CodeBlockType type; diff --git a/include/pocketpy/compiler.h b/include/pocketpy/compiler.h index 77602acb..e1a7fd5d 100644 --- a/include/pocketpy/compiler.h +++ b/include/pocketpy/compiler.h @@ -38,155 +38,25 @@ class Compiler { CodeEmitContext* ctx() { return &contexts.top(); } CompileMode mode() const{ return lexer->src->mode; } - NameScope name_scope() const { - auto s = contexts.size()>1 ? NAME_LOCAL : NAME_GLOBAL; - if(unknown_global_scope && s == NAME_GLOBAL) s = NAME_GLOBAL_UNKNOWN; - return s; - } + NameScope name_scope() const; + CodeObject_ push_global_context(); + FuncDecl_ push_f_context(Str name); + void pop_context(); - CodeObject_ push_global_context(){ - CodeObject_ co = make_sp(lexer->src, lexer->src->filename); - contexts.push(CodeEmitContext(vm, co, contexts.size())); - return co; - } + static void init_pratt_rules(); - FuncDecl_ push_f_context(Str name){ - FuncDecl_ decl = make_sp(); - decl->code = make_sp(lexer->src, name); - decl->nested = name_scope() == NAME_LOCAL; - contexts.push(CodeEmitContext(vm, decl->code, contexts.size())); - return decl; - } + bool match(TokenIndex expected); + void consume(TokenIndex expected); + bool match_newlines_repl(); - void pop_context(){ - if(!ctx()->s_expr.empty()){ - throw std::runtime_error("!ctx()->s_expr.empty()\n" + ctx()->_log_s_expr()); - } - // add a `return None` in the end as a guard - // previously, we only do this if the last opcode is not a return - // however, this is buggy...since there may be a jump to the end (out of bound) even if the last opcode is a return - ctx()->emit(OP_LOAD_NONE, BC_NOARG, BC_KEEPLINE); - ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE); - // ctx()->co->optimize(vm); - if(ctx()->co->varnames.size() > PK_MAX_CO_VARNAMES){ - SyntaxError("maximum number of local variables exceeded"); - } - contexts.pop(); - } - - static void init_pratt_rules(){ - if(rules[TK(".")].precedence != PREC_NONE) return; -// http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/ -#define METHOD(name) &Compiler::name -#define NO_INFIX nullptr, PREC_NONE - for(TokenIndex i=0; i")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION }; - rules[TK("<")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION }; - rules[TK("==")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION }; - rules[TK("!=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION }; - rules[TK(">=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION }; - rules[TK("<=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION }; - rules[TK("in")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION }; - rules[TK("is")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION }; - rules[TK("<<")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT }; - rules[TK(">>")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT }; - rules[TK("&")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_AND }; - rules[TK("|")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_OR }; - rules[TK("^")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_XOR }; - rules[TK("@")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR }; - rules[TK("if")] = { nullptr, METHOD(exprTernary), PREC_TERNARY }; - rules[TK(",")] = { nullptr, METHOD(exprTuple), PREC_TUPLE }; - rules[TK("not in")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION }; - rules[TK("is not")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION }; - rules[TK("and") ] = { nullptr, METHOD(exprAnd), PREC_LOGICAL_AND }; - rules[TK("or")] = { nullptr, METHOD(exprOr), PREC_LOGICAL_OR }; - rules[TK("not")] = { METHOD(exprNot), nullptr, PREC_LOGICAL_NOT }; - rules[TK("True")] = { METHOD(exprLiteral0), NO_INFIX }; - rules[TK("False")] = { METHOD(exprLiteral0), NO_INFIX }; - rules[TK("None")] = { METHOD(exprLiteral0), NO_INFIX }; - rules[TK("...")] = { METHOD(exprLiteral0), NO_INFIX }; - rules[TK("lambda")] = { METHOD(exprLambda), NO_INFIX }; - rules[TK("@id")] = { METHOD(exprName), NO_INFIX }; - rules[TK("@num")] = { METHOD(exprLiteral), NO_INFIX }; - rules[TK("@str")] = { METHOD(exprLiteral), NO_INFIX }; - rules[TK("@fstr")] = { METHOD(exprFString), NO_INFIX }; - rules[TK("@long")] = { METHOD(exprLong), NO_INFIX }; -#undef METHOD -#undef NO_INFIX - } - - bool match(TokenIndex expected) { - if (curr().type != expected) return false; - advance(); - return true; - } - - void consume(TokenIndex expected) { - if (!match(expected)){ - SyntaxError( - fmt("expected '", TK_STR(expected), "', got '", TK_STR(curr().type), "'") - ); - } - } - - bool match_newlines_repl(){ - return match_newlines(mode()==REPL_MODE); - } - - bool match_newlines(bool repl_throw=false) { - bool consumed = false; - if (curr().type == TK("@eol")) { - while (curr().type == TK("@eol")) advance(); - consumed = true; - } - if (repl_throw && curr().type == TK("@eof")){ - throw NeedMoreLines(ctx()->is_compiling_class); - } - return consumed; - } - - bool match_end_stmt() { - if (match(TK(";"))) { match_newlines(); return true; } - if (match_newlines() || curr().type == TK("@eof")) return true; - if (curr().type == TK("@dedent")) return true; - return false; - } - - void consume_end_stmt() { - if (!match_end_stmt()) SyntaxError("expected statement end"); - } + bool match_newlines(bool repl_throw=false); + bool match_end_stmt(); + void consume_end_stmt(); /*************************************************/ - - void EXPR(bool push_stack=true) { - parse_expression(PREC_TUPLE+1, push_stack); - } - - void EXPR_TUPLE(bool push_stack=true) { - parse_expression(PREC_TUPLE, push_stack); - } - - // special case for `for loop` and `comp` - Expr_ EXPR_VARS(){ - std::vector items; - do { - consume(TK("@id")); - items.push_back(make_expr(prev().str(), name_scope())); - } while(match(TK(","))); - if(items.size()==1) return std::move(items[0]); - return make_expr(std::move(items)); - } + void EXPR(bool push_stack=true); + void EXPR_TUPLE(bool push_stack=true); + Expr_ EXPR_VARS(); // special case for `for loop` and `comp` template std::unique_ptr make_expr(Args&&... args) { @@ -195,117 +65,6 @@ class Compiler { return expr; } - void exprLiteral(){ - ctx()->s_expr.push(make_expr(prev().value)); - } - - void exprLong(){ - ctx()->s_expr.push(make_expr(prev().str())); - } - - void exprFString(){ - ctx()->s_expr.push(make_expr(std::get(prev().value))); - } - - void exprLambda(){ - FuncDecl_ decl = push_f_context(""); - auto e = make_expr(decl); - if(!match(TK(":"))){ - _compile_f_args(e->decl, false); - consume(TK(":")); - } - // https://github.com/blueloveTH/pocketpy/issues/37 - parse_expression(PREC_LAMBDA + 1, false); - ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE); - pop_context(); - ctx()->s_expr.push(std::move(e)); - } - - void exprTuple(){ - std::vector items; - items.push_back(ctx()->s_expr.popx()); - do { - if(curr().brackets_level) match_newlines_repl(); - if(!is_expression()) break; - EXPR(); - items.push_back(ctx()->s_expr.popx()); - if(curr().brackets_level) match_newlines_repl(); - } while(match(TK(","))); - ctx()->s_expr.push(make_expr( - std::move(items) - )); - } - - void exprOr(){ - auto e = make_expr(); - e->lhs = ctx()->s_expr.popx(); - parse_expression(PREC_LOGICAL_OR + 1); - e->rhs = ctx()->s_expr.popx(); - ctx()->s_expr.push(std::move(e)); - } - - void exprAnd(){ - auto e = make_expr(); - e->lhs = ctx()->s_expr.popx(); - parse_expression(PREC_LOGICAL_AND + 1); - e->rhs = ctx()->s_expr.popx(); - ctx()->s_expr.push(std::move(e)); - } - - void exprTernary(){ - auto e = make_expr(); - e->true_expr = ctx()->s_expr.popx(); - // cond - parse_expression(PREC_TERNARY + 1); - e->cond = ctx()->s_expr.popx(); - consume(TK("else")); - // if false - parse_expression(PREC_TERNARY + 1); - e->false_expr = ctx()->s_expr.popx(); - ctx()->s_expr.push(std::move(e)); - } - - void exprBinaryOp(){ - auto e = make_expr(); - e->op = prev().type; - e->lhs = ctx()->s_expr.popx(); - parse_expression(rules[e->op].precedence + 1); - e->rhs = ctx()->s_expr.popx(); - ctx()->s_expr.push(std::move(e)); - } - - void exprNot() { - parse_expression(PREC_LOGICAL_NOT + 1); - ctx()->s_expr.push(make_expr(ctx()->s_expr.popx())); - } - - void exprUnaryOp(){ - TokenIndex op = prev().type; - parse_expression(PREC_UNARY + 1); - switch(op){ - case TK("-"): - ctx()->s_expr.push(make_expr(ctx()->s_expr.popx())); - break; - case TK("*"): - ctx()->s_expr.push(make_expr(1, ctx()->s_expr.popx())); - break; - case TK("**"): - ctx()->s_expr.push(make_expr(2, ctx()->s_expr.popx())); - break; - default: FATAL_ERROR(); - } - } - - void exprGroup(){ - match_newlines_repl(); - EXPR_TUPLE(); // () is just for change precedence - match_newlines_repl(); - consume(TK(")")); - if(ctx()->s_expr.top()->is_tuple()) return; - Expr_ g = make_expr(ctx()->s_expr.popx()); - ctx()->s_expr.push(std::move(g)); - } - template void _consume_comp(Expr_ expr){ static_assert(std::is_base_of::value); @@ -324,792 +83,58 @@ class Compiler { match_newlines_repl(); } - void exprList() { - int line = prev().line; - std::vector items; - do { - match_newlines_repl(); - if (curr().type == TK("]")) break; - EXPR(); - items.push_back(ctx()->s_expr.popx()); - match_newlines_repl(); - if(items.size()==1 && match(TK("for"))){ - _consume_comp(std::move(items[0])); - consume(TK("]")); - return; - } - match_newlines_repl(); - } while (match(TK(","))); - consume(TK("]")); - auto e = make_expr(std::move(items)); - e->line = line; // override line - ctx()->s_expr.push(std::move(e)); - } + void exprLiteral(); + void exprLong(); + void exprFString(); + void exprLambda(); + void exprTuple(); + void exprOr(); + void exprAnd(); + void exprTernary(); + void exprBinaryOp(); + void exprNot(); + void exprUnaryOp(); + void exprGroup(); + void exprList(); + void exprMap(); + void exprCall(); + void exprName(); + void exprAttrib(); + void exprSubscr(); + void exprLiteral0(); - void exprMap() { - bool parsing_dict = false; // {...} may be dict or set - std::vector items; - do { - match_newlines_repl(); - if (curr().type == TK("}")) break; - EXPR(); - int star_level = ctx()->s_expr.top()->star_level(); - if(star_level==2 || curr().type == TK(":")){ - parsing_dict = true; - } - if(parsing_dict){ - auto dict_item = make_expr(); - if(star_level == 2){ - dict_item->key = nullptr; - dict_item->value = ctx()->s_expr.popx(); - }else{ - consume(TK(":")); - EXPR(); - dict_item->key = ctx()->s_expr.popx(); - dict_item->value = ctx()->s_expr.popx(); - } - items.push_back(std::move(dict_item)); - }else{ - items.push_back(ctx()->s_expr.popx()); - } - match_newlines_repl(); - if(items.size()==1 && match(TK("for"))){ - if(parsing_dict) _consume_comp(std::move(items[0])); - else _consume_comp(std::move(items[0])); - consume(TK("}")); - return; - } - match_newlines_repl(); - } while (match(TK(","))); - consume(TK("}")); - if(items.size()==0 || parsing_dict){ - auto e = make_expr(std::move(items)); - ctx()->s_expr.push(std::move(e)); - }else{ - auto e = make_expr(std::move(items)); - ctx()->s_expr.push(std::move(e)); - } - } + void compile_block_body(); + Str _compile_import(); + void compile_normal_import(); + void compile_from_import(); + bool is_expression(); + void parse_expression(int precedence, bool push_stack=true); + void compile_if_stmt(); + void compile_while_loop(); + void compile_for_loop(); + void compile_try_except(); + void compile_decorated(); - void exprCall() { - auto e = make_expr(); - e->callable = ctx()->s_expr.popx(); - do { - match_newlines_repl(); - if (curr().type==TK(")")) break; - if(curr().type==TK("@id") && next().type==TK("=")) { - consume(TK("@id")); - Str key = prev().str(); - consume(TK("=")); - EXPR(); - e->kwargs.push_back({key, ctx()->s_expr.popx()}); - } else{ - EXPR(); - if(ctx()->s_expr.top()->star_level() == 2){ - // **kwargs - e->kwargs.push_back({"**", ctx()->s_expr.popx()}); - }else{ - // positional argument - if(!e->kwargs.empty()) SyntaxError("positional argument follows keyword argument"); - e->args.push_back(ctx()->s_expr.popx()); - } - } - match_newlines_repl(); - } while (match(TK(","))); - consume(TK(")")); - if(e->args.size() > 32767) SyntaxError("too many positional arguments"); - if(e->kwargs.size() > 32767) SyntaxError("too many keyword arguments"); - ctx()->s_expr.push(std::move(e)); - } + bool try_compile_assignment(); + void compile_stmt(); + void consume_type_hints(); + void compile_class(); + void _compile_f_args(FuncDecl_ decl, bool enable_type_hints); + void compile_function(const std::vector& decorators={}); - void exprName(){ - Str name = prev().str(); - NameScope scope = name_scope(); - if(ctx()->global_names.count(name)){ - scope = NAME_GLOBAL; - } - ctx()->s_expr.push(make_expr(name, scope)); - } - - void exprAttrib() { - consume(TK("@id")); - ctx()->s_expr.push( - make_expr(ctx()->s_expr.popx(), prev().str()) - ); - } - - void exprSubscr() { - auto e = make_expr(); - e->a = ctx()->s_expr.popx(); - auto slice = make_expr(); - bool is_slice = false; - // a[<0> : state<3> : state<5>] - int state = 0; - do{ - switch(state){ - case 0: - if(match(TK(":"))){ - is_slice=true; - state=2; - break; - } - if(match(TK("]"))) SyntaxError(); - EXPR_TUPLE(); - slice->start = ctx()->s_expr.popx(); - state=1; - break; - case 1: - if(match(TK(":"))){ - is_slice=true; - state=2; - break; - } - if(match(TK("]"))) goto __SUBSCR_END; - SyntaxError("expected ':' or ']'"); - break; - case 2: - if(match(TK(":"))){ - state=4; - break; - } - if(match(TK("]"))) goto __SUBSCR_END; - EXPR_TUPLE(); - slice->stop = ctx()->s_expr.popx(); - state=3; - break; - case 3: - if(match(TK(":"))){ - state=4; - break; - } - if(match(TK("]"))) goto __SUBSCR_END; - SyntaxError("expected ':' or ']'"); - break; - case 4: - if(match(TK("]"))) goto __SUBSCR_END; - EXPR_TUPLE(); - slice->step = ctx()->s_expr.popx(); - state=5; - break; - case 5: consume(TK("]")); goto __SUBSCR_END; - } - }while(true); -__SUBSCR_END: - if(is_slice){ - e->b = std::move(slice); - }else{ - if(state != 1) FATAL_ERROR(); - e->b = std::move(slice->start); - } - ctx()->s_expr.push(std::move(e)); - } - - void exprLiteral0() { - ctx()->s_expr.push(make_expr(prev().type)); - } - - void compile_block_body() { - consume(TK(":")); - if(curr().type!=TK("@eol") && curr().type!=TK("@eof")){ - compile_stmt(); // inline block - return; - } - if(!match_newlines(mode()==REPL_MODE)){ - SyntaxError("expected a new line after ':'"); - } - consume(TK("@indent")); - while (curr().type != TK("@dedent")) { - match_newlines(); - compile_stmt(); - match_newlines(); - } - consume(TK("@dedent")); - } - - Str _compile_import() { - if(name_scope() != NAME_GLOBAL) SyntaxError("import statement should be used in global scope"); - Opcode op = OP_IMPORT_NAME; - if(match(TK("."))) op = OP_IMPORT_NAME_REL; - consume(TK("@id")); - Str name = prev().str(); - ctx()->emit(op, StrName(name).index, prev().line); - return name; - } - - // import a as b - void compile_normal_import() { - do { - Str name = _compile_import(); - if (match(TK("as"))) { - consume(TK("@id")); - name = prev().str(); - } - ctx()->emit(OP_STORE_GLOBAL, StrName(name).index, prev().line); - } while (match(TK(","))); - consume_end_stmt(); - } - - // from a import b as c, d as e - void compile_from_import() { - _compile_import(); - consume(TK("import")); - if (match(TK("*"))) { - ctx()->emit(OP_IMPORT_STAR, BC_NOARG, prev().line); - consume_end_stmt(); - return; - } - do { - ctx()->emit(OP_DUP_TOP, BC_NOARG, BC_KEEPLINE); - consume(TK("@id")); - Str name = prev().str(); - ctx()->emit(OP_LOAD_ATTR, StrName(name).index, prev().line); - if (match(TK("as"))) { - consume(TK("@id")); - name = prev().str(); - } - ctx()->emit(OP_STORE_GLOBAL, StrName(name).index, prev().line); - } while (match(TK(","))); - ctx()->emit(OP_POP_TOP, BC_NOARG, BC_KEEPLINE); - consume_end_stmt(); - } - - bool is_expression(){ - PrattCallback prefix = rules[curr().type].prefix; - return prefix != nullptr; - } - - void parse_expression(int precedence, bool push_stack=true) { - PrattCallback prefix = rules[curr().type].prefix; - if (prefix == nullptr) SyntaxError(Str("expected an expression, got ") + TK_STR(curr().type)); - advance(); - (this->*prefix)(); - while (rules[curr().type].precedence >= precedence) { - TokenIndex op = curr().type; - advance(); - PrattCallback infix = rules[op].infix; - PK_ASSERT(infix != nullptr); - (this->*infix)(); - } - if(!push_stack) ctx()->emit_expr(); - } - - void compile_if_stmt() { - EXPR(false); // condition - int patch = ctx()->emit(OP_POP_JUMP_IF_FALSE, BC_NOARG, prev().line); - compile_block_body(); - if (match(TK("elif"))) { - int exit_patch = ctx()->emit(OP_JUMP_ABSOLUTE, BC_NOARG, prev().line); - ctx()->patch_jump(patch); - compile_if_stmt(); - ctx()->patch_jump(exit_patch); - } else if (match(TK("else"))) { - int exit_patch = ctx()->emit(OP_JUMP_ABSOLUTE, BC_NOARG, prev().line); - ctx()->patch_jump(patch); - compile_block_body(); - ctx()->patch_jump(exit_patch); - } else { - ctx()->patch_jump(patch); - } - } - - void compile_while_loop() { - ctx()->enter_block(WHILE_LOOP); - EXPR(false); // condition - int patch = ctx()->emit(OP_POP_JUMP_IF_FALSE, BC_NOARG, prev().line); - compile_block_body(); - ctx()->emit(OP_LOOP_CONTINUE, BC_NOARG, BC_KEEPLINE); - ctx()->patch_jump(patch); - ctx()->exit_block(); - } - - void compile_for_loop() { - Expr_ vars = EXPR_VARS(); - consume(TK("in")); - EXPR_TUPLE(false); - ctx()->emit(OP_GET_ITER, BC_NOARG, BC_KEEPLINE); - ctx()->enter_block(FOR_LOOP); - ctx()->emit(OP_FOR_ITER, BC_NOARG, BC_KEEPLINE); - bool ok = vars->emit_store(ctx()); - if(!ok) SyntaxError(); // this error occurs in `vars` instead of this line, but...nevermind - compile_block_body(); - ctx()->emit(OP_LOOP_CONTINUE, BC_NOARG, BC_KEEPLINE); - ctx()->exit_block(); - } - - void compile_try_except() { - ctx()->enter_block(TRY_EXCEPT); - compile_block_body(); - std::vector patches = { - ctx()->emit(OP_JUMP_ABSOLUTE, BC_NOARG, BC_KEEPLINE) - }; - ctx()->exit_block(); - do { - consume(TK("except")); - if(match(TK("@id"))){ - ctx()->emit(OP_EXCEPTION_MATCH, StrName(prev().str()).index, prev().line); - }else{ - ctx()->emit(OP_LOAD_TRUE, BC_NOARG, BC_KEEPLINE); - } - int patch = ctx()->emit(OP_POP_JUMP_IF_FALSE, BC_NOARG, BC_KEEPLINE); - // pop the exception on match - ctx()->emit(OP_POP_EXCEPTION, BC_NOARG, BC_KEEPLINE); - compile_block_body(); - patches.push_back(ctx()->emit(OP_JUMP_ABSOLUTE, BC_NOARG, BC_KEEPLINE)); - ctx()->patch_jump(patch); - }while(curr().type == TK("except")); - // no match, re-raise - ctx()->emit(OP_RE_RAISE, BC_NOARG, BC_KEEPLINE); - for (int patch : patches) ctx()->patch_jump(patch); - } - - void compile_decorated(){ - std::vector decorators; - do{ - EXPR(); - decorators.push_back(ctx()->s_expr.popx()); - if(!match_newlines_repl()) SyntaxError(); - }while(match(TK("@"))); - consume(TK("def")); - compile_function(decorators); - } - - bool try_compile_assignment(){ - switch (curr().type) { - case TK("+="): case TK("-="): case TK("*="): case TK("/="): case TK("//="): case TK("%="): - case TK("<<="): case TK(">>="): case TK("&="): case TK("|="): case TK("^="): { - Expr* lhs_p = ctx()->s_expr.top().get(); - if(lhs_p->is_starred()) SyntaxError(); - if(ctx()->is_compiling_class) SyntaxError("can't use inplace operator in class definition"); - advance(); - auto e = make_expr(); - e->op = prev().type - 1; // -1 to remove = - e->lhs = ctx()->s_expr.popx(); - EXPR_TUPLE(); - e->rhs = ctx()->s_expr.popx(); - if(e->is_starred()) SyntaxError(); - e->emit(ctx()); - bool ok = lhs_p->emit_store(ctx()); - if(!ok) SyntaxError(); - } return true; - case TK("="): { - int n = 0; - while(match(TK("="))){ - EXPR_TUPLE(); - Expr* _tp = ctx()->s_expr.top().get(); - if(ctx()->is_compiling_class && _tp->is_tuple()){ - SyntaxError("can't use unpack tuple in class definition"); - } - n += 1; - } - if(ctx()->is_compiling_class && n>1){ - SyntaxError("can't assign to multiple targets in class definition"); - } - // stack size is n+1 - Expr_ val = ctx()->s_expr.popx(); - val->emit(ctx()); - for(int j=1; jemit(OP_DUP_TOP, BC_NOARG, BC_KEEPLINE); - for(int j=0; js_expr.popx(); - if(e->is_starred()) SyntaxError(); - bool ok = e->emit_store(ctx()); - if(!ok) SyntaxError(); - } - } return true; - default: return false; - } - } - - void compile_stmt() { - advance(); - int kw_line = prev().line; // backup line number - switch(prev().type){ - case TK("break"): - if (!ctx()->is_curr_block_loop()) SyntaxError("'break' outside loop"); - ctx()->emit(OP_LOOP_BREAK, BC_NOARG, kw_line); - consume_end_stmt(); - break; - case TK("continue"): - if (!ctx()->is_curr_block_loop()) SyntaxError("'continue' not properly in loop"); - ctx()->emit(OP_LOOP_CONTINUE, BC_NOARG, kw_line); - consume_end_stmt(); - break; - case TK("yield"): - if (contexts.size() <= 1) SyntaxError("'yield' outside function"); - EXPR_TUPLE(false); - // if yield present, mark the function as generator - ctx()->co->is_generator = true; - ctx()->emit(OP_YIELD_VALUE, BC_NOARG, kw_line); - consume_end_stmt(); - break; - case TK("yield from"): - if (contexts.size() <= 1) SyntaxError("'yield from' outside function"); - EXPR_TUPLE(false); - // if yield from present, mark the function as generator - ctx()->co->is_generator = true; - ctx()->emit(OP_GET_ITER, BC_NOARG, kw_line); - ctx()->enter_block(FOR_LOOP); - ctx()->emit(OP_FOR_ITER, BC_NOARG, BC_KEEPLINE); - ctx()->emit(OP_YIELD_VALUE, BC_NOARG, BC_KEEPLINE); - ctx()->emit(OP_LOOP_CONTINUE, BC_NOARG, BC_KEEPLINE); - ctx()->exit_block(); - consume_end_stmt(); - break; - case TK("return"): - if (contexts.size() <= 1) SyntaxError("'return' outside function"); - if(match_end_stmt()){ - ctx()->emit(OP_LOAD_NONE, BC_NOARG, kw_line); - }else{ - EXPR_TUPLE(false); - consume_end_stmt(); - } - ctx()->emit(OP_RETURN_VALUE, BC_NOARG, kw_line); - break; - /*************************************************/ - case TK("if"): compile_if_stmt(); break; - case TK("while"): compile_while_loop(); break; - case TK("for"): compile_for_loop(); break; - case TK("import"): compile_normal_import(); break; - case TK("from"): compile_from_import(); break; - case TK("def"): compile_function(); break; - case TK("@"): compile_decorated(); break; - case TK("try"): compile_try_except(); break; - case TK("pass"): consume_end_stmt(); break; - /*************************************************/ - case TK("++"):{ - consume(TK("@id")); - StrName name(prev().sv()); - switch(name_scope()){ - case NAME_LOCAL: - ctx()->emit(OP_INC_FAST, ctx()->add_varname(name), prev().line); - break; - case NAME_GLOBAL: - ctx()->emit(OP_INC_GLOBAL, name.index, prev().line); - break; - default: SyntaxError(); break; - } - consume_end_stmt(); - break; - } - case TK("--"):{ - consume(TK("@id")); - StrName name(prev().sv()); - switch(name_scope()){ - case NAME_LOCAL: - ctx()->emit(OP_DEC_FAST, ctx()->add_varname(name), prev().line); - break; - case NAME_GLOBAL: - ctx()->emit(OP_DEC_GLOBAL, name.index, prev().line); - break; - default: SyntaxError(); break; - } - consume_end_stmt(); - break; - } - case TK("assert"): - EXPR_TUPLE(false); - ctx()->emit(OP_ASSERT, BC_NOARG, kw_line); - consume_end_stmt(); - break; - case TK("global"): - do { - consume(TK("@id")); - ctx()->global_names.insert(prev().str()); - } while (match(TK(","))); - consume_end_stmt(); - break; - case TK("raise"): { - consume(TK("@id")); - int dummy_t = StrName(prev().str()).index; - if(match(TK("(")) && !match(TK(")"))){ - EXPR(false); consume(TK(")")); - }else{ - ctx()->emit(OP_LOAD_NONE, BC_NOARG, kw_line); - } - ctx()->emit(OP_RAISE, dummy_t, kw_line); - consume_end_stmt(); - } break; - case TK("del"): { - EXPR_TUPLE(); - Expr_ e = ctx()->s_expr.popx(); - bool ok = e->emit_del(ctx()); - if(!ok) SyntaxError(); - consume_end_stmt(); - } break; - case TK("with"): { - EXPR(false); - consume(TK("as")); - consume(TK("@id")); - Expr_ e = make_expr(prev().str(), name_scope()); - bool ok = e->emit_store(ctx()); - if(!ok) SyntaxError(); - e->emit(ctx()); - ctx()->emit(OP_WITH_ENTER, BC_NOARG, prev().line); - compile_block_body(); - e->emit(ctx()); - ctx()->emit(OP_WITH_EXIT, BC_NOARG, prev().line); - } break; - /*************************************************/ - case TK("$label"): { - if(mode()!=EXEC_MODE) SyntaxError("'label' is only available in EXEC_MODE"); - consume(TK("@id")); - bool ok = ctx()->add_label(prev().str()); - if(!ok) SyntaxError("label " + prev().str().escape() + " already exists"); - consume_end_stmt(); - } break; - case TK("$goto"): - if(mode()!=EXEC_MODE) SyntaxError("'goto' is only available in EXEC_MODE"); - consume(TK("@id")); - ctx()->emit(OP_GOTO, StrName(prev().str()).index, prev().line); - consume_end_stmt(); - break; - /*************************************************/ - // handle dangling expression or assignment - default: { - advance(-1); // do revert since we have pre-called advance() at the beginning - EXPR_TUPLE(); - // eat variable's type hint - if(match(TK(":"))) consume_type_hints(); - if(!try_compile_assignment()){ - if(!ctx()->s_expr.empty() && ctx()->s_expr.top()->is_starred()){ - SyntaxError(); - } - ctx()->emit_expr(); - if((mode()==CELL_MODE || mode()==REPL_MODE) && name_scope()==NAME_GLOBAL){ - ctx()->emit(OP_PRINT_EXPR, BC_NOARG, BC_KEEPLINE); - }else{ - ctx()->emit(OP_POP_TOP, BC_NOARG, BC_KEEPLINE); - } - } - consume_end_stmt(); - } - } - } - - void consume_type_hints(){ - EXPR(); - ctx()->s_expr.pop(); - } - - void compile_class(){ - consume(TK("@id")); - int namei = StrName(prev().str()).index; - int super_namei = -1; - if(match(TK("("))){ - if(match(TK("@id"))){ - super_namei = StrName(prev().str()).index; - } - consume(TK(")")); - } - if(super_namei == -1) ctx()->emit(OP_LOAD_NONE, BC_NOARG, prev().line); - else ctx()->emit(OP_LOAD_GLOBAL, super_namei, prev().line); - ctx()->emit(OP_BEGIN_CLASS, namei, BC_KEEPLINE); - ctx()->is_compiling_class = true; - compile_block_body(); - ctx()->is_compiling_class = false; - ctx()->emit(OP_END_CLASS, BC_NOARG, BC_KEEPLINE); - } - - void _compile_f_args(FuncDecl_ decl, bool enable_type_hints){ - int state = 0; // 0 for args, 1 for *args, 2 for k=v, 3 for **kwargs - do { - if(state > 3) SyntaxError(); - if(state == 3) SyntaxError("**kwargs should be the last argument"); - match_newlines(); - if(match(TK("*"))){ - if(state < 1) state = 1; - else SyntaxError("*args should be placed before **kwargs"); - } - else if(match(TK("**"))){ - state = 3; - } - consume(TK("@id")); - StrName name = prev().str(); - - // check duplicate argument name - for(int j: decl->args){ - if(decl->code->varnames[j] == name) { - SyntaxError("duplicate argument name"); - } - } - for(auto& kv: decl->kwargs){ - if(decl->code->varnames[kv.key] == name){ - SyntaxError("duplicate argument name"); - } - } - if(decl->starred_arg!=-1 && decl->code->varnames[decl->starred_arg] == name){ - SyntaxError("duplicate argument name"); - } - if(decl->starred_kwarg!=-1 && decl->code->varnames[decl->starred_kwarg] == name){ - SyntaxError("duplicate argument name"); - } - - // eat type hints - if(enable_type_hints && match(TK(":"))) consume_type_hints(); - if(state == 0 && curr().type == TK("=")) state = 2; - int index = ctx()->add_varname(name); - switch (state) - { - case 0: - decl->args.push_back(index); - break; - case 1: - decl->starred_arg = index; - state+=1; - break; - case 2: { - consume(TK("=")); - PyObject* value = read_literal(); - if(value == nullptr){ - SyntaxError(Str("default argument must be a literal")); - } - decl->kwargs.push_back(FuncDecl::KwArg{index, value}); - } break; - case 3: - decl->starred_kwarg = index; - state+=1; - break; - } - } while (match(TK(","))); - } - - void compile_function(const std::vector& decorators={}){ - const char* _start = curr().start; - consume(TK("@id")); - Str decl_name = prev().str(); - FuncDecl_ decl = push_f_context(decl_name); - consume(TK("(")); - if (!match(TK(")"))) { - _compile_f_args(decl, true); - consume(TK(")")); - } - if(match(TK("->"))) consume_type_hints(); - const char* _end = curr().start; - decl->signature = Str(_start, _end-_start); - compile_block_body(); - pop_context(); - - PyObject* docstring = nullptr; - if(decl->code->codes.size()>=2 && decl->code->codes[0].op == OP_LOAD_CONST && decl->code->codes[1].op == OP_POP_TOP){ - PyObject* c = decl->code->consts[decl->code->codes[0].arg]; - if(is_type(c, vm->tp_str)){ - decl->code->codes[0].op = OP_NO_OP; - decl->code->codes[1].op = OP_NO_OP; - docstring = c; - } - } - if(docstring != nullptr){ - decl->docstring = PK_OBJ_GET(Str, docstring); - } - ctx()->emit(OP_LOAD_FUNCTION, ctx()->add_func_decl(decl), prev().line); - - // add decorators - for(auto it=decorators.rbegin(); it!=decorators.rend(); ++it){ - (*it)->emit(ctx()); - ctx()->emit(OP_ROT_TWO, BC_NOARG, (*it)->line); - ctx()->emit(OP_LOAD_NULL, BC_NOARG, BC_KEEPLINE); - ctx()->emit(OP_ROT_TWO, BC_NOARG, BC_KEEPLINE); - ctx()->emit(OP_CALL, 1, (*it)->line); - } - if(!ctx()->is_compiling_class){ - auto e = make_expr(decl_name, name_scope()); - e->emit_store(ctx()); - }else{ - int index = StrName(decl_name).index; - ctx()->emit(OP_STORE_CLASS_ATTR, index, prev().line); - } - } - - PyObject* to_object(const TokenValue& value){ - PyObject* obj = nullptr; - if(std::holds_alternative(value)){ - obj = VAR(std::get(value)); - } - if(std::holds_alternative(value)){ - obj = VAR(std::get(value)); - } - if(std::holds_alternative(value)){ - obj = VAR(std::get(value)); - } - if(obj == nullptr) FATAL_ERROR(); - return obj; - } - - PyObject* read_literal(){ - advance(); - switch(prev().type){ - case TK("-"): { - consume(TK("@num")); - PyObject* val = to_object(prev().value); - return vm->py_negate(val); - } - case TK("@num"): return to_object(prev().value); - case TK("@str"): return to_object(prev().value); - case TK("True"): return VAR(true); - case TK("False"): return VAR(false); - case TK("None"): return vm->None; - case TK("..."): return vm->Ellipsis; - default: break; - } - return nullptr; - } + PyObject* to_object(const TokenValue& value); + PyObject* read_literal(); void SyntaxError(Str msg){ lexer->throw_err("SyntaxError", msg, err().line, err().start); } void SyntaxError(){ lexer->throw_err("SyntaxError", "invalid syntax", err().line, err().start); } void IndentationError(Str msg){ lexer->throw_err("IndentationError", msg, err().line, err().start); } public: - Compiler(VM* vm, const Str& source, const Str& filename, CompileMode mode, bool unknown_global_scope=false){ - this->vm = vm; - this->used = false; - this->unknown_global_scope = unknown_global_scope; - this->lexer = std::make_unique( - make_sp(source, filename, mode) - ); - init_pratt_rules(); - } + Compiler(VM* vm, const Str& source, const Str& filename, CompileMode mode, bool unknown_global_scope=false); + CodeObject_ compile(); - CodeObject_ compile(){ - if(used) FATAL_ERROR(); - used = true; - - tokens = lexer->run(); - // if(lexer->src->filename == ""){ - // for(auto& t: tokens) std::cout << t.info() << std::endl; - // } - - CodeObject_ code = push_global_context(); - - advance(); // skip @sof, so prev() is always valid - match_newlines(); // skip possible leading '\n' - - if(mode()==EVAL_MODE) { - EXPR_TUPLE(false); - consume(TK("@eof")); - ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE); - pop_context(); - return code; - }else if(mode()==JSON_MODE){ - EXPR(); - Expr_ e = ctx()->s_expr.popx(); - if(!e->is_json_object()) SyntaxError("expect a JSON object, literal or array"); - consume(TK("@eof")); - e->emit(ctx()); - ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE); - pop_context(); - return code; - } - - while (!match(TK("@eof"))) { - if (match(TK("class"))) { - compile_class(); - } else { - compile_stmt(); - } - match_newlines(); - } - pop_context(); - return code; - } + Compiler(const Compiler&) = delete; + Compiler& operator=(const Compiler&) = delete; }; #undef BC_NOARG diff --git a/include/pocketpy/config.h b/include/pocketpy/config.h index 1ee91ba9..3ad233c6 100644 --- a/include/pocketpy/config.h +++ b/include/pocketpy/config.h @@ -88,8 +88,6 @@ namespace pkpy{ #define PK_MODULE_RE 1 #define PK_MODULE_RANDOM 1 -#define PK_MODULE_BASE64 1 -#define PK_MODULE_LINALG 1 #define PK_MODULE_EASING 1 #endif \ No newline at end of file diff --git a/include/pocketpy/easing.h b/include/pocketpy/easing.h index 7c58b234..4b6acacf 100644 --- a/include/pocketpy/easing.h +++ b/include/pocketpy/easing.h @@ -1,19 +1,9 @@ #pragma once -#include "common.h" - -#if PK_MODULE_EASING - #include "cffi.h" namespace pkpy{ void add_module_easing(VM* vm); -} // namespace pkpy - -#else - -ADD_MODULE_PLACEHOLDER(easing) - -#endif \ No newline at end of file +} // namespace pkpy \ No newline at end of file diff --git a/include/pocketpy/io.h b/include/pocketpy/io.h index 5a5dbcf9..019a33db 100644 --- a/include/pocketpy/io.h +++ b/include/pocketpy/io.h @@ -2,6 +2,12 @@ #include "cffi.h" +namespace pkpy{ + Bytes _default_import_handler(const Str& name); + void add_module_os(VM* vm); + void add_module_io(VM* vm); +} + #if PK_ENABLE_OS #include @@ -9,21 +15,6 @@ namespace pkpy{ -inline Bytes _default_import_handler(const Str& name){ - std::filesystem::path path(name.sv()); - bool exists = std::filesystem::exists(path); - if(!exists) return Bytes(); - std::string cname = name.str(); - FILE* fp = fopen(cname.c_str(), "rb"); - if(!fp) return Bytes(); - fseek(fp, 0, SEEK_END); - std::vector buffer(ftell(fp)); - fseek(fp, 0, SEEK_SET); - fread(buffer.data(), 1, buffer.size(), fp); - fclose(fp); - return Bytes(std::move(buffer)); -}; - struct FileIO { PY_CLASS(FileIO, io, FileIO) @@ -32,156 +23,11 @@ struct FileIO { FILE* fp; bool is_text() const { return mode != "rb" && mode != "wb" && mode != "ab"; } - - FileIO(VM* vm, std::string file, std::string mode): file(file), mode(mode) { - fp = fopen(file.c_str(), mode.c_str()); - if(!fp) vm->IOError(strerror(errno)); - } - - void close(){ - if(fp == nullptr) return; - fclose(fp); - fp = nullptr; - } - - static void _register(VM* vm, PyObject* mod, PyObject* type){ - vm->bind_constructor<3>(type, [](VM* vm, ArgsView args){ - return VAR_T(FileIO, - vm, CAST(Str&, args[1]).str(), CAST(Str&, args[2]).str() - ); - }); - - vm->bind_method<0>(type, "read", [](VM* vm, ArgsView args){ - FileIO& io = CAST(FileIO&, args[0]); - fseek(io.fp, 0, SEEK_END); - std::vector buffer(ftell(io.fp)); - fseek(io.fp, 0, SEEK_SET); - fread(buffer.data(), 1, buffer.size(), io.fp); - Bytes b(std::move(buffer)); - if(io.is_text()) return VAR(Str(b.str())); - return VAR(std::move(b)); - }); - - vm->bind_method<1>(type, "write", [](VM* vm, ArgsView args){ - FileIO& io = CAST(FileIO&, args[0]); - if(io.is_text()){ - Str& s = CAST(Str&, args[1]); - fwrite(s.data, 1, s.length(), io.fp); - }else{ - Bytes& buffer = CAST(Bytes&, args[1]); - fwrite(buffer.data(), 1, buffer.size(), io.fp); - } - return vm->None; - }); - - vm->bind_method<0>(type, "close", [](VM* vm, ArgsView args){ - FileIO& io = CAST(FileIO&, args[0]); - io.close(); - return vm->None; - }); - - vm->bind_method<0>(type, "__exit__", [](VM* vm, ArgsView args){ - FileIO& io = CAST(FileIO&, args[0]); - io.close(); - return vm->None; - }); - - vm->bind_method<0>(type, "__enter__", PK_LAMBDA(vm->None)); - } + FileIO(VM* vm, std::string file, std::string mode); + void close(); + static void _register(VM* vm, PyObject* mod, PyObject* type); }; -inline void add_module_io(VM* vm){ - PyObject* mod = vm->new_module("io"); - FileIO::register_class(vm, mod); - vm->bind_builtin_func<2>("open", [](VM* vm, ArgsView args){ - static StrName m_io("io"); - static StrName m_FileIO("FileIO"); - return vm->call(vm->_modules[m_io]->attr(m_FileIO), args[0], args[1]); - }); -} +#endif -inline void add_module_os(VM* vm){ - PyObject* mod = vm->new_module("os"); - PyObject* path_obj = vm->heap.gcnew(vm->tp_object, {}); - mod->attr().set("path", path_obj); - - // Working directory is shared by all VMs!! - vm->bind_func<0>(mod, "getcwd", [](VM* vm, ArgsView args){ - return VAR(std::filesystem::current_path().string()); - }); - - vm->bind_func<1>(mod, "chdir", [](VM* vm, ArgsView args){ - std::filesystem::path path(CAST(Str&, args[0]).sv()); - std::filesystem::current_path(path); - return vm->None; - }); - - vm->bind_func<1>(mod, "listdir", [](VM* vm, ArgsView args){ - std::filesystem::path path(CAST(Str&, args[0]).sv()); - std::filesystem::directory_iterator di; - try{ - di = std::filesystem::directory_iterator(path); - }catch(std::filesystem::filesystem_error& e){ - std::string msg = e.what(); - auto pos = msg.find_last_of(":"); - if(pos != std::string::npos) msg = msg.substr(pos + 1); - vm->IOError(Str(msg).lstrip()); - } - List ret; - for(auto& p: di) ret.push_back(VAR(p.path().filename().string())); - return VAR(ret); - }); - - vm->bind_func<1>(mod, "remove", [](VM* vm, ArgsView args){ - std::filesystem::path path(CAST(Str&, args[0]).sv()); - bool ok = std::filesystem::remove(path); - if(!ok) vm->IOError("operation failed"); - return vm->None; - }); - - vm->bind_func<1>(mod, "mkdir", [](VM* vm, ArgsView args){ - std::filesystem::path path(CAST(Str&, args[0]).sv()); - bool ok = std::filesystem::create_directory(path); - if(!ok) vm->IOError("operation failed"); - return vm->None; - }); - - vm->bind_func<1>(mod, "rmdir", [](VM* vm, ArgsView args){ - std::filesystem::path path(CAST(Str&, args[0]).sv()); - bool ok = std::filesystem::remove(path); - if(!ok) vm->IOError("operation failed"); - return vm->None; - }); - - vm->bind_func<-1>(path_obj, "join", [](VM* vm, ArgsView args){ - std::filesystem::path path; - for(int i=0; ibind_func<1>(path_obj, "exists", [](VM* vm, ArgsView args){ - std::filesystem::path path(CAST(Str&, args[0]).sv()); - bool exists = std::filesystem::exists(path); - return VAR(exists); - }); - - vm->bind_func<1>(path_obj, "basename", [](VM* vm, ArgsView args){ - std::filesystem::path path(CAST(Str&, args[0]).sv()); - return VAR(path.filename().string()); - }); -} - -} // namespace pkpy - - -#else - -namespace pkpy{ -inline void add_module_io(void* vm){} -inline void add_module_os(void* vm){} -inline Bytes _default_import_handler(const Str& name) { return Bytes(); } -} // namespace pkpy - -#endif \ No newline at end of file +} // namespace pkpy \ No newline at end of file diff --git a/include/pocketpy/lexer.h b/include/pocketpy/lexer.h index 30ad2353..fc699efd 100644 --- a/include/pocketpy/lexer.h +++ b/include/pocketpy/lexer.h @@ -113,449 +113,32 @@ struct Lexer { bool used = false; char peekchar() const{ return *curr_char; } + bool match_n_chars(int n, char c0); + bool match_string(const char* s); + int eat_spaces(); - bool match_n_chars(int n, char c0){ - const char* c = curr_char; - for(int i=0; i 0) return true; - int spaces = eat_spaces(); - if(peekchar() == '#') skip_line_comment(); - if(peekchar() == '\0' || peekchar() == '\n') return true; - // https://docs.python.org/3/reference/lexical_analysis.html#indentation - if(spaces > indents.top()){ - indents.push(spaces); - nexts.push_back(Token{TK("@indent"), token_start, 0, current_line, brackets_level}); - } else if(spaces < indents.top()){ - while(spaces < indents.top()){ - indents.pop(); - nexts.push_back(Token{TK("@dedent"), token_start, 0, current_line, brackets_level}); - } - if(spaces != indents.top()){ - return false; - } - } - return true; - } - - char eatchar() { - char c = peekchar(); - if(c == '\n') throw std::runtime_error("eatchar() cannot consume a newline"); - curr_char++; - return c; - } - - char eatchar_include_newline() { - char c = peekchar(); - curr_char++; - if (c == '\n'){ - current_line++; - src->line_starts.push_back(curr_char); - } - return c; - } - - int eat_name() { - curr_char--; - while(true){ - unsigned char c = peekchar(); - int u8bytes = utf8len(c, true); - if(u8bytes == 0) return 1; - if(u8bytes == 1){ - if(isalpha(c) || c=='_' || isdigit(c)) { - curr_char++; - continue; - }else{ - break; - } - } - // handle multibyte char - std::string u8str(curr_char, u8bytes); - if(u8str.size() != u8bytes) return 2; - uint32_t value = 0; - for(int k=0; k < u8bytes; k++){ - uint8_t b = u8str[k]; - if(k==0){ - if(u8bytes == 2) value = (b & 0b00011111) << 6; - else if(u8bytes == 3) value = (b & 0b00001111) << 12; - else if(u8bytes == 4) value = (b & 0b00000111) << 18; - }else{ - value |= (b & 0b00111111) << (6*(u8bytes-k-1)); - } - } - if(is_unicode_Lo_char(value)) curr_char += u8bytes; - else break; - } - - int length = (int)(curr_char - token_start); - if(length == 0) return 3; - std::string_view name(token_start, length); - - if(src->mode == JSON_MODE){ - if(name == "true"){ - add_token(TK("True")); - } else if(name == "false"){ - add_token(TK("False")); - } else if(name == "null"){ - add_token(TK("None")); - } else { - return 4; - } - return 0; - } - - if(kTokenKwMap.count(name)){ - add_token(kTokenKwMap.at(name)); - } else { - add_token(TK("@id")); - } - return 0; - } - - void skip_line_comment() { - char c; - while ((c = peekchar()) != '\0') { - if (c == '\n') return; - eatchar(); - } - } - - bool matchchar(char c) { - if (peekchar() != c) return false; - eatchar_include_newline(); - return true; - } - - void add_token(TokenIndex type, TokenValue value={}) { - switch(type){ - case TK("{"): case TK("["): case TK("("): brackets_level++; break; - case TK(")"): case TK("]"): case TK("}"): brackets_level--; break; - } - auto token = Token{ - type, - token_start, - (int)(curr_char - token_start), - current_line - ((type == TK("@eol")) ? 1 : 0), - brackets_level, - value - }; - // handle "not in", "is not", "yield from" - if(!nexts.empty()){ - auto& back = nexts.back(); - if(back.type == TK("not") && type == TK("in")){ - back.type = TK("not in"); - return; - } - if(back.type == TK("is") && type == TK("not")){ - back.type = TK("is not"); - return; - } - if(back.type == TK("yield") && type == TK("from")){ - back.type = TK("yield from"); - return; - } - nexts.push_back(token); - } - } - - void add_token_2(char c, TokenIndex one, TokenIndex two) { - if (matchchar(c)) add_token(two); - else add_token(one); - } - - Str eat_string_until(char quote, bool raw) { - bool quote3 = match_n_chars(2, quote); - std::vector buff; - while (true) { - char c = eatchar_include_newline(); - if (c == quote){ - if(quote3 && !match_n_chars(2, quote)){ - buff.push_back(c); - continue; - } - break; - } - if (c == '\0'){ - if(quote3 && src->mode == REPL_MODE){ - throw NeedMoreLines(false); - } - SyntaxError("EOL while scanning string literal"); - } - if (c == '\n'){ - if(!quote3) SyntaxError("EOL while scanning string literal"); - else{ - buff.push_back(c); - continue; - } - } - if (!raw && c == '\\') { - switch (eatchar_include_newline()) { - case '"': buff.push_back('"'); break; - case '\'': buff.push_back('\''); break; - case '\\': buff.push_back('\\'); break; - case 'n': buff.push_back('\n'); break; - case 'r': buff.push_back('\r'); break; - case 't': buff.push_back('\t'); break; - case 'x': { - char hex[3] = {eatchar(), eatchar(), '\0'}; - size_t parsed; - char code; - try{ - code = (char)Number::stoi(hex, &parsed, 16); - }catch(std::invalid_argument&){ - SyntaxError("invalid hex char"); - } - if (parsed != 2) SyntaxError("invalid hex char"); - buff.push_back(code); - } break; - default: SyntaxError("invalid escape char"); - } - } else { - buff.push_back(c); - } - } - return Str(buff.data(), buff.size()); - } - - void eat_string(char quote, StringType type) { - Str s = eat_string_until(quote, type == RAW_STRING); - if(type == F_STRING){ - add_token(TK("@fstr"), s); - }else{ - add_token(TK("@str"), s); - } - } - - void eat_number() { - static const std::regex pattern("^(0x)?[0-9a-fA-F]+(\\.[0-9]+)?(L)?"); - std::smatch m; - - const char* i = token_start; - while(*i != '\n' && *i != '\0') i++; - std::string s = std::string(token_start, i); - - bool ok = std::regex_search(s, m, pattern); - PK_ASSERT(ok); - // here is m.length()-1, since the first char was eaten by lex_token() - for(int j=0; j': { - if(matchchar('=')) add_token(TK(">=")); - else if(matchchar('>')) add_token_2('=', TK(">>"), TK(">>=")); - else add_token(TK(">")); - return true; - } - case '<': { - if(matchchar('=')) add_token(TK("<=")); - else if(matchchar('<')) add_token_2('=', TK("<<"), TK("<<=")); - else add_token(TK("<")); - return true; - } - case '-': { - if(matchchar('-')){ - add_token(TK("--")); - }else{ - if(matchchar('=')) add_token(TK("-=")); - else if(matchchar('>')) add_token(TK("->")); - else add_token(TK("-")); - } - return true; - } - case '!': - if(matchchar('=')) add_token(TK("!=")); - else SyntaxError("expected '=' after '!'"); - break; - case '*': - if (matchchar('*')) { - add_token(TK("**")); // '**' - } else { - add_token_2('=', TK("*"), TK("*=")); - } - return true; - case '/': - if(matchchar('/')) { - add_token_2('=', TK("//"), TK("//=")); - } else { - add_token_2('=', TK("/"), TK("/=")); - } - return true; - case ' ': case '\t': eat_spaces(); break; - case '\n': { - add_token(TK("@eol")); - if(!eat_indentation()) IndentationError("unindent does not match any outer indentation level"); - return true; - } - default: { - if(c == 'f'){ - if(matchchar('\'')) {eat_string('\'', F_STRING); return true;} - if(matchchar('"')) {eat_string('"', F_STRING); return true;} - }else if(c == 'r'){ - if(matchchar('\'')) {eat_string('\'', RAW_STRING); return true;} - if(matchchar('"')) {eat_string('"', RAW_STRING); return true;} - } - if (c >= '0' && c <= '9') { - eat_number(); - return true; - } - switch (eat_name()) - { - case 0: break; - case 1: SyntaxError("invalid char: " + std::string(1, c)); break; - case 2: SyntaxError("invalid utf8 sequence: " + std::string(1, c)); break; - case 3: SyntaxError("@id contains invalid char"); break; - case 4: SyntaxError("invalid JSON token"); break; - default: FATAL_ERROR(); - } - return true; - } - } - } - - token_start = curr_char; - while(indents.size() > 1){ - indents.pop(); - add_token(TK("@dedent")); - return true; - } - add_token(TK("@eof")); - return false; - } + void eat_number(); + bool lex_one_token(); /***** Error Reporter *****/ - void throw_err(Str type, Str msg){ - int lineno = current_line; - const char* cursor = curr_char; - if(peekchar() == '\n'){ - lineno--; - cursor--; - } - throw_err(type, msg, lineno, cursor); - } - - void throw_err(Str type, Str msg, int lineno, const char* cursor){ - auto e = Exception(type, msg); - e.st_push(src->snapshot(lineno, cursor)); - throw e; - } + void throw_err(Str type, Str msg); + void throw_err(Str type, Str msg, int lineno, const char* cursor); void SyntaxError(Str msg){ throw_err("SyntaxError", msg); } void SyntaxError(){ throw_err("SyntaxError", "invalid syntax"); } void IndentationError(Str msg){ throw_err("IndentationError", msg); } - - Lexer(shared_ptr src) { - this->src = src; - this->token_start = src->source.c_str(); - this->curr_char = src->source.c_str(); - this->nexts.push_back(Token{TK("@sof"), token_start, 0, current_line, brackets_level}); - this->indents.push(0); - } - - std::vector run() { - if(used) FATAL_ERROR(); - used = true; - while (lex_one_token()); - return std::move(nexts); - } + Lexer(shared_ptr src); + std::vector run(); }; } // namespace pkpy diff --git a/include/pocketpy/linalg.h b/include/pocketpy/linalg.h index 660e87e0..49e07b81 100644 --- a/include/pocketpy/linalg.h +++ b/include/pocketpy/linalg.h @@ -1,9 +1,5 @@ #pragma once -#include "common.h" - -#if PK_MODULE_LINALG - #include "cffi.h" namespace pkpy{ @@ -378,10 +374,4 @@ inline void add_module_linalg(VM* vm){ static_assert(sizeof(Py_) <= 64); -} // namespace pkpy - -#else - -ADD_MODULE_PLACEHOLDER(linalg) - -#endif \ No newline at end of file +} // namespace pkpy \ No newline at end of file diff --git a/preprocess.py b/prebuild.py similarity index 100% rename from preprocess.py rename to prebuild.py diff --git a/src/ceval.cpp b/src/ceval.cpp index a8e889cd..5d0eda8f 100644 --- a/src/ceval.cpp +++ b/src/ceval.cpp @@ -1,6 +1,4 @@ -#include "pocketpy/common.h" -#include "pocketpy/namedict.h" -#include "pocketpy/vm.h" +#include "pocketpy/ceval.h" namespace pkpy{ diff --git a/src/cffi.cpp b/src/cffi.cpp new file mode 100644 index 00000000..1d99237b --- /dev/null +++ b/src/cffi.cpp @@ -0,0 +1,348 @@ +#include "pocketpy/cffi.h" + +namespace pkpy{ + + void VoidP::_register(VM* vm, PyObject* mod, PyObject* type){ + vm->bind_default_constructor(type); + + vm->bind_func<1>(type, "from_hex", [](VM* vm, ArgsView args){ + std::string s = CAST(Str&, args[0]).str(); + size_t size; + intptr_t ptr = std::stoll(s, &size, 16); + if(size != s.size()) vm->ValueError("invalid literal for void_p(): " + s); + return VAR_T(VoidP, (void*)ptr); + }); + vm->bind_method<0>(type, "hex", [](VM* vm, ArgsView args){ + VoidP& self = _CAST(VoidP&, args[0]); + return VAR(self.hex()); + }); + + vm->bind__repr__(PK_OBJ_GET(Type, type), [](VM* vm, PyObject* obj){ + VoidP& self = _CAST(VoidP&, obj); + std::stringstream ss; + ss << ""; + return VAR(ss.str()); + }); + +#define BIND_CMP(name, op) \ + vm->bind##name(PK_OBJ_GET(Type, type), [](VM* vm, PyObject* lhs, PyObject* rhs){ \ + if(!is_non_tagged_type(rhs, VoidP::_type(vm))) return vm->NotImplemented; \ + return VAR(_CAST(VoidP&, lhs) op _CAST(VoidP&, rhs)); \ + }); + + BIND_CMP(__eq__, ==) + BIND_CMP(__lt__, <) + BIND_CMP(__le__, <=) + BIND_CMP(__gt__, >) + BIND_CMP(__ge__, >=) + +#undef BIND_CMP + + vm->bind__hash__(PK_OBJ_GET(Type, type), [](VM* vm, PyObject* obj){ + VoidP& self = _CAST(VoidP&, obj); + return reinterpret_cast(self.ptr); + }); + + vm->bind_method<1>(type, "set_base_offset", [](VM* vm, ArgsView args){ + VoidP& self = _CAST(VoidP&, args[0]); + if(is_non_tagged_type(args[1], vm->tp_str)){ + const Str& type = _CAST(Str&, args[1]); + self.base_offset = c99_sizeof(vm, type); + }else{ + self.base_offset = CAST(int, args[1]); + } + return vm->None; + }); + + vm->bind_method<0>(type, "get_base_offset", [](VM* vm, ArgsView args){ + VoidP& self = _CAST(VoidP&, args[0]); + return VAR(self.base_offset); + }); + + vm->bind_method<1>(type, "offset", [](VM* vm, ArgsView args){ + VoidP& self = _CAST(VoidP&, args[0]); + i64 offset = CAST(i64, args[1]); + return VAR_T(VoidP, (char*)self.ptr + offset * self.base_offset); + }); + + vm->bind__add__(PK_OBJ_GET(Type, type), [](VM* vm, PyObject* lhs, PyObject* rhs){ + VoidP& self = _CAST(VoidP&, lhs); + i64 offset = CAST(i64, rhs); + return VAR_T(VoidP, (char*)self.ptr + offset); + }); + + vm->bind__sub__(PK_OBJ_GET(Type, type), [](VM* vm, PyObject* lhs, PyObject* rhs){ + VoidP& self = _CAST(VoidP&, lhs); + i64 offset = CAST(i64, rhs); + return VAR_T(VoidP, (char*)self.ptr - offset); + }); + +#define BIND_SETGET(T, name) \ + vm->bind_method<0>(type, "read_" name, [](VM* vm, ArgsView args){ \ + VoidP& self = _CAST(VoidP&, args[0]); \ + return VAR(*(T*)self.ptr); \ + }); \ + vm->bind_method<1>(type, "write_" name, [](VM* vm, ArgsView args){ \ + VoidP& self = _CAST(VoidP&, args[0]); \ + *(T*)self.ptr = CAST(T, args[1]); \ + return vm->None; \ + }); + + BIND_SETGET(char, "char") + BIND_SETGET(unsigned char, "uchar") + BIND_SETGET(short, "short") + BIND_SETGET(unsigned short, "ushort") + BIND_SETGET(int, "int") + BIND_SETGET(unsigned int, "uint") + BIND_SETGET(long, "long") + BIND_SETGET(unsigned long, "ulong") + BIND_SETGET(long long, "longlong") + BIND_SETGET(unsigned long long, "ulonglong") + BIND_SETGET(float, "float") + BIND_SETGET(double, "double") + BIND_SETGET(bool, "bool") + BIND_SETGET(void*, "void_p") + + vm->bind_method<1>(type, "read_bytes", [](VM* vm, ArgsView args){ + VoidP& self = _CAST(VoidP&, args[0]); + i64 size = CAST(i64, args[1]); + std::vector buffer(size); + memcpy(buffer.data(), self.ptr, size); + return VAR(Bytes(std::move(buffer))); + }); + + vm->bind_method<1>(type, "write_bytes", [](VM* vm, ArgsView args){ + VoidP& self = _CAST(VoidP&, args[0]); + Bytes& bytes = CAST(Bytes&, args[1]); + memcpy(self.ptr, bytes.data(), bytes.size()); + return vm->None; + }); + +#undef BIND_SETGET + } + + void C99Struct::_register(VM* vm, PyObject* mod, PyObject* type){ + vm->bind_constructor<-1>(type, [](VM* vm, ArgsView args){ + if(args.size() == 1+1){ + if(is_int(args[1])){ + int size = _CAST(int, args[1]); + return VAR_T(C99Struct, size); + } + if(is_non_tagged_type(args[1], vm->tp_str)){ + const Str& s = _CAST(Str&, args[1]); + return VAR_T(C99Struct, (void*)s.data, s.size); + } + if(is_non_tagged_type(args[1], vm->tp_bytes)){ + const Bytes& b = _CAST(Bytes&, args[1]); + return VAR_T(C99Struct, (void*)b.data(), b.size()); + } + vm->TypeError("expected int, str or bytes"); + return vm->None; + } + if(args.size() == 1+2){ + void* p = CAST(void*, args[1]); + int size = CAST(int, args[2]); + return VAR_T(C99Struct, p, size); + } + vm->TypeError("expected 1 or 2 arguments"); + return vm->None; + }); + + vm->bind_method<0>(type, "addr", [](VM* vm, ArgsView args){ + C99Struct& self = _CAST(C99Struct&, args[0]); + return VAR_T(VoidP, self.p); + }); + + vm->bind_method<0>(type, "size", [](VM* vm, ArgsView args){ + C99Struct& self = _CAST(C99Struct&, args[0]); + return VAR(self.size); + }); + + vm->bind_method<0>(type, "copy", [](VM* vm, ArgsView args){ + const C99Struct& self = _CAST(C99Struct&, args[0]); + return VAR_T(C99Struct, self); + }); + + vm->bind_method<0>(type, "to_string", [](VM* vm, ArgsView args){ + C99Struct& self = _CAST(C99Struct&, args[0]); + return VAR(Str(self.p, self.size)); + }); + + vm->bind_method<0>(type, "to_bytes", [](VM* vm, ArgsView args){ + C99Struct& self = _CAST(C99Struct&, args[0]); + std::vector buffer(self.size); + memcpy(buffer.data(), self.p, self.size); + return VAR(Bytes(std::move(buffer))); + }); + + vm->bind__eq__(PK_OBJ_GET(Type, type), [](VM* vm, PyObject* lhs, PyObject* rhs){ + C99Struct& self = _CAST(C99Struct&, lhs); + if(!is_non_tagged_type(rhs, C99Struct::_type(vm))) return vm->NotImplemented; + C99Struct& other = _CAST(C99Struct&, rhs); + bool ok = self.size == other.size && memcmp(self.p, other.p, self.size) == 0; + return VAR(ok); + }); + +#define BIND_SETGET(T, name) \ + vm->bind(type, "read_" name "(self, offset=0)", [](VM* vm, ArgsView args){ \ + C99Struct& self = _CAST(C99Struct&, args[0]); \ + i64 offset = CAST(i64, args[1]); \ + void* ptr = self.p + offset; \ + return VAR(*(T*)ptr); \ + }); \ + vm->bind(type, "write_" name "(self, value, offset=0)", [](VM* vm, ArgsView args){ \ + C99Struct& self = _CAST(C99Struct&, args[0]); \ + i64 offset = CAST(i64, args[2]); \ + void* ptr = self.p + offset; \ + *(T*)ptr = CAST(T, args[1]); \ + return vm->None; \ + }); + + BIND_SETGET(char, "char") + BIND_SETGET(unsigned char, "uchar") + BIND_SETGET(short, "short") + BIND_SETGET(unsigned short, "ushort") + BIND_SETGET(int, "int") + BIND_SETGET(unsigned int, "uint") + BIND_SETGET(long, "long") + BIND_SETGET(unsigned long, "ulong") + BIND_SETGET(long long, "longlong") + BIND_SETGET(unsigned long long, "ulonglong") + BIND_SETGET(float, "float") + BIND_SETGET(double, "double") + BIND_SETGET(bool, "bool") + BIND_SETGET(void*, "void_p") +#undef BIND_SETGET + + // patch VoidP + type = vm->_t(VoidP::_type(vm)); + + vm->bind_method<1>(type, "read_struct", [](VM* vm, ArgsView args){ + VoidP& self = _CAST(VoidP&, args[0]); + const Str& type = CAST(Str&, args[1]); + int size = c99_sizeof(vm, type); + return VAR_T(C99Struct, self.ptr, size); + }); + + vm->bind_method<1>(type, "write_struct", [](VM* vm, ArgsView args){ + VoidP& self = _CAST(VoidP&, args[0]); + C99Struct& other = CAST(C99Struct&, args[1]); + memcpy(self.ptr, other.p, other.size); + return vm->None; + }); + } + + void C99ReflType::_register(VM* vm, PyObject* mod, PyObject* type){ + vm->bind_notimplemented_constructor(type); + + vm->bind_method<0>(type, "__call__", [](VM* vm, ArgsView args){ + C99ReflType& self = _CAST(C99ReflType&, args[0]); + return VAR_T(C99Struct, nullptr, self.size); + }); + + vm->bind_method<0>(type, "__repr__", [](VM* vm, ArgsView args){ + C99ReflType& self = _CAST(C99ReflType&, args[0]); + return VAR(""); + }); + + vm->bind_method<0>(type, "name", [](VM* vm, ArgsView args){ + C99ReflType& self = _CAST(C99ReflType&, args[0]); + return VAR(self.name); + }); + + vm->bind_method<0>(type, "size", [](VM* vm, ArgsView args){ + C99ReflType& self = _CAST(C99ReflType&, args[0]); + return VAR(self.size); + }); + + vm->bind__getitem__(PK_OBJ_GET(Type, type), [](VM* vm, PyObject* obj, PyObject* key){ + C99ReflType& self = _CAST(C99ReflType&, obj); + const Str& name = CAST(Str&, key); + auto it = std::lower_bound(self.fields.begin(), self.fields.end(), name.sv()); + if(it == self.fields.end() || it->name != name.sv()){ + vm->KeyError(key); + return vm->None; + } + return VAR(it->offset); + }); + } + +void add_module_c(VM* vm){ + PyObject* mod = vm->new_module("c"); + + vm->bind_func<1>(mod, "malloc", [](VM* vm, ArgsView args){ + i64 size = CAST(i64, args[0]); + return VAR(malloc(size)); + }); + + vm->bind_func<1>(mod, "free", [](VM* vm, ArgsView args){ + void* p = CAST(void*, args[0]); + free(p); + return vm->None; + }); + + vm->bind_func<1>(mod, "sizeof", [](VM* vm, ArgsView args){ + const Str& type = CAST(Str&, args[0]); + i64 size = c99_sizeof(vm, type); + return VAR(size); + }); + + vm->bind_func<1>(mod, "refl", [](VM* vm, ArgsView args){ + const Str& key = CAST(Str&, args[0]); + auto it = _refl_types.find(key.sv()); + if(it == _refl_types.end()) vm->ValueError("reflection type not found"); + const ReflType& rt = it->second; + return VAR_T(C99ReflType, rt); + }); + + vm->bind_func<3>(mod, "memset", [](VM* vm, ArgsView args){ + void* p = CAST(void*, args[0]); + memset(p, CAST(int, args[1]), CAST(size_t, args[2])); + return vm->None; + }); + + vm->bind_func<3>(mod, "memcpy", [](VM* vm, ArgsView args){ + void* dst = CAST(void*, args[0]); + void* src = CAST(void*, args[1]); + i64 size = CAST(i64, args[2]); + memcpy(dst, src, size); + return vm->None; + }); + + VoidP::register_class(vm, mod); + C99Struct::register_class(vm, mod); + C99ReflType::register_class(vm, mod); + mod->attr().set("NULL", VAR_T(VoidP, nullptr)); + + add_refl_type("char", sizeof(char), {}); + add_refl_type("uchar", sizeof(unsigned char), {}); + add_refl_type("short", sizeof(short), {}); + add_refl_type("ushort", sizeof(unsigned short), {}); + add_refl_type("int", sizeof(int), {}); + add_refl_type("uint", sizeof(unsigned int), {}); + add_refl_type("long", sizeof(long), {}); + add_refl_type("ulong", sizeof(unsigned long), {}); + add_refl_type("longlong", sizeof(long long), {}); + add_refl_type("ulonglong", sizeof(unsigned long long), {}); + add_refl_type("float", sizeof(float), {}); + add_refl_type("double", sizeof(double), {}); + add_refl_type("bool", sizeof(bool), {}); + add_refl_type("void_p", sizeof(void*), {}); + + PyObject* void_p_t = mod->attr("void_p"); + for(const char* t: {"char", "uchar", "short", "ushort", "int", "uint", "long", "ulong", "longlong", "ulonglong", "float", "double", "bool"}){ + mod->attr().set(Str(t) + "_", VAR_T(C99ReflType, _refl_types[t])); + mod->attr().set(Str(t) + "_p", void_p_t); + } +} + +int c99_sizeof(VM* vm, const Str& type){ + auto it = _refl_types.find(type.sv()); + if(it != _refl_types.end()) return it->second.size; + vm->ValueError("not a valid c99 type"); + return 0; +} + +} // namespace pkpy \ No newline at end of file diff --git a/src/compiler.cpp b/src/compiler.cpp new file mode 100644 index 00000000..458c44a2 --- /dev/null +++ b/src/compiler.cpp @@ -0,0 +1,1047 @@ +#include "pocketpy/compiler.h" + +namespace pkpy{ + + NameScope Compiler::name_scope() const { + auto s = contexts.size()>1 ? NAME_LOCAL : NAME_GLOBAL; + if(unknown_global_scope && s == NAME_GLOBAL) s = NAME_GLOBAL_UNKNOWN; + return s; + } + + CodeObject_ Compiler::push_global_context(){ + CodeObject_ co = make_sp(lexer->src, lexer->src->filename); + contexts.push(CodeEmitContext(vm, co, contexts.size())); + return co; + } + + FuncDecl_ Compiler::push_f_context(Str name){ + FuncDecl_ decl = make_sp(); + decl->code = make_sp(lexer->src, name); + decl->nested = name_scope() == NAME_LOCAL; + contexts.push(CodeEmitContext(vm, decl->code, contexts.size())); + return decl; + } + + void Compiler::pop_context(){ + if(!ctx()->s_expr.empty()){ + throw std::runtime_error("!ctx()->s_expr.empty()\n" + ctx()->_log_s_expr()); + } + // add a `return None` in the end as a guard + // previously, we only do this if the last opcode is not a return + // however, this is buggy...since there may be a jump to the end (out of bound) even if the last opcode is a return + ctx()->emit(OP_LOAD_NONE, BC_NOARG, BC_KEEPLINE); + ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE); + // ctx()->co->optimize(vm); + if(ctx()->co->varnames.size() > PK_MAX_CO_VARNAMES){ + SyntaxError("maximum number of local variables exceeded"); + } + contexts.pop(); + } + + void Compiler::init_pratt_rules(){ + if(rules[TK(".")].precedence != PREC_NONE) return; +// http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/ +#define METHOD(name) &Compiler::name +#define NO_INFIX nullptr, PREC_NONE + for(TokenIndex i=0; i")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION }; + rules[TK("<")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION }; + rules[TK("==")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION }; + rules[TK("!=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION }; + rules[TK(">=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION }; + rules[TK("<=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION }; + rules[TK("in")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION }; + rules[TK("is")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION }; + rules[TK("<<")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT }; + rules[TK(">>")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT }; + rules[TK("&")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_AND }; + rules[TK("|")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_OR }; + rules[TK("^")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_XOR }; + rules[TK("@")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR }; + rules[TK("if")] = { nullptr, METHOD(exprTernary), PREC_TERNARY }; + rules[TK(",")] = { nullptr, METHOD(exprTuple), PREC_TUPLE }; + rules[TK("not in")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION }; + rules[TK("is not")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION }; + rules[TK("and") ] = { nullptr, METHOD(exprAnd), PREC_LOGICAL_AND }; + rules[TK("or")] = { nullptr, METHOD(exprOr), PREC_LOGICAL_OR }; + rules[TK("not")] = { METHOD(exprNot), nullptr, PREC_LOGICAL_NOT }; + rules[TK("True")] = { METHOD(exprLiteral0), NO_INFIX }; + rules[TK("False")] = { METHOD(exprLiteral0), NO_INFIX }; + rules[TK("None")] = { METHOD(exprLiteral0), NO_INFIX }; + rules[TK("...")] = { METHOD(exprLiteral0), NO_INFIX }; + rules[TK("lambda")] = { METHOD(exprLambda), NO_INFIX }; + rules[TK("@id")] = { METHOD(exprName), NO_INFIX }; + rules[TK("@num")] = { METHOD(exprLiteral), NO_INFIX }; + rules[TK("@str")] = { METHOD(exprLiteral), NO_INFIX }; + rules[TK("@fstr")] = { METHOD(exprFString), NO_INFIX }; + rules[TK("@long")] = { METHOD(exprLong), NO_INFIX }; +#undef METHOD +#undef NO_INFIX + } + + bool Compiler::match(TokenIndex expected) { + if (curr().type != expected) return false; + advance(); + return true; + } + + void Compiler::consume(TokenIndex expected) { + if (!match(expected)){ + SyntaxError( + fmt("expected '", TK_STR(expected), "', got '", TK_STR(curr().type), "'") + ); + } + } + + bool Compiler::match_newlines_repl(){ + return match_newlines(mode()==REPL_MODE); + } + + bool Compiler::match_newlines(bool repl_throw) { + bool consumed = false; + if (curr().type == TK("@eol")) { + while (curr().type == TK("@eol")) advance(); + consumed = true; + } + if (repl_throw && curr().type == TK("@eof")){ + throw NeedMoreLines(ctx()->is_compiling_class); + } + return consumed; + } + + bool Compiler::match_end_stmt() { + if (match(TK(";"))) { match_newlines(); return true; } + if (match_newlines() || curr().type == TK("@eof")) return true; + if (curr().type == TK("@dedent")) return true; + return false; + } + + void Compiler::consume_end_stmt() { + if (!match_end_stmt()) SyntaxError("expected statement end"); + } + + void Compiler::EXPR(bool push_stack) { + parse_expression(PREC_TUPLE+1, push_stack); + } + + void Compiler::EXPR_TUPLE(bool push_stack) { + parse_expression(PREC_TUPLE, push_stack); + } + + // special case for `for loop` and `comp` + Expr_ Compiler::EXPR_VARS(){ + std::vector items; + do { + consume(TK("@id")); + items.push_back(make_expr(prev().str(), name_scope())); + } while(match(TK(","))); + if(items.size()==1) return std::move(items[0]); + return make_expr(std::move(items)); + } + + void Compiler::exprLiteral(){ + ctx()->s_expr.push(make_expr(prev().value)); + } + + void Compiler::exprLong(){ + ctx()->s_expr.push(make_expr(prev().str())); + } + + void Compiler::exprFString(){ + ctx()->s_expr.push(make_expr(std::get(prev().value))); + } + + void Compiler::exprLambda(){ + FuncDecl_ decl = push_f_context(""); + auto e = make_expr(decl); + if(!match(TK(":"))){ + _compile_f_args(e->decl, false); + consume(TK(":")); + } + // https://github.com/blueloveTH/pocketpy/issues/37 + parse_expression(PREC_LAMBDA + 1, false); + ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE); + pop_context(); + ctx()->s_expr.push(std::move(e)); + } + + void Compiler::exprTuple(){ + std::vector items; + items.push_back(ctx()->s_expr.popx()); + do { + if(curr().brackets_level) match_newlines_repl(); + if(!is_expression()) break; + EXPR(); + items.push_back(ctx()->s_expr.popx()); + if(curr().brackets_level) match_newlines_repl(); + } while(match(TK(","))); + ctx()->s_expr.push(make_expr( + std::move(items) + )); + } + + void Compiler::exprOr(){ + auto e = make_expr(); + e->lhs = ctx()->s_expr.popx(); + parse_expression(PREC_LOGICAL_OR + 1); + e->rhs = ctx()->s_expr.popx(); + ctx()->s_expr.push(std::move(e)); + } + + void Compiler::exprAnd(){ + auto e = make_expr(); + e->lhs = ctx()->s_expr.popx(); + parse_expression(PREC_LOGICAL_AND + 1); + e->rhs = ctx()->s_expr.popx(); + ctx()->s_expr.push(std::move(e)); + } + + void Compiler::exprTernary(){ + auto e = make_expr(); + e->true_expr = ctx()->s_expr.popx(); + // cond + parse_expression(PREC_TERNARY + 1); + e->cond = ctx()->s_expr.popx(); + consume(TK("else")); + // if false + parse_expression(PREC_TERNARY + 1); + e->false_expr = ctx()->s_expr.popx(); + ctx()->s_expr.push(std::move(e)); + } + + void Compiler::exprBinaryOp(){ + auto e = make_expr(); + e->op = prev().type; + e->lhs = ctx()->s_expr.popx(); + parse_expression(rules[e->op].precedence + 1); + e->rhs = ctx()->s_expr.popx(); + ctx()->s_expr.push(std::move(e)); + } + + void Compiler::exprNot() { + parse_expression(PREC_LOGICAL_NOT + 1); + ctx()->s_expr.push(make_expr(ctx()->s_expr.popx())); + } + + void Compiler::exprUnaryOp(){ + TokenIndex op = prev().type; + parse_expression(PREC_UNARY + 1); + switch(op){ + case TK("-"): + ctx()->s_expr.push(make_expr(ctx()->s_expr.popx())); + break; + case TK("*"): + ctx()->s_expr.push(make_expr(1, ctx()->s_expr.popx())); + break; + case TK("**"): + ctx()->s_expr.push(make_expr(2, ctx()->s_expr.popx())); + break; + default: FATAL_ERROR(); + } + } + + void Compiler::exprGroup(){ + match_newlines_repl(); + EXPR_TUPLE(); // () is just for change precedence + match_newlines_repl(); + consume(TK(")")); + if(ctx()->s_expr.top()->is_tuple()) return; + Expr_ g = make_expr(ctx()->s_expr.popx()); + ctx()->s_expr.push(std::move(g)); + } + + void Compiler::exprList() { + int line = prev().line; + std::vector items; + do { + match_newlines_repl(); + if (curr().type == TK("]")) break; + EXPR(); + items.push_back(ctx()->s_expr.popx()); + match_newlines_repl(); + if(items.size()==1 && match(TK("for"))){ + _consume_comp(std::move(items[0])); + consume(TK("]")); + return; + } + match_newlines_repl(); + } while (match(TK(","))); + consume(TK("]")); + auto e = make_expr(std::move(items)); + e->line = line; // override line + ctx()->s_expr.push(std::move(e)); + } + + void Compiler::exprMap() { + bool parsing_dict = false; // {...} may be dict or set + std::vector items; + do { + match_newlines_repl(); + if (curr().type == TK("}")) break; + EXPR(); + int star_level = ctx()->s_expr.top()->star_level(); + if(star_level==2 || curr().type == TK(":")){ + parsing_dict = true; + } + if(parsing_dict){ + auto dict_item = make_expr(); + if(star_level == 2){ + dict_item->key = nullptr; + dict_item->value = ctx()->s_expr.popx(); + }else{ + consume(TK(":")); + EXPR(); + dict_item->key = ctx()->s_expr.popx(); + dict_item->value = ctx()->s_expr.popx(); + } + items.push_back(std::move(dict_item)); + }else{ + items.push_back(ctx()->s_expr.popx()); + } + match_newlines_repl(); + if(items.size()==1 && match(TK("for"))){ + if(parsing_dict) _consume_comp(std::move(items[0])); + else _consume_comp(std::move(items[0])); + consume(TK("}")); + return; + } + match_newlines_repl(); + } while (match(TK(","))); + consume(TK("}")); + if(items.size()==0 || parsing_dict){ + auto e = make_expr(std::move(items)); + ctx()->s_expr.push(std::move(e)); + }else{ + auto e = make_expr(std::move(items)); + ctx()->s_expr.push(std::move(e)); + } + } + + void Compiler::exprCall() { + auto e = make_expr(); + e->callable = ctx()->s_expr.popx(); + do { + match_newlines_repl(); + if (curr().type==TK(")")) break; + if(curr().type==TK("@id") && next().type==TK("=")) { + consume(TK("@id")); + Str key = prev().str(); + consume(TK("=")); + EXPR(); + e->kwargs.push_back({key, ctx()->s_expr.popx()}); + } else{ + EXPR(); + if(ctx()->s_expr.top()->star_level() == 2){ + // **kwargs + e->kwargs.push_back({"**", ctx()->s_expr.popx()}); + }else{ + // positional argument + if(!e->kwargs.empty()) SyntaxError("positional argument follows keyword argument"); + e->args.push_back(ctx()->s_expr.popx()); + } + } + match_newlines_repl(); + } while (match(TK(","))); + consume(TK(")")); + if(e->args.size() > 32767) SyntaxError("too many positional arguments"); + if(e->kwargs.size() > 32767) SyntaxError("too many keyword arguments"); + ctx()->s_expr.push(std::move(e)); + } + + void Compiler::exprName(){ + Str name = prev().str(); + NameScope scope = name_scope(); + if(ctx()->global_names.count(name)){ + scope = NAME_GLOBAL; + } + ctx()->s_expr.push(make_expr(name, scope)); + } + + void Compiler::exprAttrib() { + consume(TK("@id")); + ctx()->s_expr.push( + make_expr(ctx()->s_expr.popx(), prev().str()) + ); + } + + void Compiler::exprSubscr() { + auto e = make_expr(); + e->a = ctx()->s_expr.popx(); + auto slice = make_expr(); + bool is_slice = false; + // a[<0> : state<3> : state<5>] + int state = 0; + do{ + switch(state){ + case 0: + if(match(TK(":"))){ + is_slice=true; + state=2; + break; + } + if(match(TK("]"))) SyntaxError(); + EXPR_TUPLE(); + slice->start = ctx()->s_expr.popx(); + state=1; + break; + case 1: + if(match(TK(":"))){ + is_slice=true; + state=2; + break; + } + if(match(TK("]"))) goto __SUBSCR_END; + SyntaxError("expected ':' or ']'"); + break; + case 2: + if(match(TK(":"))){ + state=4; + break; + } + if(match(TK("]"))) goto __SUBSCR_END; + EXPR_TUPLE(); + slice->stop = ctx()->s_expr.popx(); + state=3; + break; + case 3: + if(match(TK(":"))){ + state=4; + break; + } + if(match(TK("]"))) goto __SUBSCR_END; + SyntaxError("expected ':' or ']'"); + break; + case 4: + if(match(TK("]"))) goto __SUBSCR_END; + EXPR_TUPLE(); + slice->step = ctx()->s_expr.popx(); + state=5; + break; + case 5: consume(TK("]")); goto __SUBSCR_END; + } + }while(true); +__SUBSCR_END: + if(is_slice){ + e->b = std::move(slice); + }else{ + if(state != 1) FATAL_ERROR(); + e->b = std::move(slice->start); + } + ctx()->s_expr.push(std::move(e)); + } + + void Compiler::exprLiteral0() { + ctx()->s_expr.push(make_expr(prev().type)); + } + + void Compiler::compile_block_body() { + consume(TK(":")); + if(curr().type!=TK("@eol") && curr().type!=TK("@eof")){ + compile_stmt(); // inline block + return; + } + if(!match_newlines(mode()==REPL_MODE)){ + SyntaxError("expected a new line after ':'"); + } + consume(TK("@indent")); + while (curr().type != TK("@dedent")) { + match_newlines(); + compile_stmt(); + match_newlines(); + } + consume(TK("@dedent")); + } + + Str Compiler::_compile_import() { + if(name_scope() != NAME_GLOBAL) SyntaxError("import statement should be used in global scope"); + Opcode op = OP_IMPORT_NAME; + if(match(TK("."))) op = OP_IMPORT_NAME_REL; + consume(TK("@id")); + Str name = prev().str(); + ctx()->emit(op, StrName(name).index, prev().line); + return name; + } + + // import a as b + void Compiler::compile_normal_import() { + do { + Str name = _compile_import(); + if (match(TK("as"))) { + consume(TK("@id")); + name = prev().str(); + } + ctx()->emit(OP_STORE_GLOBAL, StrName(name).index, prev().line); + } while (match(TK(","))); + consume_end_stmt(); + } + + // from a import b as c, d as e + void Compiler::compile_from_import() { + _compile_import(); + consume(TK("import")); + if (match(TK("*"))) { + ctx()->emit(OP_IMPORT_STAR, BC_NOARG, prev().line); + consume_end_stmt(); + return; + } + do { + ctx()->emit(OP_DUP_TOP, BC_NOARG, BC_KEEPLINE); + consume(TK("@id")); + Str name = prev().str(); + ctx()->emit(OP_LOAD_ATTR, StrName(name).index, prev().line); + if (match(TK("as"))) { + consume(TK("@id")); + name = prev().str(); + } + ctx()->emit(OP_STORE_GLOBAL, StrName(name).index, prev().line); + } while (match(TK(","))); + ctx()->emit(OP_POP_TOP, BC_NOARG, BC_KEEPLINE); + consume_end_stmt(); + } + + bool Compiler::is_expression(){ + PrattCallback prefix = rules[curr().type].prefix; + return prefix != nullptr; + } + + void Compiler::parse_expression(int precedence, bool push_stack) { + PrattCallback prefix = rules[curr().type].prefix; + if (prefix == nullptr) SyntaxError(Str("expected an expression, got ") + TK_STR(curr().type)); + advance(); + (this->*prefix)(); + while (rules[curr().type].precedence >= precedence) { + TokenIndex op = curr().type; + advance(); + PrattCallback infix = rules[op].infix; + PK_ASSERT(infix != nullptr); + (this->*infix)(); + } + if(!push_stack) ctx()->emit_expr(); + } + + void Compiler::compile_if_stmt() { + EXPR(false); // condition + int patch = ctx()->emit(OP_POP_JUMP_IF_FALSE, BC_NOARG, prev().line); + compile_block_body(); + if (match(TK("elif"))) { + int exit_patch = ctx()->emit(OP_JUMP_ABSOLUTE, BC_NOARG, prev().line); + ctx()->patch_jump(patch); + compile_if_stmt(); + ctx()->patch_jump(exit_patch); + } else if (match(TK("else"))) { + int exit_patch = ctx()->emit(OP_JUMP_ABSOLUTE, BC_NOARG, prev().line); + ctx()->patch_jump(patch); + compile_block_body(); + ctx()->patch_jump(exit_patch); + } else { + ctx()->patch_jump(patch); + } + } + + void Compiler::compile_while_loop() { + ctx()->enter_block(WHILE_LOOP); + EXPR(false); // condition + int patch = ctx()->emit(OP_POP_JUMP_IF_FALSE, BC_NOARG, prev().line); + compile_block_body(); + ctx()->emit(OP_LOOP_CONTINUE, BC_NOARG, BC_KEEPLINE); + ctx()->patch_jump(patch); + ctx()->exit_block(); + } + + void Compiler::compile_for_loop() { + Expr_ vars = EXPR_VARS(); + consume(TK("in")); + EXPR_TUPLE(false); + ctx()->emit(OP_GET_ITER, BC_NOARG, BC_KEEPLINE); + ctx()->enter_block(FOR_LOOP); + ctx()->emit(OP_FOR_ITER, BC_NOARG, BC_KEEPLINE); + bool ok = vars->emit_store(ctx()); + if(!ok) SyntaxError(); // this error occurs in `vars` instead of this line, but...nevermind + compile_block_body(); + ctx()->emit(OP_LOOP_CONTINUE, BC_NOARG, BC_KEEPLINE); + ctx()->exit_block(); + } + + void Compiler::compile_try_except() { + ctx()->enter_block(TRY_EXCEPT); + compile_block_body(); + std::vector patches = { + ctx()->emit(OP_JUMP_ABSOLUTE, BC_NOARG, BC_KEEPLINE) + }; + ctx()->exit_block(); + do { + consume(TK("except")); + if(match(TK("@id"))){ + ctx()->emit(OP_EXCEPTION_MATCH, StrName(prev().str()).index, prev().line); + }else{ + ctx()->emit(OP_LOAD_TRUE, BC_NOARG, BC_KEEPLINE); + } + int patch = ctx()->emit(OP_POP_JUMP_IF_FALSE, BC_NOARG, BC_KEEPLINE); + // pop the exception on match + ctx()->emit(OP_POP_EXCEPTION, BC_NOARG, BC_KEEPLINE); + compile_block_body(); + patches.push_back(ctx()->emit(OP_JUMP_ABSOLUTE, BC_NOARG, BC_KEEPLINE)); + ctx()->patch_jump(patch); + }while(curr().type == TK("except")); + // no match, re-raise + ctx()->emit(OP_RE_RAISE, BC_NOARG, BC_KEEPLINE); + for (int patch : patches) ctx()->patch_jump(patch); + } + + void Compiler::compile_decorated(){ + std::vector decorators; + do{ + EXPR(); + decorators.push_back(ctx()->s_expr.popx()); + if(!match_newlines_repl()) SyntaxError(); + }while(match(TK("@"))); + consume(TK("def")); + compile_function(decorators); + } + + bool Compiler::try_compile_assignment(){ + switch (curr().type) { + case TK("+="): case TK("-="): case TK("*="): case TK("/="): case TK("//="): case TK("%="): + case TK("<<="): case TK(">>="): case TK("&="): case TK("|="): case TK("^="): { + Expr* lhs_p = ctx()->s_expr.top().get(); + if(lhs_p->is_starred()) SyntaxError(); + if(ctx()->is_compiling_class) SyntaxError("can't use inplace operator in class definition"); + advance(); + auto e = make_expr(); + e->op = prev().type - 1; // -1 to remove = + e->lhs = ctx()->s_expr.popx(); + EXPR_TUPLE(); + e->rhs = ctx()->s_expr.popx(); + if(e->is_starred()) SyntaxError(); + e->emit(ctx()); + bool ok = lhs_p->emit_store(ctx()); + if(!ok) SyntaxError(); + } return true; + case TK("="): { + int n = 0; + while(match(TK("="))){ + EXPR_TUPLE(); + Expr* _tp = ctx()->s_expr.top().get(); + if(ctx()->is_compiling_class && _tp->is_tuple()){ + SyntaxError("can't use unpack tuple in class definition"); + } + n += 1; + } + if(ctx()->is_compiling_class && n>1){ + SyntaxError("can't assign to multiple targets in class definition"); + } + // stack size is n+1 + Expr_ val = ctx()->s_expr.popx(); + val->emit(ctx()); + for(int j=1; jemit(OP_DUP_TOP, BC_NOARG, BC_KEEPLINE); + for(int j=0; js_expr.popx(); + if(e->is_starred()) SyntaxError(); + bool ok = e->emit_store(ctx()); + if(!ok) SyntaxError(); + } + } return true; + default: return false; + } + } + + void Compiler::compile_stmt() { + advance(); + int kw_line = prev().line; // backup line number + switch(prev().type){ + case TK("break"): + if (!ctx()->is_curr_block_loop()) SyntaxError("'break' outside loop"); + ctx()->emit(OP_LOOP_BREAK, BC_NOARG, kw_line); + consume_end_stmt(); + break; + case TK("continue"): + if (!ctx()->is_curr_block_loop()) SyntaxError("'continue' not properly in loop"); + ctx()->emit(OP_LOOP_CONTINUE, BC_NOARG, kw_line); + consume_end_stmt(); + break; + case TK("yield"): + if (contexts.size() <= 1) SyntaxError("'yield' outside function"); + EXPR_TUPLE(false); + // if yield present, mark the function as generator + ctx()->co->is_generator = true; + ctx()->emit(OP_YIELD_VALUE, BC_NOARG, kw_line); + consume_end_stmt(); + break; + case TK("yield from"): + if (contexts.size() <= 1) SyntaxError("'yield from' outside function"); + EXPR_TUPLE(false); + // if yield from present, mark the function as generator + ctx()->co->is_generator = true; + ctx()->emit(OP_GET_ITER, BC_NOARG, kw_line); + ctx()->enter_block(FOR_LOOP); + ctx()->emit(OP_FOR_ITER, BC_NOARG, BC_KEEPLINE); + ctx()->emit(OP_YIELD_VALUE, BC_NOARG, BC_KEEPLINE); + ctx()->emit(OP_LOOP_CONTINUE, BC_NOARG, BC_KEEPLINE); + ctx()->exit_block(); + consume_end_stmt(); + break; + case TK("return"): + if (contexts.size() <= 1) SyntaxError("'return' outside function"); + if(match_end_stmt()){ + ctx()->emit(OP_LOAD_NONE, BC_NOARG, kw_line); + }else{ + EXPR_TUPLE(false); + consume_end_stmt(); + } + ctx()->emit(OP_RETURN_VALUE, BC_NOARG, kw_line); + break; + /*************************************************/ + case TK("if"): compile_if_stmt(); break; + case TK("while"): compile_while_loop(); break; + case TK("for"): compile_for_loop(); break; + case TK("import"): compile_normal_import(); break; + case TK("from"): compile_from_import(); break; + case TK("def"): compile_function(); break; + case TK("@"): compile_decorated(); break; + case TK("try"): compile_try_except(); break; + case TK("pass"): consume_end_stmt(); break; + /*************************************************/ + case TK("++"):{ + consume(TK("@id")); + StrName name(prev().sv()); + switch(name_scope()){ + case NAME_LOCAL: + ctx()->emit(OP_INC_FAST, ctx()->add_varname(name), prev().line); + break; + case NAME_GLOBAL: + ctx()->emit(OP_INC_GLOBAL, name.index, prev().line); + break; + default: SyntaxError(); break; + } + consume_end_stmt(); + break; + } + case TK("--"):{ + consume(TK("@id")); + StrName name(prev().sv()); + switch(name_scope()){ + case NAME_LOCAL: + ctx()->emit(OP_DEC_FAST, ctx()->add_varname(name), prev().line); + break; + case NAME_GLOBAL: + ctx()->emit(OP_DEC_GLOBAL, name.index, prev().line); + break; + default: SyntaxError(); break; + } + consume_end_stmt(); + break; + } + case TK("assert"): + EXPR_TUPLE(false); + ctx()->emit(OP_ASSERT, BC_NOARG, kw_line); + consume_end_stmt(); + break; + case TK("global"): + do { + consume(TK("@id")); + ctx()->global_names.insert(prev().str()); + } while (match(TK(","))); + consume_end_stmt(); + break; + case TK("raise"): { + consume(TK("@id")); + int dummy_t = StrName(prev().str()).index; + if(match(TK("(")) && !match(TK(")"))){ + EXPR(false); consume(TK(")")); + }else{ + ctx()->emit(OP_LOAD_NONE, BC_NOARG, kw_line); + } + ctx()->emit(OP_RAISE, dummy_t, kw_line); + consume_end_stmt(); + } break; + case TK("del"): { + EXPR_TUPLE(); + Expr_ e = ctx()->s_expr.popx(); + bool ok = e->emit_del(ctx()); + if(!ok) SyntaxError(); + consume_end_stmt(); + } break; + case TK("with"): { + EXPR(false); + consume(TK("as")); + consume(TK("@id")); + Expr_ e = make_expr(prev().str(), name_scope()); + bool ok = e->emit_store(ctx()); + if(!ok) SyntaxError(); + e->emit(ctx()); + ctx()->emit(OP_WITH_ENTER, BC_NOARG, prev().line); + compile_block_body(); + e->emit(ctx()); + ctx()->emit(OP_WITH_EXIT, BC_NOARG, prev().line); + } break; + /*************************************************/ + case TK("$label"): { + if(mode()!=EXEC_MODE) SyntaxError("'label' is only available in EXEC_MODE"); + consume(TK("@id")); + bool ok = ctx()->add_label(prev().str()); + if(!ok) SyntaxError("label " + prev().str().escape() + " already exists"); + consume_end_stmt(); + } break; + case TK("$goto"): + if(mode()!=EXEC_MODE) SyntaxError("'goto' is only available in EXEC_MODE"); + consume(TK("@id")); + ctx()->emit(OP_GOTO, StrName(prev().str()).index, prev().line); + consume_end_stmt(); + break; + /*************************************************/ + // handle dangling expression or assignment + default: { + advance(-1); // do revert since we have pre-called advance() at the beginning + EXPR_TUPLE(); + // eat variable's type hint + if(match(TK(":"))) consume_type_hints(); + if(!try_compile_assignment()){ + if(!ctx()->s_expr.empty() && ctx()->s_expr.top()->is_starred()){ + SyntaxError(); + } + ctx()->emit_expr(); + if((mode()==CELL_MODE || mode()==REPL_MODE) && name_scope()==NAME_GLOBAL){ + ctx()->emit(OP_PRINT_EXPR, BC_NOARG, BC_KEEPLINE); + }else{ + ctx()->emit(OP_POP_TOP, BC_NOARG, BC_KEEPLINE); + } + } + consume_end_stmt(); + } + } + } + + void Compiler::consume_type_hints(){ + EXPR(); + ctx()->s_expr.pop(); + } + + void Compiler::compile_class(){ + consume(TK("@id")); + int namei = StrName(prev().str()).index; + int super_namei = -1; + if(match(TK("("))){ + if(match(TK("@id"))){ + super_namei = StrName(prev().str()).index; + } + consume(TK(")")); + } + if(super_namei == -1) ctx()->emit(OP_LOAD_NONE, BC_NOARG, prev().line); + else ctx()->emit(OP_LOAD_GLOBAL, super_namei, prev().line); + ctx()->emit(OP_BEGIN_CLASS, namei, BC_KEEPLINE); + ctx()->is_compiling_class = true; + compile_block_body(); + ctx()->is_compiling_class = false; + ctx()->emit(OP_END_CLASS, BC_NOARG, BC_KEEPLINE); + } + + void Compiler::_compile_f_args(FuncDecl_ decl, bool enable_type_hints){ + int state = 0; // 0 for args, 1 for *args, 2 for k=v, 3 for **kwargs + do { + if(state > 3) SyntaxError(); + if(state == 3) SyntaxError("**kwargs should be the last argument"); + match_newlines(); + if(match(TK("*"))){ + if(state < 1) state = 1; + else SyntaxError("*args should be placed before **kwargs"); + } + else if(match(TK("**"))){ + state = 3; + } + consume(TK("@id")); + StrName name = prev().str(); + + // check duplicate argument name + for(int j: decl->args){ + if(decl->code->varnames[j] == name) { + SyntaxError("duplicate argument name"); + } + } + for(auto& kv: decl->kwargs){ + if(decl->code->varnames[kv.key] == name){ + SyntaxError("duplicate argument name"); + } + } + if(decl->starred_arg!=-1 && decl->code->varnames[decl->starred_arg] == name){ + SyntaxError("duplicate argument name"); + } + if(decl->starred_kwarg!=-1 && decl->code->varnames[decl->starred_kwarg] == name){ + SyntaxError("duplicate argument name"); + } + + // eat type hints + if(enable_type_hints && match(TK(":"))) consume_type_hints(); + if(state == 0 && curr().type == TK("=")) state = 2; + int index = ctx()->add_varname(name); + switch (state) + { + case 0: + decl->args.push_back(index); + break; + case 1: + decl->starred_arg = index; + state+=1; + break; + case 2: { + consume(TK("=")); + PyObject* value = read_literal(); + if(value == nullptr){ + SyntaxError(Str("default argument must be a literal")); + } + decl->kwargs.push_back(FuncDecl::KwArg{index, value}); + } break; + case 3: + decl->starred_kwarg = index; + state+=1; + break; + } + } while (match(TK(","))); + } + + void Compiler::compile_function(const std::vector& decorators){ + const char* _start = curr().start; + consume(TK("@id")); + Str decl_name = prev().str(); + FuncDecl_ decl = push_f_context(decl_name); + consume(TK("(")); + if (!match(TK(")"))) { + _compile_f_args(decl, true); + consume(TK(")")); + } + if(match(TK("->"))) consume_type_hints(); + const char* _end = curr().start; + decl->signature = Str(_start, _end-_start); + compile_block_body(); + pop_context(); + + PyObject* docstring = nullptr; + if(decl->code->codes.size()>=2 && decl->code->codes[0].op == OP_LOAD_CONST && decl->code->codes[1].op == OP_POP_TOP){ + PyObject* c = decl->code->consts[decl->code->codes[0].arg]; + if(is_type(c, vm->tp_str)){ + decl->code->codes[0].op = OP_NO_OP; + decl->code->codes[1].op = OP_NO_OP; + docstring = c; + } + } + if(docstring != nullptr){ + decl->docstring = PK_OBJ_GET(Str, docstring); + } + ctx()->emit(OP_LOAD_FUNCTION, ctx()->add_func_decl(decl), prev().line); + + // add decorators + for(auto it=decorators.rbegin(); it!=decorators.rend(); ++it){ + (*it)->emit(ctx()); + ctx()->emit(OP_ROT_TWO, BC_NOARG, (*it)->line); + ctx()->emit(OP_LOAD_NULL, BC_NOARG, BC_KEEPLINE); + ctx()->emit(OP_ROT_TWO, BC_NOARG, BC_KEEPLINE); + ctx()->emit(OP_CALL, 1, (*it)->line); + } + if(!ctx()->is_compiling_class){ + auto e = make_expr(decl_name, name_scope()); + e->emit_store(ctx()); + }else{ + int index = StrName(decl_name).index; + ctx()->emit(OP_STORE_CLASS_ATTR, index, prev().line); + } + } + + PyObject* Compiler::to_object(const TokenValue& value){ + PyObject* obj = nullptr; + if(std::holds_alternative(value)){ + obj = VAR(std::get(value)); + } + if(std::holds_alternative(value)){ + obj = VAR(std::get(value)); + } + if(std::holds_alternative(value)){ + obj = VAR(std::get(value)); + } + if(obj == nullptr) FATAL_ERROR(); + return obj; + } + + PyObject* Compiler::read_literal(){ + advance(); + switch(prev().type){ + case TK("-"): { + consume(TK("@num")); + PyObject* val = to_object(prev().value); + return vm->py_negate(val); + } + case TK("@num"): return to_object(prev().value); + case TK("@str"): return to_object(prev().value); + case TK("True"): return VAR(true); + case TK("False"): return VAR(false); + case TK("None"): return vm->None; + case TK("..."): return vm->Ellipsis; + default: break; + } + return nullptr; + } + + Compiler::Compiler(VM* vm, const Str& source, const Str& filename, CompileMode mode, bool unknown_global_scope){ + this->vm = vm; + this->used = false; + this->unknown_global_scope = unknown_global_scope; + this->lexer = std::make_unique( + make_sp(source, filename, mode) + ); + init_pratt_rules(); + } + + + CodeObject_ Compiler::compile(){ + if(used) FATAL_ERROR(); + used = true; + + tokens = lexer->run(); + // if(lexer->src->filename == ""){ + // for(auto& t: tokens) std::cout << t.info() << std::endl; + // } + + CodeObject_ code = push_global_context(); + + advance(); // skip @sof, so prev() is always valid + match_newlines(); // skip possible leading '\n' + + if(mode()==EVAL_MODE) { + EXPR_TUPLE(false); + consume(TK("@eof")); + ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE); + pop_context(); + return code; + }else if(mode()==JSON_MODE){ + EXPR(); + Expr_ e = ctx()->s_expr.popx(); + if(!e->is_json_object()) SyntaxError("expect a JSON object, literal or array"); + consume(TK("@eof")); + e->emit(ctx()); + ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE); + pop_context(); + return code; + } + + while (!match(TK("@eof"))) { + if (match(TK("class"))) { + compile_class(); + } else { + compile_stmt(); + } + match_newlines(); + } + pop_context(); + return code; + } + +} // namespace pkpy \ No newline at end of file diff --git a/src/easing.cpp b/src/easing.cpp index 816abe46..e5df787b 100644 --- a/src/easing.cpp +++ b/src/easing.cpp @@ -2,6 +2,8 @@ namespace pkpy{ +#if PK_MODULE_EASING + // https://easings.net/ static const double PI = 3.1415926545; @@ -250,4 +252,13 @@ void add_module_easing(VM* vm){ #undef EASE } + +#else + +void add_module_easing(VM* vm){ + PK_UNUSED(vm); +} + +#endif + } // namespace pkpy \ No newline at end of file diff --git a/src/io.cpp b/src/io.cpp new file mode 100644 index 00000000..6f250613 --- /dev/null +++ b/src/io.cpp @@ -0,0 +1,174 @@ +#include "pocketpy/io.h" +#include "pocketpy/common.h" + +namespace pkpy{ + +Bytes _default_import_handler(const Str& name){ +#if PK_ENABLE_OS + std::filesystem::path path(name.sv()); + bool exists = std::filesystem::exists(path); + if(!exists) return Bytes(); + std::string cname = name.str(); + FILE* fp = fopen(cname.c_str(), "rb"); + if(!fp) return Bytes(); + fseek(fp, 0, SEEK_END); + std::vector buffer(ftell(fp)); + fseek(fp, 0, SEEK_SET); + size_t sz = fread(buffer.data(), 1, buffer.size(), fp); + PK_UNUSED(sz); + fclose(fp); + return Bytes(std::move(buffer)); +#else + return Bytes(); +#endif +}; + + +#if PK_ENABLE_OS + void FileIO::_register(VM* vm, PyObject* mod, PyObject* type){ + vm->bind_constructor<3>(type, [](VM* vm, ArgsView args){ + return VAR_T(FileIO, + vm, CAST(Str&, args[1]).str(), CAST(Str&, args[2]).str() + ); + }); + + vm->bind_method<0>(type, "read", [](VM* vm, ArgsView args){ + FileIO& io = CAST(FileIO&, args[0]); + fseek(io.fp, 0, SEEK_END); + std::vector buffer(ftell(io.fp)); + fseek(io.fp, 0, SEEK_SET); + size_t sz = fread(buffer.data(), 1, buffer.size(), io.fp); + PK_UNUSED(sz); + Bytes b(std::move(buffer)); + if(io.is_text()) return VAR(Str(b.str())); + return VAR(std::move(b)); + }); + + vm->bind_method<1>(type, "write", [](VM* vm, ArgsView args){ + FileIO& io = CAST(FileIO&, args[0]); + if(io.is_text()){ + Str& s = CAST(Str&, args[1]); + fwrite(s.data, 1, s.length(), io.fp); + }else{ + Bytes& buffer = CAST(Bytes&, args[1]); + fwrite(buffer.data(), 1, buffer.size(), io.fp); + } + return vm->None; + }); + + vm->bind_method<0>(type, "close", [](VM* vm, ArgsView args){ + FileIO& io = CAST(FileIO&, args[0]); + io.close(); + return vm->None; + }); + + vm->bind_method<0>(type, "__exit__", [](VM* vm, ArgsView args){ + FileIO& io = CAST(FileIO&, args[0]); + io.close(); + return vm->None; + }); + + vm->bind_method<0>(type, "__enter__", PK_LAMBDA(vm->None)); + } + + FileIO::FileIO(VM* vm, std::string file, std::string mode): file(file), mode(mode) { + fp = fopen(file.c_str(), mode.c_str()); + if(!fp) vm->IOError(strerror(errno)); + } + + void FileIO::close(){ + if(fp == nullptr) return; + fclose(fp); + fp = nullptr; + } + +#endif + +void add_module_io(VM* vm){ +#if PK_ENABLE_OS + PyObject* mod = vm->new_module("io"); + FileIO::register_class(vm, mod); + vm->bind_builtin_func<2>("open", [](VM* vm, ArgsView args){ + static StrName m_io("io"); + static StrName m_FileIO("FileIO"); + return vm->call(vm->_modules[m_io]->attr(m_FileIO), args[0], args[1]); + }); +#endif +} + +void add_module_os(VM* vm){ +#if PK_ENABLE_OS + PyObject* mod = vm->new_module("os"); + PyObject* path_obj = vm->heap.gcnew(vm->tp_object, {}); + mod->attr().set("path", path_obj); + + // Working directory is shared by all VMs!! + vm->bind_func<0>(mod, "getcwd", [](VM* vm, ArgsView args){ + return VAR(std::filesystem::current_path().string()); + }); + + vm->bind_func<1>(mod, "chdir", [](VM* vm, ArgsView args){ + std::filesystem::path path(CAST(Str&, args[0]).sv()); + std::filesystem::current_path(path); + return vm->None; + }); + + vm->bind_func<1>(mod, "listdir", [](VM* vm, ArgsView args){ + std::filesystem::path path(CAST(Str&, args[0]).sv()); + std::filesystem::directory_iterator di; + try{ + di = std::filesystem::directory_iterator(path); + }catch(std::filesystem::filesystem_error& e){ + std::string msg = e.what(); + auto pos = msg.find_last_of(":"); + if(pos != std::string::npos) msg = msg.substr(pos + 1); + vm->IOError(Str(msg).lstrip()); + } + List ret; + for(auto& p: di) ret.push_back(VAR(p.path().filename().string())); + return VAR(ret); + }); + + vm->bind_func<1>(mod, "remove", [](VM* vm, ArgsView args){ + std::filesystem::path path(CAST(Str&, args[0]).sv()); + bool ok = std::filesystem::remove(path); + if(!ok) vm->IOError("operation failed"); + return vm->None; + }); + + vm->bind_func<1>(mod, "mkdir", [](VM* vm, ArgsView args){ + std::filesystem::path path(CAST(Str&, args[0]).sv()); + bool ok = std::filesystem::create_directory(path); + if(!ok) vm->IOError("operation failed"); + return vm->None; + }); + + vm->bind_func<1>(mod, "rmdir", [](VM* vm, ArgsView args){ + std::filesystem::path path(CAST(Str&, args[0]).sv()); + bool ok = std::filesystem::remove(path); + if(!ok) vm->IOError("operation failed"); + return vm->None; + }); + + vm->bind_func<-1>(path_obj, "join", [](VM* vm, ArgsView args){ + std::filesystem::path path; + for(int i=0; ibind_func<1>(path_obj, "exists", [](VM* vm, ArgsView args){ + std::filesystem::path path(CAST(Str&, args[0]).sv()); + bool exists = std::filesystem::exists(path); + return VAR(exists); + }); + + vm->bind_func<1>(path_obj, "basename", [](VM* vm, ArgsView args){ + std::filesystem::path path(CAST(Str&, args[0]).sv()); + return VAR(path.filename().string()); + }); +#endif +} + +} // namespace pkpy \ No newline at end of file diff --git a/src/lexer.cpp b/src/lexer.cpp new file mode 100644 index 00000000..bd30e9c0 --- /dev/null +++ b/src/lexer.cpp @@ -0,0 +1,444 @@ +#include "pocketpy/lexer.h" + +namespace pkpy{ + + bool Lexer::match_n_chars(int n, char c0){ + const char* c = curr_char; + for(int i=0; i 0) return true; + int spaces = eat_spaces(); + if(peekchar() == '#') skip_line_comment(); + if(peekchar() == '\0' || peekchar() == '\n') return true; + // https://docs.python.org/3/reference/lexical_analysis.html#indentation + if(spaces > indents.top()){ + indents.push(spaces); + nexts.push_back(Token{TK("@indent"), token_start, 0, current_line, brackets_level}); + } else if(spaces < indents.top()){ + while(spaces < indents.top()){ + indents.pop(); + nexts.push_back(Token{TK("@dedent"), token_start, 0, current_line, brackets_level}); + } + if(spaces != indents.top()){ + return false; + } + } + return true; + } + + char Lexer::eatchar() { + char c = peekchar(); + if(c == '\n') throw std::runtime_error("eatchar() cannot consume a newline"); + curr_char++; + return c; + } + + char Lexer::eatchar_include_newline() { + char c = peekchar(); + curr_char++; + if (c == '\n'){ + current_line++; + src->line_starts.push_back(curr_char); + } + return c; + } + + int Lexer::eat_name() { + curr_char--; + while(true){ + unsigned char c = peekchar(); + int u8bytes = utf8len(c, true); + if(u8bytes == 0) return 1; + if(u8bytes == 1){ + if(isalpha(c) || c=='_' || isdigit(c)) { + curr_char++; + continue; + }else{ + break; + } + } + // handle multibyte char + std::string u8str(curr_char, u8bytes); + if(u8str.size() != u8bytes) return 2; + uint32_t value = 0; + for(int k=0; k < u8bytes; k++){ + uint8_t b = u8str[k]; + if(k==0){ + if(u8bytes == 2) value = (b & 0b00011111) << 6; + else if(u8bytes == 3) value = (b & 0b00001111) << 12; + else if(u8bytes == 4) value = (b & 0b00000111) << 18; + }else{ + value |= (b & 0b00111111) << (6*(u8bytes-k-1)); + } + } + if(is_unicode_Lo_char(value)) curr_char += u8bytes; + else break; + } + + int length = (int)(curr_char - token_start); + if(length == 0) return 3; + std::string_view name(token_start, length); + + if(src->mode == JSON_MODE){ + if(name == "true"){ + add_token(TK("True")); + } else if(name == "false"){ + add_token(TK("False")); + } else if(name == "null"){ + add_token(TK("None")); + } else { + return 4; + } + return 0; + } + + if(kTokenKwMap.count(name)){ + add_token(kTokenKwMap.at(name)); + } else { + add_token(TK("@id")); + } + return 0; + } + + void Lexer::skip_line_comment() { + char c; + while ((c = peekchar()) != '\0') { + if (c == '\n') return; + eatchar(); + } + } + + bool Lexer::matchchar(char c) { + if (peekchar() != c) return false; + eatchar_include_newline(); + return true; + } + + void Lexer::add_token(TokenIndex type, TokenValue value) { + switch(type){ + case TK("{"): case TK("["): case TK("("): brackets_level++; break; + case TK(")"): case TK("]"): case TK("}"): brackets_level--; break; + } + auto token = Token{ + type, + token_start, + (int)(curr_char - token_start), + current_line - ((type == TK("@eol")) ? 1 : 0), + brackets_level, + value + }; + // handle "not in", "is not", "yield from" + if(!nexts.empty()){ + auto& back = nexts.back(); + if(back.type == TK("not") && type == TK("in")){ + back.type = TK("not in"); + return; + } + if(back.type == TK("is") && type == TK("not")){ + back.type = TK("is not"); + return; + } + if(back.type == TK("yield") && type == TK("from")){ + back.type = TK("yield from"); + return; + } + nexts.push_back(token); + } + } + + void Lexer::add_token_2(char c, TokenIndex one, TokenIndex two) { + if (matchchar(c)) add_token(two); + else add_token(one); + } + + Str Lexer::eat_string_until(char quote, bool raw) { + bool quote3 = match_n_chars(2, quote); + std::vector buff; + while (true) { + char c = eatchar_include_newline(); + if (c == quote){ + if(quote3 && !match_n_chars(2, quote)){ + buff.push_back(c); + continue; + } + break; + } + if (c == '\0'){ + if(quote3 && src->mode == REPL_MODE){ + throw NeedMoreLines(false); + } + SyntaxError("EOL while scanning string literal"); + } + if (c == '\n'){ + if(!quote3) SyntaxError("EOL while scanning string literal"); + else{ + buff.push_back(c); + continue; + } + } + if (!raw && c == '\\') { + switch (eatchar_include_newline()) { + case '"': buff.push_back('"'); break; + case '\'': buff.push_back('\''); break; + case '\\': buff.push_back('\\'); break; + case 'n': buff.push_back('\n'); break; + case 'r': buff.push_back('\r'); break; + case 't': buff.push_back('\t'); break; + case 'x': { + char hex[3] = {eatchar(), eatchar(), '\0'}; + size_t parsed; + char code; + try{ + code = (char)Number::stoi(hex, &parsed, 16); + }catch(std::invalid_argument&){ + SyntaxError("invalid hex char"); + } + if (parsed != 2) SyntaxError("invalid hex char"); + buff.push_back(code); + } break; + default: SyntaxError("invalid escape char"); + } + } else { + buff.push_back(c); + } + } + return Str(buff.data(), buff.size()); + } + + void Lexer::eat_string(char quote, StringType type) { + Str s = eat_string_until(quote, type == RAW_STRING); + if(type == F_STRING){ + add_token(TK("@fstr"), s); + }else{ + add_token(TK("@str"), s); + } + } + + void Lexer::eat_number() { + static const std::regex pattern("^(0x)?[0-9a-fA-F]+(\\.[0-9]+)?(L)?"); + std::smatch m; + + const char* i = token_start; + while(*i != '\n' && *i != '\0') i++; + std::string s = std::string(token_start, i); + + bool ok = std::regex_search(s, m, pattern); + PK_ASSERT(ok); + // here is m.length()-1, since the first char was eaten by lex_token() + for(int j=0; j': { + if(matchchar('=')) add_token(TK(">=")); + else if(matchchar('>')) add_token_2('=', TK(">>"), TK(">>=")); + else add_token(TK(">")); + return true; + } + case '<': { + if(matchchar('=')) add_token(TK("<=")); + else if(matchchar('<')) add_token_2('=', TK("<<"), TK("<<=")); + else add_token(TK("<")); + return true; + } + case '-': { + if(matchchar('-')){ + add_token(TK("--")); + }else{ + if(matchchar('=')) add_token(TK("-=")); + else if(matchchar('>')) add_token(TK("->")); + else add_token(TK("-")); + } + return true; + } + case '!': + if(matchchar('=')) add_token(TK("!=")); + else SyntaxError("expected '=' after '!'"); + break; + case '*': + if (matchchar('*')) { + add_token(TK("**")); // '**' + } else { + add_token_2('=', TK("*"), TK("*=")); + } + return true; + case '/': + if(matchchar('/')) { + add_token_2('=', TK("//"), TK("//=")); + } else { + add_token_2('=', TK("/"), TK("/=")); + } + return true; + case ' ': case '\t': eat_spaces(); break; + case '\n': { + add_token(TK("@eol")); + if(!eat_indentation()) IndentationError("unindent does not match any outer indentation level"); + return true; + } + default: { + if(c == 'f'){ + if(matchchar('\'')) {eat_string('\'', F_STRING); return true;} + if(matchchar('"')) {eat_string('"', F_STRING); return true;} + }else if(c == 'r'){ + if(matchchar('\'')) {eat_string('\'', RAW_STRING); return true;} + if(matchchar('"')) {eat_string('"', RAW_STRING); return true;} + } + if (c >= '0' && c <= '9') { + eat_number(); + return true; + } + switch (eat_name()) + { + case 0: break; + case 1: SyntaxError("invalid char: " + std::string(1, c)); break; + case 2: SyntaxError("invalid utf8 sequence: " + std::string(1, c)); break; + case 3: SyntaxError("@id contains invalid char"); break; + case 4: SyntaxError("invalid JSON token"); break; + default: FATAL_ERROR(); + } + return true; + } + } + } + + token_start = curr_char; + while(indents.size() > 1){ + indents.pop(); + add_token(TK("@dedent")); + return true; + } + add_token(TK("@eof")); + return false; + } + + void Lexer::throw_err(Str type, Str msg){ + int lineno = current_line; + const char* cursor = curr_char; + if(peekchar() == '\n'){ + lineno--; + cursor--; + } + throw_err(type, msg, lineno, cursor); + } + + void Lexer::throw_err(Str type, Str msg, int lineno, const char* cursor){ + auto e = Exception(type, msg); + e.st_push(src->snapshot(lineno, cursor)); + throw e; + } + + Lexer::Lexer(shared_ptr src) { + this->src = src; + this->token_start = src->source.c_str(); + this->curr_char = src->source.c_str(); + this->nexts.push_back(Token{TK("@sof"), token_start, 0, current_line, brackets_level}); + this->indents.push(0); + } + + std::vector Lexer::run() { + if(used) FATAL_ERROR(); + used = true; + while (lex_one_token()); + return std::move(nexts); + } + +} // namespace pkpy \ No newline at end of file diff --git a/src/linalg.cpp b/src/linalg.cpp index 600ec996..fd38caf7 100644 --- a/src/linalg.cpp +++ b/src/linalg.cpp @@ -1,4 +1,4 @@ -#include "pocketpy/pocketpy.h" +#include "pocketpy/linalg.h" namespace pkpy{ diff --git a/src2/lib.cpp b/src2/lib.cpp new file mode 100644 index 00000000..9f1e34a4 --- /dev/null +++ b/src2/lib.cpp @@ -0,0 +1 @@ +#include "pocketpy/pocketpy.h" \ No newline at end of file diff --git a/src/main.cpp b/src2/main.cpp similarity index 100% rename from src/main.cpp rename to src2/main.cpp