diff --git a/amalgamate.py b/amalgamate.py index c0021b62..2dc4eedc 100644 --- a/amalgamate.py +++ b/amalgamate.py @@ -9,7 +9,7 @@ pipeline = [ ["config.h", "export.h", "common.h", "memory.h", "vector.h", "str.h", "tuplelist.h", "namedict.h", "error.h", "lexer.h"], ["obj.h", "dict.h", "codeobject.h", "frame.h"], ["gc.h", "vm.h", "ceval.h", "expr.h", "compiler.h", "repl.h"], - ["_generated.h", "cffi.h", "bindings.h", "iter.h", "base64.h", "random.h", "re.h", "linalg.h", "easing.h", "io.h"], + ["_generated.h", "cffi.h", "bindings.h", "iter.h", "base64.h", "random.h", "yaml.h", "re.h", "linalg.h", "easing.h", "io.h"], ["pocketpy.h", "pocketpy_c.h"] ] diff --git a/docs/modules/yaml.md b/docs/modules/yaml.md new file mode 100644 index 00000000..46312e8f --- /dev/null +++ b/docs/modules/yaml.md @@ -0,0 +1,12 @@ +--- +icon: package +label: yaml +--- + +### `yaml.loads(s) -> dict` + +Decode a YAML string into a Python object. + +!!! +Only a subset of YAML is supported. The YAML to be parsed must have an equivalent JSON representation. +!!! 
\ No newline at end of file diff --git a/include/pocketpy/compiler.h b/include/pocketpy/compiler.h index afc2b8fb..f9d92bfb 100644 --- a/include/pocketpy/compiler.h +++ b/include/pocketpy/compiler.h @@ -16,25 +16,12 @@ struct PrattRule{ Precedence precedence; }; -class Compiler { +class Compiler: public CompilerBase { inline static PrattRule rules[kTokenCount]; - std::unique_ptr lexer; stack contexts; VM* vm; bool unknown_global_scope; // for eval/exec() call bool used; - // for parsing token stream - int i = 0; - std::vector tokens; - - const Token& prev() const{ return tokens.at(i-1); } - const Token& curr() const{ return tokens.at(i); } - const Token& next() const{ return tokens.at(i+1); } - const Token& err() const{ - if(i >= tokens.size()) return prev(); - return curr(); - } - void advance(int delta=1) { i += delta; } CodeEmitContext* ctx() { return &contexts.top(); } CompileMode mode() const{ return lexer->src->mode; } @@ -45,10 +32,7 @@ class Compiler { static void init_pratt_rules(); - bool match(TokenIndex expected); - void consume(TokenIndex expected); bool match_newlines_repl(); - bool match_newlines(bool repl_throw=false); bool match_end_stmt(); void consume_end_stmt(); @@ -125,17 +109,9 @@ class Compiler { PyObject* to_object(const TokenValue& value); PyObject* read_literal(); - - void SyntaxError(Str msg){ lexer->throw_err("SyntaxError", msg, err().line, err().start); } - void SyntaxError(){ lexer->throw_err("SyntaxError", "invalid syntax", err().line, err().start); } - void IndentationError(Str msg){ lexer->throw_err("IndentationError", msg, err().line, err().start); } - public: Compiler(VM* vm, const Str& source, const Str& filename, CompileMode mode, bool unknown_global_scope=false); CodeObject_ compile(); - - Compiler(const Compiler&) = delete; - Compiler& operator=(const Compiler&) = delete; }; } // namespace pkpy \ No newline at end of file diff --git a/include/pocketpy/lexer.h b/include/pocketpy/lexer.h index d35d44e8..46b3f568 100644 --- 
a/include/pocketpy/lexer.h
+++ b/include/pocketpy/lexer.h
@@ -141,4 +141,59 @@ struct Lexer {
     std::vector<Token> run();
 };
 
+// Shared base for token-stream parsers: holds the lexer, a token list and
+// a cursor into that list, plus the match/consume and error-reporting
+// helpers built on top of them.
+class CompilerBase{
+public:
+    std::unique_ptr<Lexer> lexer;
+
+    // for parsing token stream
+    int i = 0;                      // index of the current token in `tokens`
+    std::vector<Token> tokens;
+
+    // Accessors relative to the cursor; vector::at() throws
+    // std::out_of_range when the index is invalid.
+    const Token& prev() const{ return tokens.at(i-1); }
+    const Token& curr() const{ return tokens.at(i); }
+    const Token& next() const{ return tokens.at(i+1); }
+
+    // Token used to locate an error: the current one, or the previous one
+    // once the cursor has run past the end of `tokens`.
+    const Token& err() const{
+        // `i` is signed and size() is unsigned: compare in the signed domain.
+        if(i >= static_cast<int>(tokens.size())) return prev();
+        return curr();
+    }
+    void advance(int delta=1) { i += delta; }
+
+    // Advance past the current token and return true if its type is
+    // `expected`; otherwise leave the cursor untouched and return false.
+    bool match(TokenIndex expected) {
+        if (curr().type != expected) return false;
+        advance();
+        return true;
+    }
+
+    // Like match(), but report a SyntaxError naming both token types on a
+    // mismatch.
+    void consume(TokenIndex expected) {
+        if (!match(expected)){
+            SyntaxError(
+                fmt("expected '", TK_STR(expected), "', got '", TK_STR(curr().type), "'")
+            );
+        }
+    }
+
+    // Error helpers: forward to the lexer with the line/start of err().
+    void SyntaxError(Str msg){ lexer->throw_err("SyntaxError", msg, err().line, err().start); }
+    void SyntaxError(){ lexer->throw_err("SyntaxError", "invalid syntax", err().line, err().start); }
+    void IndentationError(Str msg){ lexer->throw_err("IndentationError", msg, err().line, err().start); }
+
+    // Non-copyable.
+    CompilerBase(const CompilerBase&) = delete;
+    CompilerBase& operator=(const CompilerBase&) = delete;
+    CompilerBase() = default;
+};
+
 } // namespace pkpy
diff --git a/include/pocketpy/pocketpy.h b/include/pocketpy/pocketpy.h
index c991e30e..75c4fba6 100644
--- a/include/pocketpy/pocketpy.h
+++ b/include/pocketpy/pocketpy.h
@@ -12,6 +12,7 @@
 #include "_generated.h"
 #include "vm.h"
 #include "re.h"
+#include "yaml.h"
 #include "random.h"
 #include "bindings.h"
 
diff --git a/include/pocketpy/yaml.h b/include/pocketpy/yaml.h
new file mode 100644
index 00000000..0ca69148
--- /dev/null
+++ b/include/pocketpy/yaml.h
@@ -0,0 +1,10 @@
+#pragma once
+
+#include "cffi.h"
+#include "lexer.h"
+
+namespace pkpy{
+
+void add_module_yaml(VM* vm);
+
+} // namespace pkpy
\ No newline at end of file
diff --git a/src/compiler.cpp b/src/compiler.cpp
index ce4934c0..9be833a2 100644
--- 
a/src/compiler.cpp
+++ b/src/compiler.cpp
@@ -92,20 +92,6 @@ namespace pkpy{
 #undef NO_INFIX
     }
 
-    bool Compiler::match(TokenIndex expected) {
-        if (curr().type != expected) return false;
-        advance();
-        return true;
-    }
-
-    void Compiler::consume(TokenIndex expected) {
-        if (!match(expected)){
-            SyntaxError(
-                fmt("expected '", TK_STR(expected), "', got '", TK_STR(curr().type), "'")
-            );
-        }
-    }
-
     bool Compiler::match_newlines_repl(){
         return match_newlines(mode()==REPL_MODE);
     }
diff --git a/src/pocketpy.cpp b/src/pocketpy.cpp
index 91214b5f..39678252 100644
--- a/src/pocketpy.cpp
+++ b/src/pocketpy.cpp
@@ -1489,6 +1489,7 @@ void VM::post_init(){
     add_module_random(this);
     add_module_base64(this);
     add_module_timeit(this);
+    add_module_yaml(this);
 
     for(const char* name: {"this", "functools", "collections", "heapq", "bisect", "pickle", "_long"}){
         _lazy_modules[name] = kPythonLibs[name];
diff --git a/src/yaml.cpp b/src/yaml.cpp
new file mode 100644
index 00000000..a6ab83a3
--- /dev/null
+++ b/src/yaml.cpp
@@ -0,0 +1,113 @@
+#include "pocketpy/yaml.h"
+
+namespace pkpy{
+
+// Parser behind `yaml.loads`, built on the shared token-stream helpers of
+// CompilerBase. Work in progress: EXPR() handles scalars, but flow
+// containers and top-level entries are not parsed yet.
+class YAMLCompiler: public CompilerBase{
+public:
+    VM* vm;
+
+    // Set up a lexer over `s`; the source is created in JSON_MODE.
+    YAMLCompiler(VM* vm, const Str& s): vm(vm) {
+        auto src = std::make_shared<SourceData>(s, "", JSON_MODE);
+        this->lexer = std::make_unique<Lexer>(src);
+    }
+
+    // Parse one value at the cursor and return it as a Python object.
+    // `*valid_key` is set to true only for @str and @id tokens.
+    // For '[' and '{' nothing is consumed and NULL is returned.
+    PyObject* EXPR(bool* valid_key){
+        // BASIC: True False None @str @num
+        // CONTAINER: [] {}
+        *valid_key = false;
+        switch(curr().type){
+            case TK("True"): advance(); return vm->True;
+            case TK("False"): advance(); return vm->False;
+            case TK("None"): advance(); return vm->None;
+            case TK("@num"):{
+                advance();
+                TokenValue value = prev().value;
+                if(std::holds_alternative<i64>(value)){
+                    return VAR(std::get<i64>(value));
+                }else if(std::holds_alternative<f64>(value)){
+                    return VAR(std::get<f64>(value));
+                }
+                // NOTE(review): relies on FATAL_ERROR() not returning; otherwise
+                // control would fall through into the @str case below.
+                FATAL_ERROR();
+            }
+            case TK("@str"):
+                advance();
+                *valid_key = true;
+                return VAR(std::get<Str>(prev().value));
+            case TK("@id"):
+                *valid_key = true;
+                advance();
+                return VAR(prev().sv());
+            case TK("["): case TK("{"): {
+                // parse the whole line as json
+                return NULL;
+            }
+            default: SyntaxError();
+            return NULL;
+        }
+    }
+
+    // Parse what follows a key. Consumes ':' and, if a token other than
+    // @eol/@eof comes next, returns that inline value. Otherwise expects an
+    // @indent ... @dedent block and returns a new dict for it.
+    // NOTE(review): keys inside the indented block are validated but not
+    // stored yet, so the returned dict is always empty.
+    PyObject* compile_block(){
+        consume(TK(":"));
+        if(curr().type!=TK("@eol") && curr().type!=TK("@eof")){
+            bool _;
+            return EXPR(&_);    // inline block
+        }
+
+        PyObject* block = VAR(Dict(vm));
+
+        // NOTE(review): the cursor is still on @eol/@eof here, so this
+        // consume() reports a SyntaxError -- confirm the intended token order.
+        consume(TK("@indent"));
+        while (curr().type != TK("@dedent")) {
+            bool valid_key;
+            EXPR(&valid_key);
+            if(!valid_key) SyntaxError();
+        }
+        consume(TK("@dedent"));
+        // Every path must return a value: flowing off the end of a
+        // non-void function is undefined behavior.
+        return block;
+    }
+
+    // Tokenize the source and build the top-level Dict.
+    Dict compile(){
+        tokens = lexer->run();
+        Dict d(vm);
+        advance();          // skip @sof, so prev() is always valid
+
+        // match() does not advance on a mismatch, so looping on it with an
+        // empty body would never terminate for a non-empty document.
+        // Top-level entries are not parsed yet: reject them explicitly.
+        if (!match(TK("@eof"))) {
+            SyntaxError("yaml: top-level entries are not supported yet");
+        }
+        return d;
+    }
+};
+
+// Register the `yaml` module and its `loads(s: str) -> dict` function.
+void add_module_yaml(VM* vm){
+    PyObject* mod = vm->new_module("yaml");
+
+    vm->bind(mod, "loads(s: str) -> dict", [](VM* vm, ArgsView args){
+        const Str& s = CAST(Str&, args[0]);
+        YAMLCompiler compiler(vm, s);
+        return VAR(compiler.compile());
+    });
+}
+
+} // namespace pkpy
\ No newline at end of file