// Patch summary: adds a bytes-literal form (b'...' / b"...") to pocketpy.
//
// What the hunks below do, file by file:
//   include/pocketpy/compiler.h  - declares Compiler::exprBytes().
//   include/pocketpy/expr.h      - adds struct BytesExpr (holds a Str; str() returns s.str()).
//   include/pocketpy/lexer.h     - adds the "@bytes" token name after "@long" and the
//                                  NORMAL_BYTES member of enum StringType.
//   include/pocketpy/opcodes.h   - adds OPCODE(BUILD_BYTES) after BUILD_LONG.
//   src/ceval.cpp                - BUILD_BYTES handler: casts TOP() to Str, copies s.size bytes
//                                  from s.data into a vector, and replaces TOP() with
//                                  VAR(Bytes(std::move(buffer))).
//   src/compiler.cpp             - registers exprBytes as the prefix rule for "@bytes";
//                                  exprBytes pushes an expression built from prev().value.
//   src/expr.cpp                 - BytesExpr::emit emits OP_LOAD_CONST for the string constant
//                                  followed by OP_BUILD_BYTES.
//   src/lexer.cpp                - eat_string is restructured from if/else into early returns
//                                  and emits "@bytes" when type == NORMAL_BYTES; a 'b' prefix
//                                  followed by ' or " now calls eat_string(..., NORMAL_BYTES).
//   tests/10_bytes.py            - new test comparing str.encode() output and \x-escaped
//                                  bytes literals for (in)equality.
//
// NOTE(review): the line breaks and indentation inside every hunk have been collapsed, so
// this text is not in applicable unified-diff form as it stands - regenerate from the
// original commit before applying.
// NOTE(review): several template argument lists appear to have been stripped during
// extraction (e.g. `shared_ptr src;`, `std::vector buffer(s.size)`, `make_expr(...)`,
// `std::get(...)`, `std::holds_alternative(value)`) - confirm against the upstream commit.
diff --git a/include/pocketpy/compiler.h b/include/pocketpy/compiler.h index c815beb8..afc2b8fb 100644 --- a/include/pocketpy/compiler.h +++ b/include/pocketpy/compiler.h @@ -85,6 +85,7 @@ class Compiler { void exprLiteral(); void exprLong(); + void exprBytes(); void exprFString(); void exprLambda(); void exprTuple(); diff --git a/include/pocketpy/expr.h b/include/pocketpy/expr.h index 3f7c1a83..1ec109dc 100644 --- a/include/pocketpy/expr.h +++ b/include/pocketpy/expr.h @@ -134,6 +134,13 @@ struct LongExpr: Expr{ void emit(CodeEmitContext* ctx) override; }; +struct BytesExpr: Expr{ + Str s; + BytesExpr(const Str& s): s(s) {} + std::string str() const override { return s.str(); } + void emit(CodeEmitContext* ctx) override; +}; + // @num, @str which needs to invoke OP_LOAD_CONST struct LiteralExpr: Expr{ TokenValue value; diff --git a/include/pocketpy/lexer.h b/include/pocketpy/lexer.h index 0f30588e..ce9b63e5 100644 --- a/include/pocketpy/lexer.h +++ b/include/pocketpy/lexer.h @@ -11,7 +11,7 @@ typedef uint8_t TokenIndex; constexpr const char* kTokens[] = { "is not", "not in", "yield from", "@eof", "@eol", "@sof", - "@id", "@num", "@str", "@fstr", "@long", + "@id", "@num", "@str", "@fstr", "@long", "@bytes", "@indent", "@dedent", /*****************************************/ "+", "+=", "-", "-=", // (INPLACE_OP - 1) can get '=' removed @@ -100,7 +100,7 @@ enum Precedence { PREC_PRIMARY, }; -enum StringType { NORMAL_STRING, RAW_STRING, F_STRING }; +enum StringType { NORMAL_STRING, RAW_STRING, F_STRING, NORMAL_BYTES }; struct Lexer { shared_ptr src; diff --git a/include/pocketpy/opcodes.h b/include/pocketpy/opcodes.h index 20c8b28e..c04d7c96 100644 --- a/include/pocketpy/opcodes.h +++ b/include/pocketpy/opcodes.h @@ -39,6 +39,7 @@ OPCODE(DELETE_ATTR) OPCODE(DELETE_SUBSCR) /**************************/ OPCODE(BUILD_LONG) +OPCODE(BUILD_BYTES) OPCODE(BUILD_TUPLE) OPCODE(BUILD_LIST) OPCODE(BUILD_DICT) diff --git a/src/ceval.cpp b/src/ceval.cpp index 559aba2f..5657fb33 100644 
--- a/src/ceval.cpp +++ b/src/ceval.cpp @@ -247,6 +247,12 @@ __NEXT_STEP:; if(_0 == nullptr) AttributeError(builtins, m_long); TOP() = call(_0, TOP()); } DISPATCH(); + TARGET(BUILD_BYTES) { + const Str& s = CAST(Str&, TOP()); + std::vector buffer(s.size); + memcpy(buffer.data(), s.data, s.size); + TOP() = VAR(Bytes(std::move(buffer))); + } DISPATCH(); TARGET(BUILD_TUPLE) _0 = VAR(STACK_VIEW(byte.arg).to_tuple()); STACK_SHRINK(byte.arg); diff --git a/src/compiler.cpp b/src/compiler.cpp index 323149cf..7dc30fcd 100644 --- a/src/compiler.cpp +++ b/src/compiler.cpp @@ -87,6 +87,7 @@ namespace pkpy{ rules[TK("@str")] = { METHOD(exprLiteral), NO_INFIX }; rules[TK("@fstr")] = { METHOD(exprFString), NO_INFIX }; rules[TK("@long")] = { METHOD(exprLong), NO_INFIX }; + rules[TK("@bytes")] = { METHOD(exprBytes), NO_INFIX }; #undef METHOD #undef NO_INFIX } @@ -159,6 +160,10 @@ namespace pkpy{ ctx()->s_expr.push(make_expr(prev().str())); } + void Compiler::exprBytes(){ + ctx()->s_expr.push(make_expr(std::get(prev().value))); + } + void Compiler::exprFString(){ ctx()->s_expr.push(make_expr(std::get(prev().value))); } diff --git a/src/expr.cpp b/src/expr.cpp index 3b4a5f1e..665c066b 100644 --- a/src/expr.cpp +++ b/src/expr.cpp @@ -190,6 +190,12 @@ namespace pkpy{ ctx->emit(OP_BUILD_LONG, BC_NOARG, line); } + void BytesExpr::emit(CodeEmitContext* ctx) { + VM* vm = ctx->vm; + ctx->emit(OP_LOAD_CONST, ctx->add_const(VAR(s)), line); + ctx->emit(OP_BUILD_BYTES, BC_NOARG, line); + } + std::string LiteralExpr::str() const{ if(std::holds_alternative(value)){ return std::to_string(std::get(value)); diff --git a/src/lexer.cpp b/src/lexer.cpp index ffbac892..1af3f7be 100644 --- a/src/lexer.cpp +++ b/src/lexer.cpp @@ -236,9 +236,13 @@ namespace pkpy{ Str s = eat_string_until(quote, type == RAW_STRING); if(type == F_STRING){ add_token(TK("@fstr"), s); - }else{ - add_token(TK("@str"), s); + return; } + if(type == NORMAL_BYTES){ + add_token(TK("@bytes"), s); + return; + } + add_token(TK("@str"), 
s); } void Lexer::eat_number() { @@ -385,6 +389,9 @@ namespace pkpy{ }else if(c == 'r'){ if(matchchar('\'')) {eat_string('\'', RAW_STRING); return true;} if(matchchar('"')) {eat_string('"', RAW_STRING); return true;} + }else if(c == 'b'){ + if(matchchar('\'')) {eat_string('\'', NORMAL_BYTES); return true;} + if(matchchar('"')) {eat_string('"', NORMAL_BYTES); return true;} } if (c >= '0' && c <= '9') { eat_number(); diff --git a/tests/10_bytes.py b/tests/10_bytes.py new file mode 100644 index 00000000..f982fa1e --- /dev/null +++ b/tests/10_bytes.py @@ -0,0 +1,5 @@ +a = '12345' +assert a.encode() == b'12345' + +assert b'\xff\xee' != b'1234' +assert b'\xff\xee' == b'\xff\xee'