blueloveTH 2025-05-19 14:03:18 +08:00
parent b0c1fe5b13
commit ae13cc6ea9
11 changed files with 278 additions and 81 deletions

View File

@ -164,6 +164,7 @@ __ERROR:
| Type Annotation | `def f(a:int, b:float=1)` | ✅ |
| Generator | `yield i` | ✅ |
| Decorator | `@cache` | ✅ |
| Match Case | `match code: case 200:` | ✅ |
## Performance

View File

@ -5,30 +5,30 @@ order: 100
---
The following table shows the basic features of pkpy with respect to [cpython](https://github.com/python/cpython).
The features marked with ✅ are supported; any feature not marked with ✅ is not supported.
| Name | Example | Supported |
| --------------- | ------------------------------- | --------- |
| If Else | `if..else..elif` | YES |
| Loop | `for/while/break/continue` | YES |
| Function | `def f(x,*args,y=1):` | YES |
| Subclass | `class A(B):` | YES |
| List | `[1, 2, 'a']` | YES |
| ListComp | `[i for i in range(5)]` | YES |
| Slice | `a[1:2], a[:2], a[1:]` | YES |
| Tuple | `(1, 2, 'a')` | YES |
| Dict | `{'a': 1, 'b': 2}` | YES |
| F-String | `f'value is {x}'` | YES |
| Unpacking | `a, b = 1, 2` | YES |
| Star Unpacking | `a, *b = [1, 2, 3]` | YES |
| Exception | `raise/try..catch..finally` | YES |
| Dynamic Code | `eval()/exec()` | YES |
| Reflection | `hasattr()/getattr()/setattr()` | YES |
| Import | `import/from..import` | YES |
| Context Block | `with <expr> as <id>:` | YES |
| Type Annotation | `def f(a:int, b:float=1)` | YES |
| Generator | `yield i` | YES |
| Decorator | `@cache` | YES |
| If Else | `if..else..elif` | ✅ |
| Loop | `for/while/break/continue` | ✅ |
| Function | `def f(x,*args,y=1):` | ✅ |
| Subclass | `class A(B):` | ✅ |
| List | `[1, 2, 'a']` | ✅ |
| ListComp | `[i for i in range(5)]` | ✅ |
| Slice | `a[1:2], a[:2], a[1:]` | ✅ |
| Tuple | `(1, 2, 'a')` | ✅ |
| Dict | `{'a': 1, 'b': 2}` | ✅ |
| F-String | `f'value is {x}'` | ✅ |
| Unpacking | `a, b = 1, 2` | ✅ |
| Star Unpacking | `a, *b = [1, 2, 3]` | ✅ |
| Exception | `raise/try..except..finally` | ✅ |
| Dynamic Code | `eval()/exec()` | ✅ |
| Reflection | `hasattr()/getattr()/setattr()` | ✅ |
| Import | `import/from..import` | ✅ |
| Context Block | `with <expr> as <id>:` | ✅ |
| Type Annotation | `def f(a:int, b:float=1)` | ✅ |
| Generator | `yield i` | ✅ |
| Decorator | `@cache` | ✅ |
| Match Case | `match code: case 200:` | ✅ |
## Supported magic methods

View File

@ -36,4 +36,4 @@ The easiest way to test a feature is to [try it on your browser](https://pocketp
5. In a starred unpacking assignment, e.g. `a, b, *c = x`, the starred variable can only appear in the last position. `a, *b, c = x` is not supported.
6. A `Tab` is equivalent to 4 spaces. You can mix `Tab` and spaces in indentation, but it is not recommended.
7. A `return`, `break` or `continue` inside a `try/except/with` block causes the `finally` block to be skipped.
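8. `match` is a keyword, and `match..case` is equivalent to an `if..elif..else` chain: each `case <expr>:` is compared with the subject for equality, and `case _:` is the default branch, which must come last.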

View File

@ -3,7 +3,7 @@ output: .retype
url: https://pocketpy.dev
branding:
title: pocketpy
label: v2.0.9
label: v2.1.0
logo: "./static/logo.png"
favicon: "./static/logo.png"
meta:

View File

@ -1,39 +1,118 @@
#pragma once
#include "pocketpy/common/str.h"
#include "pocketpy/common/vector.h"
#include "pocketpy/objects/sourcedata.h"
#include "pocketpy/objects/error.h"
#include <stdint.h>
extern const char* TokenSymbols[];
typedef enum TokenIndex{
TK_EOF, TK_EOL, TK_SOF,
TK_ID, TK_NUM, TK_STR, TK_FSTR_BEGIN, TK_FSTR_CPNT, TK_FSTR_SPEC, TK_FSTR_END, TK_BYTES, TK_IMAG,
TK_INDENT, TK_DEDENT,
typedef enum TokenIndex {
TK_EOF,
TK_EOL,
TK_SOF,
TK_ID,
TK_NUM,
TK_STR,
TK_FSTR_BEGIN,
TK_FSTR_CPNT,
TK_FSTR_SPEC,
TK_FSTR_END,
TK_BYTES,
TK_IMAG,
TK_INDENT,
TK_DEDENT,
/***************/
TK_IS_NOT, TK_NOT_IN, TK_YIELD_FROM,
TK_IS_NOT,
TK_NOT_IN,
TK_YIELD_FROM,
/***************/
TK_ADD, TK_IADD, TK_SUB, TK_ISUB,
TK_MUL, TK_IMUL, TK_DIV, TK_IDIV, TK_FLOORDIV, TK_IFLOORDIV, TK_MOD, TK_IMOD,
TK_AND, TK_IAND, TK_OR, TK_IOR, TK_XOR, TK_IXOR,
TK_LSHIFT, TK_ILSHIFT, TK_RSHIFT, TK_IRSHIFT,
TK_ADD,
TK_IADD,
TK_SUB,
TK_ISUB,
TK_MUL,
TK_IMUL,
TK_DIV,
TK_IDIV,
TK_FLOORDIV,
TK_IFLOORDIV,
TK_MOD,
TK_IMOD,
TK_AND,
TK_IAND,
TK_OR,
TK_IOR,
TK_XOR,
TK_IXOR,
TK_LSHIFT,
TK_ILSHIFT,
TK_RSHIFT,
TK_IRSHIFT,
/***************/
TK_LPAREN, TK_RPAREN, TK_LBRACKET, TK_RBRACKET, TK_LBRACE, TK_RBRACE,
TK_DOT, TK_DOTDOT, TK_DOTDOTDOT, TK_COMMA, TK_COLON, TK_SEMICOLON,
TK_POW, TK_ARROW, TK_HASH, TK_DECORATOR,
TK_GT, TK_LT, TK_ASSIGN, TK_EQ, TK_NE, TK_GE, TK_LE, TK_INVERT,
TK_LPAREN,
TK_RPAREN,
TK_LBRACKET,
TK_RBRACKET,
TK_LBRACE,
TK_RBRACE,
TK_DOT,
TK_DOTDOT,
TK_DOTDOTDOT,
TK_COMMA,
TK_COLON,
TK_SEMICOLON,
TK_POW,
TK_ARROW,
TK_HASH,
TK_DECORATOR,
TK_GT,
TK_LT,
TK_ASSIGN,
TK_EQ,
TK_NE,
TK_GE,
TK_LE,
TK_INVERT,
/***************/
TK_FALSE, TK_NONE, TK_TRUE, TK_AND_KW, TK_AS, TK_ASSERT, TK_BREAK, TK_CLASS, TK_CONTINUE,
TK_DEF, TK_DEL, TK_ELIF, TK_ELSE, TK_EXCEPT, TK_FINALLY, TK_FOR, TK_FROM, TK_GLOBAL,
TK_IF, TK_IMPORT, TK_IN, TK_IS, TK_LAMBDA, TK_NOT_KW, TK_OR_KW, TK_PASS, TK_RAISE, TK_RETURN,
TK_TRY, TK_WHILE, TK_WITH, TK_YIELD,
TK_FALSE,
TK_NONE,
TK_TRUE,
TK_AND_KW,
TK_AS,
TK_ASSERT,
TK_BREAK,
TK_CLASS,
TK_CONTINUE,
TK_DEF,
TK_DEL,
TK_ELIF,
TK_ELSE,
TK_EXCEPT,
TK_FINALLY,
TK_FOR,
TK_FROM,
TK_GLOBAL,
TK_IF,
TK_IMPORT,
TK_IN,
TK_IS,
TK_LAMBDA,
TK_MATCH,
TK_NOT_KW,
TK_OR_KW,
TK_PASS,
TK_RAISE,
TK_RETURN,
TK_TRY,
TK_WHILE,
TK_WITH,
TK_YIELD,
/***************/
TK__COUNT__
} TokenIndex;
enum TokenValueIndex{
enum TokenValueIndex {
TokenValue_EMPTY = 0,
TokenValue_I64 = 1,
TokenValue_F64 = 2,
@ -42,10 +121,11 @@ enum TokenValueIndex{
typedef struct TokenValue {
enum TokenValueIndex index; // 0: empty
union {
int64_t _i64; // 1
double _f64; // 2
c11_string* _str; // 3
int64_t _i64; // 1
double _f64; // 2
c11_string* _str; // 3
};
} TokenValue;
@ -69,7 +149,8 @@ enum Precedence {
/* https://docs.python.org/3/reference/expressions.html#comparisons
* Unlike C, all comparison operations in Python have the same priority,
* which is lower than that of any arithmetic, shifting or bitwise operation.
* Also unlike C, expressions like a < b < c have the interpretation that is conventional in mathematics.
* Also unlike C, expressions like a < b < c have the interpretation that is conventional in
* mathematics.
*/
PREC_COMPARISION, // < > <= >= != ==, in / is / is not / not in
PREC_BITWISE_OR, // |
@ -86,4 +167,5 @@ enum Precedence {
Error* Lexer__process(SourceData_ src, Token** out_tokens, int* out_length);
#define Token__sv(self) (c11_sv){(self)->start, (self)->length}
#define Token__sv(self) \
(c11_sv) { (self)->start, (self)->length }

View File

@ -1,10 +1,10 @@
#pragma once
// clang-format off
#define PK_VERSION "2.0.9"
#define PK_VERSION "2.1.0"
#define PK_VERSION_MAJOR 2
#define PK_VERSION_MINOR 0
#define PK_VERSION_PATCH 9
#define PK_VERSION_MINOR 1
#define PK_VERSION_PATCH 0
/*************** feature settings ***************/
// Whether to compile os-related modules or not

View File

@ -56,6 +56,7 @@ OPCODE(IS_OP)
OPCODE(CONTAINS_OP)
/**************************/
OPCODE(JUMP_FORWARD)
OPCODE(POP_JUMP_IF_NOT_MATCH)
OPCODE(POP_JUMP_IF_FALSE)
OPCODE(POP_JUMP_IF_TRUE)
OPCODE(JUMP_IF_TRUE_OR_POP)

View File

@ -1,9 +1,9 @@
#include "pocketpy/compiler/compiler.h"
#include "pocketpy/common/vector.h"
#include "pocketpy/compiler/lexer.h"
#include "pocketpy/objects/base.h"
#include "pocketpy/objects/codeobject.h"
#include "pocketpy/objects/sourcedata.h"
#include "pocketpy/objects/object.h"
#include "pocketpy/common/sstream.h"
#include <assert.h>
#include <stdbool.h>
@ -1377,6 +1377,15 @@ static bool is_expression(Compiler* self, bool allow_slice) {
#define match(expected) (curr()->type == expected ? (++self->i) : 0)
/* Consume the current token if it is an identifier spelled exactly `name`.
 *
 * Returns true (and advances past the token) on a match; returns false and
 * leaves the token position untouched otherwise.
 */
static bool match_id_by_str(Compiler* self, const char* name) {
    // Only identifier tokens can match.
    if(curr()->type != TK_ID) return false;
    // Compare the token's text against the requested spelling.
    if(!c11__sveq2(Token__sv(curr()), name)) return false;
    advance();
    return true;
}
static bool match_newlines_impl(Compiler* self) {
bool consumed = false;
if(curr()->type == TK_EOL) {
@ -1969,10 +1978,10 @@ static Error* consume_type_hints_sv(Compiler* self, c11_sv* out) {
static Error* compile_stmt(Compiler* self);
static Error* compile_block_body(Compiler* self, PrattCallback callback) {
static Error* compile_block_body(Compiler* self) {
Error* err;
assert(callback != NULL);
consume(TK_COLON);
if(curr()->type != TK_EOL && curr()->type != TK_EOF) {
while(true) {
check(compile_stmt(self));
@ -1988,7 +1997,7 @@ static Error* compile_block_body(Compiler* self, PrattCallback callback) {
consume(TK_INDENT);
while(curr()->type != TK_DEDENT) {
match_newlines();
check(callback(self));
check(compile_stmt(self));
match_newlines();
}
consume(TK_DEDENT);
@ -2000,7 +2009,7 @@ static Error* compile_if_stmt(Compiler* self) {
check(EXPR(self)); // condition
Ctx__s_emit_top(ctx());
int patch = Ctx__emit_(ctx(), OP_POP_JUMP_IF_FALSE, BC_NOARG, prev()->line);
err = compile_block_body(self, compile_stmt);
err = compile_block_body(self);
if(err) return err;
if(match(TK_ELIF)) {
int exit_patch = Ctx__emit_(ctx(), OP_JUMP_FORWARD, BC_NOARG, prev()->line);
@ -2010,7 +2019,7 @@ static Error* compile_if_stmt(Compiler* self) {
} else if(match(TK_ELSE)) {
int exit_patch = Ctx__emit_(ctx(), OP_JUMP_FORWARD, BC_NOARG, prev()->line);
Ctx__patch_jump(ctx(), patch);
check(compile_block_body(self, compile_stmt));
check(compile_block_body(self));
Ctx__patch_jump(ctx(), exit_patch);
} else {
Ctx__patch_jump(ctx(), patch);
@ -2018,6 +2027,56 @@ static Error* compile_if_stmt(Compiler* self) {
return NULL;
}
/* Compile the remainder of a `match <subject>:` statement.
 *
 * The subject expression is evaluated once and kept on the stack. Each
 * `case <expr>:` duplicates the subject, evaluates <expr> and emits
 * OP_POP_JUMP_IF_NOT_MATCH to skip the case body when the two differ.
 * `case _:` is the default branch: it emits no test and must be the last case.
 * Every case body ends with an OP_JUMP_FORWARD to the end of the statement,
 * where a final OP_POP_TOP discards the subject.
 *
 * `patches` is a vector of `int` that collects the indices of those
 * OP_JUMP_FORWARD instructions; this function neither constructs nor
 * destroys it.
 *
 * Returns NULL on success, or the error produced while compiling.
 */
static Error* compile_match_case(Compiler* self, c11_vector* patches) {
    Error* err;
    bool is_case_default = false;

    check(EXPR(self));  // subject expression
    Ctx__s_emit_top(ctx());

    consume(TK_COLON);

    bool consumed = match_newlines();
    if(!consumed) return SyntaxError(self, "expected a new line after ':'");

    consume(TK_INDENT);
    while(curr()->type != TK_DEDENT) {
        match_newlines();
        if(curr()->type == TK_DEDENT) break;

        // Only `case` clauses may appear directly inside a match block.
        // Rejecting anything else here is required for termination: a
        // non-`case` token would otherwise never be consumed and this loop
        // would spin forever.
        if(!match_id_by_str(self, "case")) {
            return SyntaxError(self, "expected 'case'");
        }
        if(is_case_default) return SyntaxError(self, "case _: must be the last one");
        is_case_default = match_id_by_str(self, "_");

        int patch = 0;  // only meaningful for non-default cases
        if(!is_case_default) {
            // Keep the subject alive for later cases by testing a copy of it.
            Ctx__emit_(ctx(), OP_DUP_TOP, BC_NOARG, prev()->line);
            check(EXPR(self));  // value to compare against
            Ctx__s_emit_top(ctx());
            patch = Ctx__emit_(ctx(), OP_POP_JUMP_IF_NOT_MATCH, BC_NOARG, prev()->line);
        }

        check(compile_block_body(self));

        // Leave the match statement once this case body has run.
        int break_patch = Ctx__emit_(ctx(), OP_JUMP_FORWARD, BC_NOARG, prev()->line);
        c11_vector__push(int, patches, break_patch);

        // A failed comparison lands here, at the start of the next case.
        if(!is_case_default) Ctx__patch_jump(ctx(), patch);

        match_newlines();
    }
    consume(TK_DEDENT);

    // A match block must contain at least one case.
    if(patches->length == 0) return SyntaxError(self, "invalid syntax");

    // All case bodies jump to this point.
    for(int i = 0; i < patches->length; i++) {
        int patch = c11__getitem(int, patches, i);
        Ctx__patch_jump(ctx(), patch);
    }
    // Discard the subject value.
    Ctx__emit_(ctx(), OP_POP_TOP, BC_NOARG, prev()->line);
    return NULL;
}
static Error* compile_while_loop(Compiler* self) {
Error* err;
int block = Ctx__enter_block(ctx(), CodeBlockType_WHILE_LOOP);
@ -2025,13 +2084,13 @@ static Error* compile_while_loop(Compiler* self) {
check(EXPR(self)); // condition
Ctx__s_emit_top(ctx());
int patch = Ctx__emit_(ctx(), OP_POP_JUMP_IF_FALSE, BC_NOARG, prev()->line);
check(compile_block_body(self, compile_stmt));
check(compile_block_body(self));
Ctx__emit_jump(ctx(), block_start, BC_KEEPLINE);
Ctx__patch_jump(ctx(), patch);
Ctx__exit_block(ctx());
// optional else clause
if(match(TK_ELSE)) {
check(compile_block_body(self, compile_stmt));
check(compile_block_body(self));
CodeBlock* p_block = c11__at(CodeBlock, &ctx()->co->blocks, block);
p_block->end2 = ctx()->co->codes.length;
}
@ -2054,12 +2113,12 @@ static Error* compile_for_loop(Compiler* self) {
// this error occurs in `vars` instead of this line, but...nevermind
return SyntaxError(self, "invalid syntax");
}
check(compile_block_body(self, compile_stmt));
check(compile_block_body(self));
Ctx__emit_jump(ctx(), block_start, BC_KEEPLINE);
Ctx__exit_block(ctx());
// optional else clause
if(match(TK_ELSE)) {
check(compile_block_body(self, compile_stmt));
check(compile_block_body(self));
CodeBlock* p_block = c11__at(CodeBlock, &ctx()->co->blocks, block);
p_block->end2 = ctx()->co->codes.length;
}
@ -2289,7 +2348,7 @@ static Error* compile_function(Compiler* self, int decorators) {
consume(TK_RPAREN);
}
if(match(TK_ARROW)) check(consume_type_hints(self));
check(compile_block_body(self, compile_stmt));
check(compile_block_body(self));
check(pop_context(self));
if(decl->code.codes.length >= 2) {
@ -2354,7 +2413,7 @@ static Error* compile_class(Compiler* self, int decorators) {
if(it->is_compiling_class) return SyntaxError(self, "nested class is not allowed");
}
ctx()->is_compiling_class = true;
check(compile_block_body(self, compile_stmt));
check(compile_block_body(self));
ctx()->is_compiling_class = false;
Ctx__s_emit_decorators(ctx(), decorators);
@ -2489,7 +2548,7 @@ static Error* compile_try_except(Compiler* self) {
Ctx__enter_block(ctx(), CodeBlockType_TRY);
Ctx__emit_(ctx(), OP_TRY_ENTER, BC_NOARG, prev()->line);
check(compile_block_body(self, compile_stmt));
check(compile_block_body(self));
// https://docs.python.org/3/reference/compound_stmts.html#finally-clause
/* If finally is present, it specifies a cleanup handler. The try clause is executed,
@ -2515,7 +2574,7 @@ static Error* compile_try_except(Compiler* self) {
Ctx__emit_(ctx(), OP_BEGIN_FINALLY, BC_NOARG, prev()->line);
// finally only, no except block
Ctx__enter_block(ctx(), CodeBlockType_FINALLY);
check(compile_block_body(self, compile_stmt));
check(compile_block_body(self));
Ctx__exit_block(ctx());
Ctx__emit_(ctx(), OP_END_FINALLY, BC_NOARG, BC_KEEPLINE);
// re-raise if needed
@ -2551,7 +2610,7 @@ static Error* compile_try_except(Compiler* self) {
Ctx__emit_store_name(ctx(), name_scope(self), as_name, BC_KEEPLINE);
}
Ctx__enter_block(ctx(), CodeBlockType_EXCEPT);
check(compile_block_body(self, compile_stmt));
check(compile_block_body(self));
Ctx__exit_block(ctx());
Ctx__emit_(ctx(), OP_END_EXC_HANDLING, BC_NOARG, BC_KEEPLINE);
patches[patches_length++] = Ctx__emit_(ctx(), OP_JUMP_FORWARD, BC_NOARG, BC_KEEPLINE);
@ -2568,7 +2627,7 @@ static Error* compile_try_except(Compiler* self) {
if(match(TK_FINALLY)) {
Ctx__emit_(ctx(), OP_BEGIN_FINALLY, BC_NOARG, prev()->line);
Ctx__enter_block(ctx(), CodeBlockType_FINALLY);
check(compile_block_body(self, compile_stmt));
check(compile_block_body(self));
Ctx__exit_block(ctx());
Ctx__emit_(ctx(), OP_END_FINALLY, BC_NOARG, BC_KEEPLINE);
}
@ -2629,6 +2688,13 @@ static Error* compile_stmt(Compiler* self) {
break;
/*************************************************/
case TK_IF: check(compile_if_stmt(self)); break;
case TK_MATCH: {
    // Scratch vector of jump indices filled in by compile_match_case.
    c11_vector patches;
    c11_vector__ctor(&patches, sizeof(int));
    err = compile_match_case(self, &patches);
    // Release the vector before propagating any error; an early return
    // via check() would skip the destructor and leak its buffer.
    c11_vector__dtor(&patches);
    if(err) return err;
    break;
}
case TK_WHILE: check(compile_while_loop(self)); break;
case TK_FOR: check(compile_for_loop(self)); break;
case TK_IMPORT: check(compile_normal_import(self)); break;
@ -2701,7 +2767,7 @@ static Error* compile_stmt(Compiler* self) {
// discard `__enter__()`'s return value
Ctx__emit_(ctx(), OP_POP_TOP, BC_NOARG, BC_KEEPLINE);
}
check(compile_block_body(self, compile_stmt));
check(compile_block_body(self));
Ctx__emit_(ctx(), OP_WITH_EXIT, BC_NOARG, prev()->line);
Ctx__exit_block(ctx());
} break;

View File

@ -1,4 +1,3 @@
#include "pocketpy/common/smallmap.h"
#include "pocketpy/common/sstream.h"
#include "pocketpy/common/vector.h"
#include "pocketpy/compiler/lexer.h"
@ -280,20 +279,20 @@ static Error* _eat_string(Lexer* self, c11_sbuf* buff, char quote, enum StringTy
case 'x': {
char hex[3] = {eatchar(self), eatchar(self), '\0'};
int code;
if (sscanf(hex, "%x", &code) != 1 || code > 0xFF) {
if(sscanf(hex, "%x", &code) != 1 || code > 0xFF) {
return LexerError(self, "invalid hex escape");
}
if (type == NORMAL_BYTES) {
if(type == NORMAL_BYTES) {
// Bytes literals: write raw byte
c11_sbuf__write_char(buff, (char)code);
} else {
// Regular strings: encode as UTF-8
if (code <= 0x7F) {
if(code <= 0x7F) {
c11_sbuf__write_char(buff, (char)code);
} else {
// Encode as 2-byte UTF-8 for code points 0x80-0xFF
c11_sbuf__write_char(buff, 0xC0 | (code >> 6)); // Leading byte
c11_sbuf__write_char(buff, 0x80 | (code & 0x3F)); // Continuation byte
c11_sbuf__write_char(buff, 0xC0 | (code >> 6)); // Leading byte
c11_sbuf__write_char(buff, 0x80 | (code & 0x3F)); // Continuation byte
}
}
} break;
@ -712,6 +711,7 @@ const char* TokenSymbols[] = {
"in",
"is",
"lambda",
"match",
"not",
"or",
"pass",

View File

@ -619,6 +619,13 @@ FrameResult VM__run_top_frame(VM* self) {
}
/*****************************************/
case OP_JUMP_FORWARD: DISPATCH_JUMP((int16_t)byte.arg);
case OP_POP_JUMP_IF_NOT_MATCH: {
// Compare the two topmost stack values; a negative result signals an error.
int res = py_equal(SECOND(), TOP());
if(res < 0) goto __ERROR;
// Both compared values are removed from the stack regardless of the outcome.
STACK_SHRINK(2);
// Zero result (no match): take the jump encoded in the instruction argument.
if(!res) DISPATCH_JUMP((int16_t)byte.arg);
// Non-zero result (match): continue with the next instruction.
DISPATCH();
}
case OP_POP_JUMP_IF_FALSE: {
int res = py_bool(TOP());
if(res < 0) goto __ERROR;

View File

@ -124,11 +124,51 @@ else:
x = 3
assert x == 2
# t = 0
# for i in range(5):
# try:
# break
# except:
# pass
# t = 1
# assert t == 0
# match case
case, _ = 1, 2
assert case == 1 and _ == 2
match (404 * 1):
case 200:
assert False
case 404:
assert True
case _: assert False
match (555 * 1):
case 200:
assert False
case 404: assert False
case _:
assert True
match (555 * 1):
case 200:
assert False
case 404:
assert False
# no default case
def f(case):
match case:
case 200:
return True
case 404:
return False
case _:
return False
assert f(200) == True
# extras
t = 0
for i in range(5):
try:
break
except:
pass
t = 1
assert t == 0