mirror of
https://github.com/pocketpy/pocketpy
synced 2026-02-04 14:40:16 +00:00
412 lines
14 KiB
C
412 lines
14 KiB
C
#include "pocketpy/objects/codeobject.h"
|
|
#include "pocketpy/common/serialize.h"
|
|
#include "pocketpy/common/utils.h"
|
|
|
|
// Magic number for CodeObject serialization: "CO" = 0x434F
|
|
#define CODEOBJECT_MAGIC 0x434F
|
|
#define CODEOBJECT_VER_MAJOR 1
|
|
#define CODEOBJECT_VER_MINOR 0
|
|
#define CODEOBJECT_VER_MINOR_MIN 0
|
|
|
|
// Forward declarations
|
|
static void FuncDecl__serialize(c11_serializer* s,
|
|
const FuncDecl* decl,
|
|
const struct SourceData* parent_src);
|
|
static FuncDecl_ FuncDecl__deserialize(c11_deserializer* d, SourceData_ embedded_src);
|
|
static void CodeObject__serialize(c11_serializer* s,
|
|
const CodeObject* co,
|
|
const struct SourceData* parent_src);
|
|
static CodeObject CodeObject__deserialize(c11_deserializer* d, const char* filename, SourceData_ embedded_src);
|
|
|
|
// Serialize a py_TValue constant
|
|
static void TValue__serialize(c11_serializer* s, py_Ref val) {
|
|
c11_serializer__write_type(s, val->type);
|
|
// 1. co_consts: int | float | str
|
|
// 2. function defaults: see `read_literal()` in compiler.c
|
|
switch(val->type) {
|
|
case tp_int: c11_serializer__write_i64(s, val->_i64); break;
|
|
case tp_float: c11_serializer__write_f64(s, val->_f64); break;
|
|
case tp_str: {
|
|
c11_sv sv = py_tosv((py_Ref)val);
|
|
c11_serializer__write_i32(s, sv.size);
|
|
c11_serializer__write_bytes(s, sv.data, sv.size);
|
|
break;
|
|
}
|
|
case tp_bool: {
|
|
bool value = py_tobool(val);
|
|
c11_serializer__write_i8(s, value ? 1 : 0);
|
|
break;
|
|
}
|
|
case tp_NoneType: break;
|
|
case tp_ellipsis: break;
|
|
case tp_tuple: {
|
|
int len = py_tuple_len(val);
|
|
c11_serializer__write_i32(s, len);
|
|
for(int i = 0; i < len; i++) {
|
|
py_Ref item = py_tuple_getitem(val, i);
|
|
TValue__serialize(s, item);
|
|
}
|
|
break;
|
|
}
|
|
default: c11__abort("TValue__serialize: invalid type '%s'", py_tpname(val->type));
|
|
}
|
|
}
|
|
|
|
// Deserialize a py_TValue constant
|
|
static void TValue__deserialize(c11_deserializer* d, py_OutRef out) {
|
|
py_Type type = c11_deserializer__read_type(d);
|
|
switch(type) {
|
|
case tp_int: {
|
|
py_i64 v = c11_deserializer__read_i64(d);
|
|
py_newint(out, v);
|
|
break;
|
|
}
|
|
case tp_float: {
|
|
py_f64 v = c11_deserializer__read_f64(d);
|
|
py_newfloat(out, v);
|
|
break;
|
|
}
|
|
case tp_str: {
|
|
int size = c11_deserializer__read_i32(d);
|
|
char* dst = py_newstrn(out, size);
|
|
char* src = c11_deserializer__read_bytes(d, size);
|
|
memcpy(dst, src, size);
|
|
break;
|
|
}
|
|
case tp_bool: {
|
|
bool v = c11_deserializer__read_i8(d) != 0;
|
|
py_newbool(out, v);
|
|
break;
|
|
}
|
|
case tp_NoneType: {
|
|
py_newnone(out);
|
|
break;
|
|
}
|
|
case tp_ellipsis: {
|
|
py_newellipsis(out);
|
|
break;
|
|
}
|
|
case tp_tuple: {
|
|
int len = c11_deserializer__read_i32(d);
|
|
py_newtuple(out, len);
|
|
for(int i = 0; i < len; i++) {
|
|
py_ItemRef item = py_tuple_getitem(out, i);
|
|
TValue__deserialize(d, item);
|
|
}
|
|
break;
|
|
}
|
|
default:
|
|
c11__abort("TValue__deserialize: invalid type '%s'", py_tpname(type));
|
|
}
|
|
}
|
|
|
|
// Serialize CodeObject
|
|
static void CodeObject__serialize(c11_serializer* s,
|
|
const CodeObject* co,
|
|
const struct SourceData* parent_src) {
|
|
// SourceData
|
|
if(parent_src) {
|
|
c11__rtassert(co->src == parent_src);
|
|
}
|
|
|
|
// name
|
|
c11_serializer__write_cstr(s, co->name->data);
|
|
|
|
// codes
|
|
_Static_assert(sizeof(Bytecode) == sizeof(uint16_t) * 2, "");
|
|
c11_serializer__write_i32(s, co->codes.length);
|
|
c11_serializer__write_mark(s, '[');
|
|
c11_serializer__write_bytes(s, co->codes.data, co->codes.length * sizeof(Bytecode));
|
|
c11_serializer__write_mark(s, ']');
|
|
|
|
// codes_ex
|
|
_Static_assert(sizeof(BytecodeEx) == sizeof(int32_t) * 2, "");
|
|
c11_serializer__write_i32(s, co->codes_ex.length);
|
|
c11_serializer__write_mark(s, '[');
|
|
c11_serializer__write_bytes(s, co->codes_ex.data, co->codes_ex.length * sizeof(BytecodeEx));
|
|
c11_serializer__write_mark(s, ']');
|
|
|
|
// consts
|
|
c11_serializer__write_i32(s, co->consts.length);
|
|
c11_serializer__write_mark(s, '[');
|
|
for(int i = 0; i < co->consts.length; i++) {
|
|
py_Ref val = c11__at(py_TValue, &co->consts, i);
|
|
TValue__serialize(s, val);
|
|
}
|
|
c11_serializer__write_mark(s, ']');
|
|
|
|
// varnames (as cstr via py_name2str)
|
|
c11_serializer__write_i32(s, co->varnames.length);
|
|
c11_serializer__write_mark(s, '[');
|
|
for(int i = 0; i < co->varnames.length; i++) {
|
|
py_Name name = c11__getitem(py_Name, &co->varnames, i);
|
|
c11_serializer__write_cstr(s, py_name2str(name));
|
|
}
|
|
c11_serializer__write_mark(s, ']');
|
|
|
|
// names (as cstr via py_name2str)
|
|
c11_serializer__write_i32(s, co->names.length);
|
|
c11_serializer__write_mark(s, '[');
|
|
for(int i = 0; i < co->names.length; i++) {
|
|
py_Name name = c11__getitem(py_Name, &co->names, i);
|
|
c11_serializer__write_cstr(s, py_name2str(name));
|
|
}
|
|
c11_serializer__write_mark(s, ']');
|
|
|
|
// nlocals
|
|
c11_serializer__write_i32(s, co->nlocals);
|
|
|
|
// blocks
|
|
_Static_assert(sizeof(CodeBlock) == sizeof(int32_t) * 5, "");
|
|
c11_serializer__write_i32(s, co->blocks.length);
|
|
c11_serializer__write_mark(s, '[');
|
|
c11_serializer__write_bytes(s, co->blocks.data, co->blocks.length * sizeof(CodeBlock));
|
|
c11_serializer__write_mark(s, ']');
|
|
|
|
// func_decls
|
|
c11_serializer__write_i32(s, co->func_decls.length);
|
|
c11_serializer__write_mark(s, '[');
|
|
for(int i = 0; i < co->func_decls.length; i++) {
|
|
const FuncDecl* decl = c11__getitem(FuncDecl_, &co->func_decls, i);
|
|
FuncDecl__serialize(s, decl, co->src);
|
|
c11_serializer__write_mark(s, '|');
|
|
}
|
|
c11_serializer__write_mark(s, ']');
|
|
|
|
// start_line, end_line
|
|
c11_serializer__write_i32(s, co->start_line);
|
|
c11_serializer__write_i32(s, co->end_line);
|
|
}
|
|
|
|
// Deserialize CodeObject (initialize co before calling)
|
|
static CodeObject CodeObject__deserialize(c11_deserializer* d, const char* filename, SourceData_ embedded_src) {
|
|
CodeObject co;
|
|
|
|
// SourceData
|
|
SourceData_ src;
|
|
if(embedded_src != NULL) {
|
|
c11__rtassert(filename == NULL);
|
|
src = embedded_src;
|
|
PK_INCREF(src);
|
|
} else {
|
|
c11__rtassert(filename != NULL);
|
|
src = SourceData__rcnew(NULL, filename, EXEC_MODE, false);
|
|
}
|
|
|
|
// name
|
|
const char* name = c11_deserializer__read_cstr(d);
|
|
c11_sv name_sv = {name, strlen(name)};
|
|
|
|
// Initialize the CodeObject
|
|
CodeObject__ctor(&co, src, name_sv);
|
|
PK_DECREF(src); // CodeObject__ctor increments ref count
|
|
// Clear the default root block that CodeObject__ctor adds
|
|
c11_vector__clear(&co.blocks);
|
|
|
|
// codes
|
|
int codes_len = c11_deserializer__read_i32(d);
|
|
c11_deserializer__consume_mark(d, '[');
|
|
c11_vector__extend(&co.codes,
|
|
c11_deserializer__read_bytes(d, codes_len * sizeof(Bytecode)),
|
|
codes_len);
|
|
c11_deserializer__consume_mark(d, ']');
|
|
// codes_ex
|
|
int codes_ex_len = c11_deserializer__read_i32(d);
|
|
c11_deserializer__consume_mark(d, '[');
|
|
c11_vector__extend(&co.codes_ex,
|
|
c11_deserializer__read_bytes(d, codes_ex_len * sizeof(BytecodeEx)),
|
|
codes_ex_len);
|
|
c11_deserializer__consume_mark(d, ']');
|
|
|
|
// consts
|
|
int consts_len = c11_deserializer__read_i32(d);
|
|
c11_deserializer__consume_mark(d, '[');
|
|
for(int i = 0; i < consts_len; i++) {
|
|
py_Ref p_val = c11_vector__emplace(&co.consts);
|
|
TValue__deserialize(d, p_val);
|
|
}
|
|
c11_deserializer__consume_mark(d, ']');
|
|
|
|
// varnames
|
|
int varnames_len = c11_deserializer__read_i32(d);
|
|
c11_deserializer__consume_mark(d, '[');
|
|
for(int i = 0; i < varnames_len; i++) {
|
|
const char* s = c11_deserializer__read_cstr(d);
|
|
py_Name n = py_name(s);
|
|
c11_vector__push(py_Name, &co.varnames, n);
|
|
c11_smallmap_n2d__set(&co.varnames_inv, n, i);
|
|
}
|
|
c11_deserializer__consume_mark(d, ']');
|
|
|
|
// names
|
|
int names_len = c11_deserializer__read_i32(d);
|
|
c11_deserializer__consume_mark(d, '[');
|
|
for(int i = 0; i < names_len; i++) {
|
|
const char* s = c11_deserializer__read_cstr(d);
|
|
py_Name n = py_name(s);
|
|
c11_vector__push(py_Name, &co.names, n);
|
|
c11_smallmap_n2d__set(&co.names_inv, n, i);
|
|
}
|
|
c11_deserializer__consume_mark(d, ']');
|
|
|
|
// nlocals
|
|
co.nlocals = c11_deserializer__read_i32(d);
|
|
|
|
// blocks
|
|
int blocks_len = c11_deserializer__read_i32(d);
|
|
c11_deserializer__consume_mark(d, '[');
|
|
c11_vector__extend(&co.blocks,
|
|
c11_deserializer__read_bytes(d, blocks_len * sizeof(CodeBlock)),
|
|
blocks_len);
|
|
c11_deserializer__consume_mark(d, ']');
|
|
|
|
// func_decls
|
|
int func_decls_len = c11_deserializer__read_i32(d);
|
|
c11_deserializer__consume_mark(d, '[');
|
|
for(int i = 0; i < func_decls_len; i++) {
|
|
FuncDecl_ decl = FuncDecl__deserialize(d, src);
|
|
c11_vector__push(FuncDecl_, &co.func_decls, decl);
|
|
c11_deserializer__consume_mark(d, '|');
|
|
}
|
|
c11_deserializer__consume_mark(d, ']');
|
|
|
|
// start_line, end_line
|
|
co.start_line = c11_deserializer__read_i32(d);
|
|
co.end_line = c11_deserializer__read_i32(d);
|
|
|
|
return co;
|
|
}
|
|
|
|
// Serialize FuncDecl
|
|
static void FuncDecl__serialize(c11_serializer* s,
|
|
const FuncDecl* decl,
|
|
const struct SourceData* parent_src) {
|
|
// CodeObject (embedded)
|
|
c11_serializer__write_mark(s, '{');
|
|
CodeObject__serialize(s, &decl->code, parent_src);
|
|
c11_serializer__write_mark(s, '}');
|
|
|
|
// args
|
|
c11_serializer__write_i32(s, decl->args.length);
|
|
c11_serializer__write_mark(s, '[');
|
|
c11_serializer__write_bytes(s, decl->args.data, decl->args.length * sizeof(int32_t));
|
|
c11_serializer__write_mark(s, ']');
|
|
|
|
// kwargs
|
|
c11_serializer__write_i32(s, decl->kwargs.length);
|
|
c11_serializer__write_mark(s, '[');
|
|
for(int i = 0; i < decl->kwargs.length; i++) {
|
|
FuncDeclKwArg* kw = c11__at(FuncDeclKwArg, &decl->kwargs, i);
|
|
c11_serializer__write_i32(s, kw->index);
|
|
c11_serializer__write_cstr(s, py_name2str(kw->key));
|
|
TValue__serialize(s, &kw->value);
|
|
}
|
|
c11_serializer__write_mark(s, ']');
|
|
|
|
// starred_arg, starred_kwarg
|
|
c11_serializer__write_i32(s, decl->starred_arg);
|
|
c11_serializer__write_i32(s, decl->starred_kwarg);
|
|
|
|
// nested
|
|
c11_serializer__write_i8(s, decl->nested ? 1 : 0);
|
|
|
|
// docstring
|
|
int has_docstring = decl->docstring != NULL ? 1 : 0;
|
|
c11_serializer__write_i8(s, has_docstring);
|
|
if(has_docstring) c11_serializer__write_cstr(s, decl->docstring);
|
|
|
|
// type
|
|
c11_serializer__write_i8(s, (int8_t)decl->type);
|
|
}
|
|
|
|
// Deserialize FuncDecl
|
|
static FuncDecl_ FuncDecl__deserialize(c11_deserializer* d, SourceData_ embedded_src) {
|
|
FuncDecl_ self = PK_MALLOC(sizeof(FuncDecl));
|
|
self->rc.count = 1;
|
|
self->rc.dtor = (void (*)(void*))FuncDecl__dtor;
|
|
|
|
c11_vector__ctor(&self->args, sizeof(int32_t));
|
|
c11_vector__ctor(&self->kwargs, sizeof(FuncDeclKwArg));
|
|
c11_smallmap_n2d__ctor(&self->kw_to_index);
|
|
|
|
// CodeObject (embedded)
|
|
c11_deserializer__consume_mark(d, '{');
|
|
self->code = CodeObject__deserialize(d, NULL, embedded_src);
|
|
c11_deserializer__consume_mark(d, '}');
|
|
|
|
// args
|
|
int args_len = c11_deserializer__read_i32(d);
|
|
c11_deserializer__consume_mark(d, '[');
|
|
c11_vector__extend(&self->args,
|
|
c11_deserializer__read_bytes(d, args_len * sizeof(int32_t)),
|
|
args_len);
|
|
c11_deserializer__consume_mark(d, ']');
|
|
|
|
// kwargs
|
|
int kwargs_len = c11_deserializer__read_i32(d);
|
|
c11_deserializer__consume_mark(d, '[');
|
|
for(int i = 0; i < kwargs_len; i++) {
|
|
FuncDeclKwArg* kw = c11_vector__emplace(&self->kwargs);
|
|
kw->index = c11_deserializer__read_i32(d);
|
|
const char* key_str = c11_deserializer__read_cstr(d);
|
|
kw->key = py_name(key_str);
|
|
TValue__deserialize(d, &kw->value);
|
|
c11_smallmap_n2d__set(&self->kw_to_index, kw->key, kw->index);
|
|
}
|
|
c11_deserializer__consume_mark(d, ']');
|
|
|
|
// starred_arg
|
|
self->starred_arg = c11_deserializer__read_i32(d);
|
|
// starred_kwarg
|
|
self->starred_kwarg = c11_deserializer__read_i32(d);
|
|
|
|
// nested
|
|
self->nested = c11_deserializer__read_i8(d) != 0;
|
|
|
|
// docstring
|
|
int has_docstring = c11_deserializer__read_i8(d);
|
|
if(has_docstring) {
|
|
const char* docstring = c11_deserializer__read_cstr(d);
|
|
self->docstring = c11_strdup(docstring);
|
|
} else {
|
|
self->docstring = NULL;
|
|
}
|
|
|
|
// type
|
|
self->type = (FuncType)c11_deserializer__read_i8(d);
|
|
return self;
|
|
}
|
|
|
|
// Public API: Serialize CodeObject to bytes
|
|
void* CodeObject__dumps(const CodeObject* co, int* size) {
|
|
c11_serializer s;
|
|
c11_serializer__ctor(&s, CODEOBJECT_MAGIC, CODEOBJECT_VER_MAJOR, CODEOBJECT_VER_MINOR);
|
|
CodeObject__serialize(&s, co, NULL);
|
|
return c11_serializer__submit(&s, size);
|
|
}
|
|
|
|
// Public API: Deserialize CodeObject from bytes
|
|
// Returns error message or NULL on success
|
|
char* CodeObject__loads(const void* data, int size, const char* filename, CodeObject* out) {
|
|
c11_deserializer d;
|
|
c11_deserializer__ctor(&d, data, size);
|
|
|
|
if(!c11_deserializer__check_header(&d,
|
|
CODEOBJECT_MAGIC,
|
|
CODEOBJECT_VER_MAJOR,
|
|
CODEOBJECT_VER_MINOR_MIN)) {
|
|
char* error_msg = c11_strdup(d.error_msg);
|
|
c11_deserializer__dtor(&d);
|
|
return error_msg;
|
|
}
|
|
|
|
*out = CodeObject__deserialize(&d, filename, NULL);
|
|
c11_deserializer__dtor(&d);
|
|
return NULL;
|
|
}
|
|
|
|
#undef CODEOBJECT_MAGIC
|
|
#undef CODEOBJECT_VER_MAJOR
|
|
#undef CODEOBJECT_VER_MINOR
|
|
#undef CODEOBJECT_VER_MINOR_MIN
|