update gc

This commit is contained in:
blueloveTH 2023-03-29 13:16:45 +08:00
parent 0b2d54f88d
commit 9634e5c402
13 changed files with 205 additions and 98 deletions

View File

@ -89,6 +89,10 @@ struct CodeObject {
return consts.size() - 1;
}
void _mark() const {
for(PyObject* v : consts) OBJ_MARK(v);
}
/************************************************/
int _curr_block_i = 0;
int _rvalue = 0;

View File

@ -9,7 +9,6 @@
#include <sstream>
#include <regex>
#include <stack>
#include <cmath>
#include <cstdlib>
#include <stdexcept>
@ -18,7 +17,6 @@
#include <cstring>
#include <chrono>
#include <string_view>
#include <queue>
#include <iomanip>
#include <memory>
#include <functional>
@ -28,6 +26,7 @@
#include <algorithm>
#include <random>
#include <initializer_list>
#include <list>
#define PK_VERSION "0.9.5"
#define PK_EXTRA_CHECK 0
@ -101,4 +100,35 @@ inline bool is_both_int(PyObject* a, PyObject* b) noexcept {
return is_int(a) && is_int(b);
}
// Minimal FIFO container backed by std::list.
// Elements are appended at the back and removed from the front; data()
// exposes the underlying list read-only so callers can iterate over it.
template <typename T>
class queue{
    std::list<T> items_;
public:
    // Appends a copy of `t` at the back.
    void push(const T& t){
        items_.push_back(t);
    }

    // Appends `t` at the back, moving from it.
    void push(T&& t){
        items_.push_back(std::move(t));
    }

    // Removes the front element; the queue must not be empty.
    void pop(){
        items_.pop_front();
    }

    // Removes every element.
    void clear(){
        items_.clear();
    }

    bool empty() const {
        return items_.empty();
    }

    size_t size() const {
        return items_.size();
    }

    // Access to the oldest element; the queue must not be empty.
    T& front(){
        return items_.front();
    }

    const T& front() const {
        return items_.front();
    }

    // Read-only view of the underlying list, front element first.
    const std::list<T>& data() const {
        return items_;
    }
};
// Minimal LIFO container backed by std::vector.
// Unlike std::stack it exposes the underlying vector read-only through
// data(), so callers can walk every element, bottom of the stack first.
template <typename T>
class stack{
    std::vector<T> items_;
public:
    // Pushes a copy of `t` on top.
    void push(const T& t){
        items_.push_back(t);
    }

    // Pushes `t` on top, moving from it.
    void push(T&& t){
        items_.push_back(std::move(t));
    }

    // Removes the top element; the stack must not be empty.
    void pop(){
        items_.pop_back();
    }

    // Removes every element.
    void clear(){
        items_.clear();
    }

    bool empty() const {
        return items_.empty();
    }

    size_t size() const {
        return items_.size();
    }

    // Access to the most recently pushed element; the stack must not be empty.
    T& top(){
        return items_.back();
    }

    const T& top() const {
        return items_.back();
    }

    // Read-only view of the underlying vector (index 0 is the bottom).
    const std::vector<T>& data() const {
        return items_;
    }
};
} // namespace pkpy

View File

@ -21,7 +21,7 @@ enum StringType { NORMAL_STRING, RAW_STRING, F_STRING };
class Compiler {
std::unique_ptr<Parser> parser;
std::stack<CodeObject_> codes;
stack<CodeObject_> codes;
int lexing_count = 0;
bool used = false;
VM* vm;

View File

@ -72,7 +72,7 @@ struct SourceData {
class Exception {
StrName type;
Str msg;
std::stack<Str> stacktrace;
stack<Str> stacktrace;
public:
Exception(StrName type, Str msg): type(type), msg(msg) {}
bool match_type(StrName type) const { return this->type == type;}
@ -84,7 +84,7 @@ public:
}
Str summary() const {
std::stack<Str> st(stacktrace);
stack<Str> st(stacktrace);
StrStream ss;
if(is_re) ss << "Traceback (most recent call last):\n";
while(!st.empty()) { ss << st.top() << '\n'; st.pop(); }

View File

@ -159,6 +159,17 @@ struct Frame {
for(int i=n-1; i>=0; i--) v[i] = pop();
return v;
}
void _mark() const {
for(PyObject* obj : _data) OBJ_MARK(obj);
if(_locals != nullptr) _locals->_mark();
if(_closure != nullptr) _closure->_mark();
OBJ_MARK(_module);
for(auto& p : s_try_block){
for(PyObject* obj : p.second) OBJ_MARK(obj);
}
co->_mark();
}
};
}; // namespace pkpy

View File

@ -1,19 +1,28 @@
#pragma once
#include "obj.h"
#include "codeobject.h"
#include "namedict.h"
namespace pkpy {
struct ManagedHeap{
std::vector<PyObject*> heap;
std::vector<PyObject*> gen;
void _add(PyObject* obj){
obj->gc.enabled = true;
heap.push_back(obj);
template<typename T>
PyObject* gcnew(Type type, T&& val){
PyObject* obj = new Py_<std::decay_t<T>>(type, std::forward<T>(val));
gen.push_back(obj);
return obj;
}
void sweep(){
template<typename T>
PyObject* _new(Type type, T&& val){
return gcnew<T>(type, std::forward<T>(val));
}
int sweep(){
std::vector<PyObject*> alive;
for(PyObject* obj: heap){
for(PyObject* obj: gen){
if(obj->gc.marked){
obj->gc.marked = false;
alive.push_back(obj);
@ -21,17 +30,55 @@ namespace pkpy {
delete obj;
}
}
heap.clear();
heap.swap(alive);
int freed = gen.size() - alive.size();
gen.clear();
gen.swap(alive);
return freed;
}
void collect(VM* vm){
std::vector<PyObject*> roots = get_roots(vm);
for(PyObject* obj: roots) obj->mark();
sweep();
int collect(VM* vm){
mark(vm);
return sweep();
}
std::vector<PyObject*> get_roots(VM* vm);
void mark(VM* vm);
};
inline void NameDict::_mark(){
for(uint16_t i=0; i<_capacity; i++){
if(_items[i].first.empty()) continue;
OBJ_MARK(_items[i].second);
}
}
// GC hook for List values: applies OBJ_MARK to each element.
template<> inline void _mark<List>(List& t){
    for(PyObject* element : t){
        OBJ_MARK(element);
    }
}
// GC hook for Tuple values: applies OBJ_MARK to each slot, by index.
template<> inline void _mark<Tuple>(Tuple& t){
    for(int idx = 0; idx < t.size(); ++idx){
        OBJ_MARK(t[idx]);
    }
}
// GC hook for Function values: marks through the code object and the
// `kwargs` dict, then the module and closure when they are set.
template<> inline void _mark<Function>(Function& t){
    t.code->_mark();
    t.kwargs._mark();
    if(t._module != nullptr){
        OBJ_MARK(t._module);
    }
    if(t._closure != nullptr){
        t._closure->_mark();
    }
}
// GC hook for BoundMethod values: marks the bound object, then the method.
template<> inline void _mark<BoundMethod>(BoundMethod& t){
    PyObject* const receiver = t.obj;
    OBJ_MARK(receiver);
    PyObject* const callee = t.method;
    OBJ_MARK(callee);
}
// GC hook for StarWrapper values: marks the wrapped object.
template<> inline void _mark<StarWrapper>(StarWrapper& t){
    PyObject* const wrapped = t.obj;
    OBJ_MARK(wrapped);
}
// GC hook for Super values (a pair): marks the PyObject* held in `first`.
template<> inline void _mark<Super>(Super& t){
    PyObject* const target = t.first;
    OBJ_MARK(target);
}
// NOTE: a std::function may capture PyObject* values; the GC cannot see inside it, so those objects are never marked
} // namespace pkpy

View File

@ -65,4 +65,21 @@ inline PyObject* Generator::next(){
}
}
// GC hook shared by all iterators: marks `_ref` and `loop_var`, skipping
// whichever of them is null.
inline void BaseIter::_mark() {
    if(_ref != nullptr){
        OBJ_MARK(_ref);
    }
    if(loop_var != nullptr){
        OBJ_MARK(loop_var);
    }
}
inline void Generator::_mark(){
BaseIter::_mark();
frame->_mark();
}
template<typename T>
void _mark(T& t){
if constexpr(std::is_base_of_v<BaseIter, T>){
t._mark();
}
}
} // namespace pkpy

View File

@ -180,12 +180,7 @@ while(!_items[i].first.empty()) { \
return v;
}
void apply_v(void(*f)(PyObject*)) {
for(uint16_t i=0; i<_capacity; i++){
if(_items[i].first.empty()) continue;
f(_items[i].second);
}
}
void _mark();
#undef HASH_PROBE
#undef _hash
};

View File

@ -63,6 +63,8 @@ struct StarWrapper {
StarWrapper(PyObject* obj, bool rvalue): obj(obj), rvalue(rvalue) {}
};
using Super = std::pair<PyObject*, Type>;
struct Slice {
int start = 0;
int stop = 0x7fffffff;
@ -84,16 +86,13 @@ public:
virtual PyObject* next() = 0;
// Defaults to nullptr: BaseIter::_mark() compares this pointer with nullptr
// and dereferences it otherwise, so it must never hold an indeterminate value
// before a loop variable has been assigned.
PyObject* loop_var = nullptr;
// Stores the VM pointer and `_ref`; `loop_var` keeps its nullptr default.
BaseIter(VM* vm, PyObject* _ref) : vm(vm), _ref(_ref) {}
virtual void _mark();
virtual ~BaseIter() = default;
};
template <typename, typename=void> struct is_container_gc : std::false_type {};
template <typename T> struct is_container_gc<T, std::void_t<decltype(T::_mark)>> : std::true_type {};
struct GCHeader {
bool enabled; // whether this object is managed by GC
bool marked; // whether this object is marked
GCHeader() : enabled(false), marked(false) {}
GCHeader() : marked(false) {}
};
struct PyObject {
@ -105,12 +104,15 @@ struct PyObject {
NameDict& attr() noexcept { return *_attr; }
PyObject* attr(StrName name) const noexcept { return (*_attr)[name]; }
virtual void* value() = 0;
virtual void mark() = 0;
virtual void _mark() = 0;
PyObject(Type type) : type(type) {}
virtual ~PyObject() { delete _attr; }
};
template<typename T>
void _mark(T& t);
template <typename T>
struct Py_ : PyObject {
T _value;
@ -131,16 +133,17 @@ struct Py_ : PyObject {
}
void* value() override { return &_value; }
void mark() override {
if(!gc.enabled || gc.marked) return;
void _mark() override {
if(gc.marked) return;
gc.marked = true;
if(is_attr_valid()) attr().apply_v([](PyObject* v){ v->mark(); });
if constexpr (is_container_gc<T>::value) _value._mark();
if(is_attr_valid()) attr()._mark();
pkpy::_mark<T>(_value); // handle PyObject* inside _value `T`
}
};
#define OBJ_GET(T, obj) (((Py_<T>*)(obj))->_value)
#define OBJ_NAME(obj) OBJ_GET(Str, vm->getattr(obj, __name__))
// Calls `_mark()` on `obj` unless `is_tagged(obj)` is true.
// The body is wrapped in do/while(0) so the macro expands to exactly one
// statement: a following `else` can no longer bind to the macro's internal
// `if`. The argument is parenthesised before `->`; note that it is still
// evaluated twice, so it must be free of side effects.
#define OBJ_MARK(obj) do { if(!is_tagged(obj)) (obj)->_mark(); } while(0)
const int kTpIntIndex = 2;
const int kTpFloatIndex = 3;
@ -210,7 +213,7 @@ __T _py_cast(VM* vm, PyObject* obj) {
}
#define VAR(x) py_var(vm, x)
#define VAR_T(T, ...) vm->gcnew<T>(T::_type(vm), T(__VA_ARGS__))
#define VAR_T(T, ...) vm->heap.gcnew<T>(T::_type(vm), T(__VA_ARGS__))
#define CAST(T, x) py_cast<T>(vm, x)
#define _CAST(T, x) _py_cast<T>(vm, x)

View File

@ -101,8 +101,8 @@ struct Parser {
const char* curr_char;
int current_line = 1;
Token prev, curr;
std::queue<Token> nexts;
std::stack<int> indents;
queue<Token> nexts;
stack<int> indents;
int brackets_level = 0;

View File

@ -69,7 +69,7 @@ inline void init_builtins(VM* _vm) {
vm->TypeError("super(type, obj): obj must be an instance or subtype of type");
}
Type base = vm->_all_types[type].base;
return vm->gcnew(vm->tp_super, Super(args[1], base));
return vm->heap.gcnew(vm->tp_super, Super(args[1], base));
});
_vm->bind_builtin_func<2>("isinstance", [](VM* vm, Args& args) {
@ -757,7 +757,7 @@ inline void add_module_random(VM* vm){
inline void add_module_gc(VM* vm){
PyObject* mod = vm->new_module("gc");
vm->bind_func<0>(mod, "collect", CPP_LAMBDA(VAR(vm->gc_collect())));
vm->bind_func<0>(mod, "collect", CPP_LAMBDA(VAR(vm->heap.collect(vm))));
}
inline void VM::post_init(){

View File

@ -152,7 +152,7 @@ struct TupleRef : BaseRef {
template<typename P>
PyObject* VM::PyRef(P&& value) {
static_assert(std::is_base_of_v<BaseRef, std::decay_t<P>>);
return gcnew<P>(tp_ref, std::forward<P>(value));
return heap.gcnew<P>(tp_ref, std::forward<P>(value));
}
inline const BaseRef* VM::PyRef_AS_C(PyObject* obj)
@ -166,4 +166,18 @@ inline void Frame::try_deref(VM* vm, PyObject*& v){
if(is_type(v, vm->tp_ref)) v = vm->PyRef_AS_C(v)->get(vm, this);
}
/***** GC's Impl *****/
// GC hook for AttrRef values: marks the object the reference points into.
// The member must be reached through `t`; a bare `obj` names nothing in this
// scope. NOTE(review): assumes the AttrRef field is called `obj`, as the
// original identifier suggests — confirm against the struct definition.
template<> inline void _mark<AttrRef>(AttrRef& t){
    OBJ_MARK(t.obj);
}
// GC hook for IndexRef values: marks both the indexed object and the index.
// The members must be reached through `t`; bare `obj` / `index` name nothing
// in this scope. NOTE(review): assumes the IndexRef fields are called `obj`
// and `index`, as the original identifiers suggest — confirm against the
// struct definition.
template<> inline void _mark<IndexRef>(IndexRef& t){
    OBJ_MARK(t.obj);
    OBJ_MARK(t.index);
}
// GC hook for TupleRef values: reuses the Tuple hook on the `objs` member.
template<> inline void _mark<TupleRef>(TupleRef& t){
    Tuple& members = t.objs;
    _mark<Tuple>(members);
}
} // namespace pkpy

View File

@ -24,8 +24,8 @@ Str _read_file_cwd(const Str& name, bool* ok);
template<> inline ctype& _py_cast<ctype&>(VM* vm, PyObject* obj) { \
return OBJ_GET(ctype, obj); \
} \
inline PyObject* py_var(VM* vm, const ctype& value) { return vm->gcnew(vm->ptype, value);} \
inline PyObject* py_var(VM* vm, ctype&& value) { return vm->gcnew(vm->ptype, std::move(value));}
inline PyObject* py_var(VM* vm, const ctype& value) { return vm->heap.gcnew(vm->ptype, value);} \
inline PyObject* py_var(VM* vm, ctype&& value) { return vm->heap.gcnew(vm->ptype, std::move(value));}
class Generator: public BaseIter {
@ -35,7 +35,8 @@ public:
Generator(VM* vm, std::unique_ptr<Frame>&& frame)
: BaseIter(vm, nullptr), frame(std::move(frame)), state(0) {}
PyObject* next();
PyObject* next() override;
void _mark() override;
};
struct PyTypeInfo{
@ -46,9 +47,9 @@ struct PyTypeInfo{
class VM {
VM* vm; // self reference for simplify code
ManagedHeap heap;
public:
std::stack< std::unique_ptr<Frame> > callstack;
ManagedHeap heap;
stack< std::unique_ptr<Frame> > callstack;
std::vector<PyTypeInfo> _all_types;
PyObject* run_frame(Frame* frame);
@ -56,15 +57,12 @@ public:
NameDict _modules; // loaded modules
std::map<StrName, Str> _lazy_modules; // lazy loaded modules
// singleton objects, need_gc=false
PyObject* _py_op_call;
PyObject* _py_op_yield;
PyObject* None;
PyObject* True;
PyObject* False;
PyObject* Ellipsis;
// managed by _modules, need_gc=false
PyObject* builtins; // builtins module
PyObject* _main; // __main__ module
@ -73,6 +71,13 @@ public:
std::ostream* _stderr;
int recursionlimit = 1000;
// for quick access
Type tp_object, tp_type, tp_int, tp_float, tp_bool, tp_str;
Type tp_list, tp_tuple;
Type tp_function, tp_native_function, tp_iterator, tp_bound_method;
Type tp_slice, tp_range, tp_module, tp_ref;
Type tp_super, tp_exception, tp_star_wrapper;
VM(bool use_stdio){
this->vm = this;
this->use_stdio = use_stdio;
@ -118,7 +123,7 @@ public:
do{
val = cls->attr().try_get(name);
if(val != nullptr) return val;
Type cls_t = static_cast<Py_<Type>*>(cls)->_value;
Type cls_t = OBJ_GET(Type, cls);
Type base = _all_types[cls_t].base;
if(base.index == -1) break;
cls = _all_types[base].obj;
@ -144,18 +149,6 @@ public:
return nullptr;
}
i64 gc_collect(){
heap.collect(this);
return 0;
}
template<typename T>
PyObject* gcnew(Type type, T&& val){
PyObject* obj = new Py_<std::decay_t<T>>(type, std::forward<T>(val));
heap._add(obj);
return obj;
}
template<typename ArgT>
std::enable_if_t<std::is_same_v<std::decay_t<ArgT>, Args>, PyObject*>
call(PyObject* callable, ArgT&& args){
@ -200,12 +193,12 @@ public:
PyObject* property(NativeFuncRaw fget){
PyObject* p = builtins->attr("property");
PyObject* method = gcnew(tp_native_function, NativeFunc(fget, 1, false));
PyObject* method = heap.gcnew(tp_native_function, NativeFunc(fget, 1, false));
return call(p, Args{method});
}
PyObject* new_type_object(PyObject* mod, StrName name, Type base){
PyObject* obj = new Py_<Type>(tp_type, _all_types.size());
PyObject* obj = heap._new<Type>(tp_type, _all_types.size());
PyTypeInfo info{
.obj = obj,
.base = base,
@ -263,17 +256,10 @@ public:
return index;
}
// for quick access
Type tp_object, tp_type, tp_int, tp_float, tp_bool, tp_str;
Type tp_list, tp_tuple;
Type tp_function, tp_native_function, tp_iterator, tp_bound_method;
Type tp_slice, tp_range, tp_module, tp_ref;
Type tp_super, tp_exception, tp_star_wrapper;
template<typename P>
PyObject* PyIter(P&& value) {
static_assert(std::is_base_of_v<BaseIter, std::decay_t<P>>);
return gcnew<P>(tp_iterator, std::forward<P>(value));
return heap.gcnew<P>(tp_iterator, std::forward<P>(value));
}
BaseIter* PyIter_AS_C(PyObject* obj)
@ -323,6 +309,7 @@ public:
}
~VM() {
heap.collect(this);
if(!use_stdio){
delete _stdout;
delete _stderr;
@ -578,7 +565,7 @@ inline PyObject* VM::asRepr(PyObject* obj){
}
inline PyObject* VM::new_module(StrName name) {
PyObject* obj = new Py_<DummyModule>(tp_module, DummyModule());
PyObject* obj = heap._new<DummyModule>(tp_module, DummyModule());
obj->attr().set(__name__, VAR(name.str()));
// we do not allow override in order to avoid memory leak
// it is because Module objects are not garbage collected
@ -666,8 +653,8 @@ inline void VM::init_builtin_types(){
// PyTypeObject is managed by _all_types
// PyModuleObject is managed by _modules
// They are not managed by GC, so we use a simple "new"
_all_types.push_back({.obj = new Py_<Type>(Type(1), Type(0)), .base = -1, .name = "object"});
_all_types.push_back({.obj = new Py_<Type>(Type(1), Type(1)), .base = 0, .name = "type"});
_all_types.push_back({.obj = heap._new<Type>(Type(1), Type(0)), .base = -1, .name = "object"});
_all_types.push_back({.obj = heap._new<Type>(Type(1), Type(1)), .base = 0, .name = "type"});
tp_object = 0; tp_type = 1;
tp_int = _new_type_object("int");
@ -690,12 +677,12 @@ inline void VM::init_builtin_types(){
tp_super = _new_type_object("super");
tp_exception = _new_type_object("Exception");
this->None = new Py_<Dummy>(_new_type_object("NoneType"), {});
this->Ellipsis = new Py_<Dummy>(_new_type_object("ellipsis"), {});
this->True = new Py_<Dummy>(tp_bool, {});
this->False = new Py_<Dummy>(tp_bool, {});
this->_py_op_call = new Py_<Dummy>(_new_type_object("_py_op_call"), {});
this->_py_op_yield = new Py_<Dummy>(_new_type_object("_py_op_yield"), {});
this->None = heap._new<Dummy>(_new_type_object("NoneType"), {});
this->Ellipsis = heap._new<Dummy>(_new_type_object("ellipsis"), {});
this->True = heap._new<Dummy>(tp_bool, {});
this->False = heap._new<Dummy>(tp_bool, {});
this->_py_op_call = heap._new<Dummy>(_new_type_object("_py_op_call"), {});
this->_py_op_yield = heap._new<Dummy>(_new_type_object("_py_op_yield"), {});
this->builtins = new_module("builtins");
this->_main = new_module("__main__");
@ -723,7 +710,7 @@ inline PyObject* VM::call(PyObject* callable, Args args, const Args& kwargs, boo
if(new_f != nullptr){
obj = call(new_f, std::move(args), kwargs, false);
}else{
obj = gcnew<DummyInstance>(OBJ_GET(Type, callable), {});
obj = heap.gcnew<DummyInstance>(OBJ_GET(Type, callable), {});
PyObject* init_f = getattr(obj, __init__, false, true);
if (init_f != nullptr) call(init_f, std::move(args), kwargs, false);
}
@ -812,8 +799,6 @@ inline void VM::unpack_args(Args& args){
args = Args(std::move(unpacked));
}
using Super = std::pair<PyObject*, Type>;
// https://docs.python.org/3/howto/descriptor.html#invocation-from-an-instance
inline PyObject* VM::getattr(PyObject* obj, StrName name, bool throw_err, bool class_only){
PyObject* objtype = _t(obj);
@ -936,10 +921,11 @@ inline PyObject* VM::_exec(){
}
}
inline std::vector<PyObject*> ManagedHeap::get_roots(VM *vm) {
std::vector<PyObject*> roots;
// ...
return roots;
// Mark phase: walks every frame currently on the VM call stack and lets each
// frame mark the objects it references. Only call-stack frames are visited
// here; no other roots are marked by this function.
inline void ManagedHeap::mark(VM *vm) {
    const auto& frames = vm->callstack.data();
    for(const auto& active : frames){
        active->_mark();
    }
}
} // namespace pkpy