Compare commits

...

9 Commits

Author SHA1 Message Date
blueloveTH
40d922e896 fix a bug about %s -> %v 2024-11-23 16:46:29 +08:00
blueloveTH
0a75eb857c update int hash 2024-11-23 16:43:06 +08:00
blueloveTH
620db020ed ... 2024-11-23 16:38:57 +08:00
blueloveTH
178bfa4c32 Update py_dict.c 2024-11-23 16:31:59 +08:00
blueloveTH
cd8ae22c2b fix a bug 2024-11-23 16:27:55 +08:00
blueloveTH
72d5e4d098 Revert "add cpy310_tuplehash"
This reverts commit b03feb0294c984002992b5b226df02d0d53103f5.
2024-11-23 15:52:56 +08:00
blueloveTH
b03feb0294 add cpy310_tuplehash 2024-11-23 15:42:42 +08:00
blueloveTH
63f2198ed1 Update py_dict.c 2024-11-23 15:03:16 +08:00
blueloveTH
6eb785144e fix a bug of dict 2024-11-23 14:56:47 +08:00
13 changed files with 189 additions and 74 deletions

View File

@ -12,9 +12,9 @@
#define PK_HEX_TABLE "0123456789abcdef"
#ifdef _MSC_VER
#define c11__unreachedable() __assume(0)
#define c11__unreachable() __assume(0)
#else
#define c11__unreachedable() __builtin_unreachable()
#define c11__unreachable() __builtin_unreachable()
#endif
#define c11__abort(...) \

45
scripts/gen_primes.py Normal file
View File

@ -0,0 +1,45 @@
import numba
from typing import List
@numba.jit(nopython=True)
def sieve_of_eratosthenes(n: int) -> List[int]:
    """Return every prime p with 2 <= p <= n, in ascending order.

    Uses the classic sieve: a boolean flag per integer in [0, n], with the
    multiples of each surviving number struck out starting from its square.
    Requires n >= 2 (enforced by an assertion).
    """
    assert n >= 2
    flags = [True] * (n + 1)
    # 0 and 1 are not prime
    flags[0] = False
    flags[1] = False
    upper = int(n**0.5) + 1
    for p in range(2, upper):
        if not flags[p]:
            continue
        # Start striking at p*p, as the classic sieve does.
        for m in range(p * p, n + 1, p):
            flags[m] = False
    return [k for k, keep in enumerate(flags) if keep]
# Sieve all primes up to 2**30 and print a short summary for sanity checking.
all_primes = sieve_of_eratosthenes(2**30)
print(len(all_primes), all_primes[:10], all_primes[-10:])

# Build the capacity sequence, starting from the prime at index 3.
index = 3
caps = [all_primes[index]]

while True:
    last_cap = caps[-1]
    # Growth factor: x2 while the capacity is below 1000, x1.5 afterwards.
    min_cap = last_cap * 2 if last_cap < 1000 else last_cap * 1.5
    # First prime after the current one that reaches the growth target.
    found = next(
        (i for i in range(index + 1, len(all_primes)) if all_primes[i] >= min_cap),
        None,
    )
    if found is None:
        # No sieved prime is large enough: the sequence is complete.
        break
    caps.append(all_primes[found])
    index = found

print('-'*20)
print(caps)

# Emit a C switch body mapping each capacity to its successor.
print('switch(cap) {')
for current, successor in zip(caps, caps[1:]):
    print(f' case {current}:', f'return {successor};')
print(' default: c11__unreachable();')
print('}')

View File

@ -128,7 +128,7 @@ bool NameExpr__emit_del(Expr* self_, Ctx* ctx) {
break;
case NAME_GLOBAL: Ctx__emit_(ctx, OP_DELETE_GLOBAL, self->name, self->line); break;
case NAME_GLOBAL_UNKNOWN: Ctx__emit_(ctx, OP_DELETE_NAME, self->name, self->line); break;
default: c11__unreachedable();
default: c11__unreachable();
}
return true;
}
@ -327,7 +327,7 @@ void LiteralExpr__emit_(Expr* self_, Ctx* ctx) {
Ctx__emit_(ctx, OP_LOAD_CONST, index, self->line);
break;
}
default: c11__unreachedable();
default: c11__unreachable();
}
}
@ -355,7 +355,7 @@ void Literal0Expr__emit_(Expr* self_, Ctx* ctx) {
case TK_TRUE: opcode = OP_LOAD_TRUE; break;
case TK_FALSE: opcode = OP_LOAD_FALSE; break;
case TK_DOTDOTDOT: opcode = OP_LOAD_ELLIPSIS; break;
default: c11__unreachedable();
default: c11__unreachable();
}
Ctx__emit_(ctx, opcode, BC_NOARG, self->line);
}
@ -1245,7 +1245,7 @@ static void Ctx__emit_store_name(Ctx* self, NameScope scope, py_Name name, int l
case NAME_LOCAL: Ctx__emit_(self, OP_STORE_FAST, Ctx__add_varname(self, name), line); break;
case NAME_GLOBAL: Ctx__emit_(self, OP_STORE_GLOBAL, name, line); break;
case NAME_GLOBAL_UNKNOWN: Ctx__emit_(self, OP_STORE_NAME, name, line); break;
default: c11__unreachedable();
default: c11__unreachable();
}
}
@ -2182,7 +2182,7 @@ static Error* read_literal(Compiler* self, py_Ref out) {
} else if(value->index == TokenValue_F64) {
py_newfloat(out, negated ? -value->_f64 : value->_f64);
} else {
c11__unreachedable();
c11__unreachable();
}
return NULL;
}

View File

@ -60,7 +60,7 @@ static bool stack_format_object(VM* self, c11_sv spec);
case RES_RETURN: PUSH(&self->last_retval); break; \
case RES_CALL: frame = self->top_frame; goto __NEXT_FRAME; \
case RES_ERROR: goto __ERROR; \
default: c11__unreachedable(); \
default: c11__unreachable(); \
} \
} while(0)
@ -1101,10 +1101,10 @@ FrameResult VM__run_top_frame(VM* self) {
if(!ok) goto __ERROR;
DISPATCH();
}
default: c11__unreachedable();
default: c11__unreachable();
}
c11__unreachedable();
c11__unreachable();
__ERROR:
py_BaseException__stpush(&self->curr_exception,
@ -1340,7 +1340,7 @@ static bool stack_format_object(VM* self, c11_sv spec) {
c11_sbuf__write_pad(&buf, pad_right, pad_c);
break;
}
default: c11__unreachedable();
default: c11__unreachable();
}
} else {
c11_sbuf__write_sv(&buf, c11_string__sv(body));

View File

@ -502,10 +502,10 @@ FrameResult VM__vectorcall(VM* self, uint16_t argc, uint16_t kwargc, bool opcall
self->stack.sp = p0; // reset the stack
return RES_RETURN;
}
default: c11__unreachedable();
default: c11__unreachable();
};
c11__unreachedable();
c11__unreachable();
/*****************_py_call*****************/
}

View File

@ -61,7 +61,7 @@ static bool disassemble(CodeObject* co) {
case OP_LOAD_CONST: {
py_Ref value = c11__at(py_TValue, &co->consts, byte.arg);
if(py_repr(value)) {
pk_sprintf(&ss, " (%s)", py_tosv(py_retval()));
pk_sprintf(&ss, " (%v)", py_tosv(py_retval()));
} else {
return false;
}

View File

@ -298,21 +298,32 @@ DEF_VECTOR_OPS(3)
sum += a.data[i] * b.data[i]; \
py_newint(py_retval(), sum); \
return true; \
} \
static bool vec##D##i##__hash__(int argc, py_Ref argv) { \
PY_CHECK_ARGC(1); \
const uint32_t C = 2654435761; \
c11_vec##D##i v = py_tovec##D##i(argv); \
uint64_t hash = 0; \
for(int i = 0; i < D; i++) \
hash = hash * 31 + (uint32_t)v.data[i] * C; \
py_newint(py_retval(), (py_i64)hash); \
return true; \
}
DEF_VECTOR_INT_OPS(2)
DEF_VECTOR_INT_OPS(3)
// __hash__ for vec2i: packs the two components into one 64-bit value,
// x in the upper 32 bits and y in the lower 32 bits, and returns it as an int.
static bool vec2i__hash__(int argc, py_Ref argv) {
    PY_CHECK_ARGC(1);
    c11_vec2i vec = py_tovec2i(argv);
    // Each component is converted to uint32_t first, so a negative value
    // contributes only its low 32 bits and cannot spill into the other half.
    uint64_t upper = (uint32_t)vec.x;
    uint64_t lower = (uint32_t)vec.y;
    py_newint(py_retval(), (py_i64)((upper << 32) | lower));
    return true;
}
// __hash__ for vec3i: packs truncated components into one 64-bit value.
// Bit layout, most significant first: x (low 24 bits) | y (low 24 bits) | z (low 16 bits).
static bool vec3i__hash__(int argc, py_Ref argv) {
    PY_CHECK_ARGC(1);
    c11_vec3i vec = py_tovec3i(argv);
    // Build the value field by field, shifting the accumulated bits left
    // by the width of the next field before merging it in.
    uint64_t packed = (uint32_t)vec.x & 0xFFFFFF;
    packed = (packed << 24) | ((uint32_t)vec.y & 0xFFFFFF);
    packed = (packed << 16) | ((uint32_t)vec.z & 0xFFFF);
    py_newint(py_retval(), (py_i64)packed);
    return true;
}
static bool vec2__repr__(int argc, py_Ref argv) {
PY_CHECK_ARGC(1);
char buf[64];

View File

@ -64,7 +64,7 @@ bool pk_exec(CodeObject* co, py_Ref module) {
FrameResult res = VM__run_top_frame(vm);
if(res == RES_ERROR) return false;
if(res == RES_RETURN) return true;
c11__unreachedable();
c11__unreachable();
}
bool py_exec(const char* source, const char* filename, enum py_CompileMode mode, py_Ref module) {

View File

@ -210,7 +210,7 @@ bool pk_loadmethod(py_StackRef self, py_Name name) {
self[0] = *py_getslot(cls_var, 0);
self[1] = pk__type_info(type)->self;
break;
default: c11__unreachedable();
default: c11__unreachable();
}
return true;
}

View File

@ -7,8 +7,54 @@
#define PK_DICT_MAX_COLLISION 4
// Returns the capacity that follows `cap` in the fixed growth sequence below.
// `cap` must be one of the listed values other than the last one; any other
// argument reaches c11__unreachable().
static uint32_t Dict__next_cap(uint32_t cap) {
    // Each entry is the successor of the one before it.
    static const uint32_t growth[] = {
        7,         17,        37,        79,        163,       331,
        673,       1361,      2053,      3083,      4637,      6959,
        10453,     15683,     23531,     35311,     52967,     79451,
        119179,    178781,    268189,    402299,    603457,    905189,
        1357787,   2036687,   3055043,   4582577,   6873871,   10310819,
        15466229,  23199347,  34799021,  52198537,  78297827,  117446801,
        176170229, 264255353, 396383041, 594574583, 891861923,
    };
    const int count = (int)(sizeof(growth) / sizeof(growth[0]));
    // The final entry has no successor, so it is never matched as a key.
    for(int i = 0; i + 1 < count; i++) {
        if(growth[i] == cap) return growth[i + 1];
    }
    c11__unreachable();
}
typedef struct {
py_i64 hash;
uint64_t hash;
py_TValue key;
py_TValue val;
} DictEntry;
@ -19,7 +65,7 @@ typedef struct {
typedef struct {
int length;
int capacity;
uint32_t capacity;
DictIndex* indices;
c11_vector /*T=DictEntry*/ entries;
} Dict;
@ -29,13 +75,13 @@ typedef struct {
DictEntry* end;
} DictIterator;
static void Dict__ctor(Dict* self, int capacity) {
static void Dict__ctor(Dict* self, uint32_t capacity, int entries_capacity) {
self->length = 0;
self->capacity = capacity;
self->indices = malloc(self->capacity * sizeof(DictIndex));
memset(self->indices, -1, self->capacity * sizeof(DictIndex));
c11_vector__ctor(&self->entries, sizeof(DictEntry));
c11_vector__reserve(&self->entries, capacity);
c11_vector__reserve(&self->entries, entries_capacity);
}
static void Dict__dtor(Dict* self) {
@ -48,7 +94,7 @@ static void Dict__dtor(Dict* self) {
static bool Dict__try_get(Dict* self, py_TValue* key, DictEntry** out) {
py_i64 hash;
if(!py_hash(key, &hash)) return false;
int idx = hash & (self->capacity - 1);
int idx = (uint64_t)hash % self->capacity;
for(int i = 0; i < PK_DICT_MAX_COLLISION; i++) {
int idx2 = self->indices[idx]._[i];
if(idx2 == -1) continue;
@ -72,16 +118,18 @@ static void Dict__clear(Dict* self) {
static void Dict__rehash_2x(Dict* self) {
Dict old_dict = *self;
uint32_t new_capacity = self->capacity;
int new_capacity = self->capacity * 2;
do {
Dict__ctor(self, new_capacity);
__RETRY:
// use next capacity
new_capacity = Dict__next_cap(new_capacity);
// create a new dict with new capacity
Dict__ctor(self, new_capacity, old_dict.entries.capacity);
// move entries from old dict to new dict
for(int i = 0; i < old_dict.entries.length; i++) {
DictEntry* entry = c11__at(DictEntry, &old_dict.entries, i);
if(py_isnil(&entry->key)) continue;
int idx = entry->hash & (new_capacity - 1);
int idx = entry->hash % new_capacity;
bool success = false;
for(int i = 0; i < PK_DICT_MAX_COLLISION; i++) {
int idx2 = self->indices[idx]._[i];
@ -96,14 +144,11 @@ static void Dict__rehash_2x(Dict* self) {
}
if(!success) {
Dict__dtor(self);
new_capacity *= 2;
continue;
goto __RETRY;
}
}
// resize complete
// done
Dict__dtor(&old_dict);
return;
} while(1);
}
static void Dict__compact_entries(Dict* self) {
@ -135,13 +180,13 @@ static void Dict__compact_entries(Dict* self) {
static bool Dict__set(Dict* self, py_TValue* key, py_TValue* val) {
py_i64 hash;
if(!py_hash(key, &hash)) return false;
int idx = hash & (self->capacity - 1);
int idx = (uint64_t)hash % self->capacity;
for(int i = 0; i < PK_DICT_MAX_COLLISION; i++) {
int idx2 = self->indices[idx]._[i];
if(idx2 == -1) {
// insert new entry
DictEntry* new_entry = c11_vector__emplace(&self->entries);
new_entry->hash = hash;
new_entry->hash = (uint64_t)hash;
new_entry->key = *key;
new_entry->val = *val;
self->indices[idx]._[i] = self->entries.length - 1;
@ -159,7 +204,11 @@ static bool Dict__set(Dict* self, py_TValue* key, py_TValue* val) {
}
// no empty slot found
if(self->capacity >= self->entries.length * 10) {
return RuntimeError("dict has too much collision: %d/%d", self->entries.length, self->capacity);
// raise error if we reach the minimum load factor (10%)
return RuntimeError("dict has too much collision: %d/%d/%d",
self->entries.length,
self->entries.capacity,
self->capacity);
}
Dict__rehash_2x(self);
return Dict__set(self, key, val);
@ -170,7 +219,7 @@ static bool Dict__set(Dict* self, py_TValue* key, py_TValue* val) {
static int Dict__pop(Dict* self, py_Ref key) {
py_i64 hash;
if(!py_hash(key, &hash)) return -1;
int idx = hash & (self->capacity - 1);
int idx = (uint64_t)hash % self->capacity;
for(int i = 0; i < PK_DICT_MAX_COLLISION; i++) {
int idx2 = self->indices[idx]._[i];
if(idx2 == -1) continue;
@ -208,13 +257,13 @@ static bool dict__new__(int argc, py_Ref argv) {
py_Type cls = py_totype(argv);
int slots = cls == tp_dict ? 0 : -1;
Dict* ud = py_newobject(py_retval(), cls, slots, sizeof(Dict));
Dict__ctor(ud, 8);
Dict__ctor(ud, 7, 8);
return true;
}
void py_newdict(py_Ref out) {
Dict* ud = py_newobject(out, tp_dict, 0, sizeof(Dict));
Dict__ctor(ud, 8);
Dict__ctor(ud, 7, 8);
}
static bool dict__init__(int argc, py_Ref argv) {

View File

@ -244,9 +244,7 @@ static py_i64 c11_8bytes__hash(union c11_8bytes u) {
static bool int__hash__(int argc, py_Ref argv) {
PY_CHECK_ARGC(1);
py_i64 val = py_toint(&argv[0]);
union c11_8bytes u = {._i64 = val};
py_newint(py_retval(), c11_8bytes__hash(u));
py_assign(py_retval(), argv);
return true;
}

View File

@ -138,3 +138,13 @@ for i in range(n):
# if i % 3 == 0:
# y = b.pop()
# delattr(a, y)
# Fill a dict with int keys from -1000 to 999 (value == key) and assert that
# looking a key up yields the value stored under it.
d = {}
for i in range(-1000, 1000):
d[i] = i
assert d[i] == i
# Same check over -10000 .. 9999 in steps of 3.
e = {}
for i in range(-10000, 10000, 3):
e[i] = i
assert e[i] == i

View File

@ -408,12 +408,14 @@ assert x == 3.0 and y == 4.0
x, y, z = vec3(1.0, 2.0, 3.0)
assert x == 1.0 and y == 2.0 and z == 3.0
assert hash(vec2i(11, -1)) == 51539607551
assert hash(vec3i(11, -1, 0)) == 13194139467776
d = {vec2i(12, 12): 1035.313708305359, vec2i(12, 11): 2059.313708305359, vec2i(12, 13): 2059.313708305359, vec2i(11, 12): 2059.313708305359, vec2i(13, 12): 2059.313708305359, vec2i(13, 11): 3083.313708305359, vec2i(13, 13): 3083.313708305359, vec2i(14, 12): 3083.313708305359, vec2i(12, 14): 3083.313708305359, vec2i(11, 13): 3083.313708305359, vec2i(12, 10): 3083.313708305359, vec2i(11, 11): 3083.313708305359, vec2i(10, 12): 3083.313708305359, vec2i(13, 14): 4107.313708305359, vec2i(14, 13): 4107.313708305359, vec2i(14, 11): 4107.313708305359, vec2i(15, 12): 4107.313708305359, vec2i(12, 15): 4107.313708305359, vec2i(11, 14): 4107.313708305359, vec2i(13, 10): 4107.313708305359, vec2i(10, 13): 4107.313708305359, vec2i(11, 10): 4107.313708305359, vec2i(10, 11): 4107.313708305359, vec2i(12, 9): 4107.313708305359, vec2i(9, 12): 4107.313708305359, vec2i(14, 14): 5131.313708305359, vec2i(15, 13): 5131.313708305359, vec2i(13, 15): 5131.313708305359, vec2i(15, 11): 5131.313708305359, vec2i(16, 12): 5131.313708305359, vec2i(12, 16): 5131.313708305359, vec2i(11, 15): 5131.313708305359, vec2i(14, 10): 5131.313708305359, vec2i(10, 14): 5131.313708305359, vec2i(13, 9): 5131.313708305359, vec2i(9, 13): 5131.313708305359}
d[vec2i(11, 9)] = 1
e = {}
for i in range(10000):
for i in range(-1000, 10000):
e[vec2i(12, i)] = i
e[vec2i(11, i)] = i
e[vec2i(13, i)] = i