chectus/lichessdb/prepare.sh
2024-07-28 14:48:02 +08:00

21 lines
475 B
Bash

#!/bin/sh
# Prepare the Lichess evaluation data set for use from evals.db:
#   1. download and decompress lichess_db_eval.jsonl (skipped if present),
#   2. cut a training split (first 2,000,000 lines) and a test split
#      (last 200,000 lines),
#   3. create evals.db from init.sql (skipped if present),
#   4. run process_data.py over each split.
#
# All paths are relative, so run this from the directory that contains
# init.sql and process_data.py.
# Requires: wget, zstd, sqlite3, python3.

# Abort on the first failing command (or unset variable) so later steps never
# run against a missing or partial download/database.
set -eu

# Downloads
if [ ! -f lichess_db_eval.jsonl ]; then
  # -c resumes an interrupted download instead of saving a second copy
  # under a different name (*.zst.1) and leaving the partial archive behind.
  wget -c https://database.lichess.org/lichess_db_eval.jsonl.zst
  # Decompress to a temporary name and rename only on success: an interrupted
  # run must not leave a truncated .jsonl that the check above would accept
  # as complete next time. -f overwrites a stale .part from a previous attempt.
  zstd -d -f -o lichess_db_eval.jsonl.part lichess_db_eval.jsonl.zst
  mv lichess_db_eval.jsonl.part lichess_db_eval.jsonl
fi

# Split
# NOTE: the two splits overlap if the source has fewer than 2,200,000 lines.
head -n 2000000 lichess_db_eval.jsonl > train2M.jsonl
tail -n 200000 lichess_db_eval.jsonl > test200K.jsonl

# Create database
if [ ! -f evals.db ]; then
  # Remove the file if initialization fails; otherwise the existence check
  # above would skip init.sql on the next run and leave a broken schema.
  if ! sqlite3 evals.db < init.sql; then
    rm -f evals.db
    echo "prepare.sh: failed to initialize evals.db from init.sql" >&2
    exit 1
  fi
fi

# Process
# The third argument is presumably the split label used by process_data.py;
# confirm against that script.
python3 process_data.py evals.db train2M.jsonl Train
python3 process_data.py evals.db test200K.jsonl Test