chectus/lichessdb/process_data.py
2024-07-28 14:48:02 +08:00

48 lines
1.6 KiB
Python

import sqlite3
import json
from tqdm import tqdm
import argparse
def _extract_row(data):
    """Turn one decoded JSON record into an ``INSERT`` parameter tuple.

    The record must provide ``data['fen']`` (a space-separated string whose
    second field is the side to move) and ``data['evals']`` (a non-empty
    list of dicts, each with a ``'depth'`` key). The deepest evaluation is
    used; on equal depth the first one wins.

    Args:
        data: A decoded JSON object for a single position.

    Returns:
        A ``(fen, cpe, depth, nxt, mate)`` tuple, where ``nxt`` is the first
        move of the first principal variation (``''`` if there is none) and
        ``mate`` is ``None`` when the record carries no mate score.

    Raises:
        KeyError: If ``'fen'``, ``'evals'`` or a ``'depth'`` key is missing.
        ValueError: If ``data['evals']`` is empty.
        IndexError: If ``fen`` has no second space-separated field.
    """
    fen = data['fen']
    best_eval = max(data['evals'], key=lambda ev: ev['depth'])
    cur_player = fen.split(' ')[1]

    # `or [{}]` covers both a missing 'pvs' key and an empty 'pvs' list, so
    # indexing [0] can never raise here.
    pvs = (best_eval.get('pvs') or [{}])[0]

    # When no 'cp' score is present, store a +/-20000 sentinel chosen by the
    # side to move.
    # NOTE(review): the sentinel sign follows the side to move, not the sign
    # of 'mate' -- confirm this is what consumers of `cpe` expect.
    cpe = pvs.get('cp', 20000 if cur_player == 'w' else -20000)
    mate = pvs.get('mate')
    nxt = pvs.get('line', '').split(' ')[0]
    return fen, cpe, best_eval['depth'], nxt, mate


def main(argv=None):
    """Load line-delimited JSON evaluations into an SQLite table.

    Each non-blank line of the input file is parsed as one JSON record and
    inserted with ``INSERT OR IGNORE`` into the columns
    ``(fen, cpe, dep, nxt, mate)`` of the target table. All inserts are
    committed once at the end; if any line fails, nothing is committed and
    the connection is still closed.

    Args:
        argv: Optional list of command-line arguments
            (``[db_name, input_file, table_name]``). ``None`` (the default)
            makes argparse read ``sys.argv[1:]``.
    """
    # Argument parsing
    parser = argparse.ArgumentParser(description='Process and insert JSON data into SQLite database.')
    parser.add_argument('db_name', type=str, help='Name of the SQLite database file.')
    parser.add_argument('input_file', type=str, help='Name of the input JSON file.')
    parser.add_argument('table_name', type=str, help='Name of the table in the database.')
    args = parser.parse_args(argv)

    # Build the statement once; it does not change between rows. A table
    # name cannot be bound as a `?` parameter, so it is interpolated here and
    # must come from a trusted command line.
    insert_sql = f'''
        INSERT OR IGNORE INTO {args.table_name} (fen, cpe, dep, nxt, mate)
        VALUES (?, ?, ?, ?, ?)
    '''

    # Connect to the SQLite database
    conn = sqlite3.connect(args.db_name)
    try:
        cursor = conn.cursor()
        # Process and insert JSON data
        with open(args.input_file, 'r', encoding='utf-8') as f:
            for line in tqdm(f, desc='Processing data', unit=' lines'):
                # Tolerate blank lines (e.g. a trailing newline in the dump).
                if not line.strip():
                    continue
                cursor.execute(insert_sql, _extract_row(json.loads(line)))
        # Commit the transaction only after every line was processed
        conn.commit()
    finally:
        # Always release the database handle, even when a line fails.
        conn.close()
    print("Data processing and insertion complete.")
# Script entry point: run the loader only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    raise SystemExit(main())