Duck2Persistent
Sat 17 May 2025
import pyutil as pyu
pyu.get_local_pyinfo()
'conda env: ml311; pyv: 3.11.10 (main, Oct 3 2024, 07:29:13) [GCC 11.2.0]'
import duckdb
import pandas as pd
# Path to your CSV file
csv_file_path = '../dataset/student_data.csv'
# Create a DuckDB connection
con = duckdb.connect(database=':memory:', read_only=False)
# Load the CSV file into a DuckDB table
con.execute(f"CREATE TABLE my_table AS SELECT * FROM read_csv_auto('{csv_file_path}')")
<duckdb.duckdb.DuckDBPyConnection at 0x7fb9a0190070>
# Optionally, you can query the table to verify the data
result = con.execute("SELECT * FROM my_table LIMIT 5").fetchdf()
print(result)
student_id student_name test_scores attendance participation \
0 1 John Doe 85 90 80
1 2 Jane Smith 78 85 75
2 3 Bob Johnson 92 95 85
3 4 Alice Brown 70 80 70
4 5 Charlie Davis 88 92 83
project_scores got_job
0 88 1
1 80 0
2 90 1
3 75 0
4 85 1
# Persist the data by saving the DuckDB database to a file
persisted_db_path = 'student_duck.db'
con.execute(f"COPY my_table TO '{persisted_db_path}' (FORMAT CSV)")
<duckdb.duckdb.DuckDBPyConnection at 0x7fb9a0190070>
# Close the connection
con.close()
print(f"Data has been persisted to {persisted_db_path}")
Data has been persisted to student_duck.db
# read from persisted
# con = duckdb.connect(database='student_duck.db', read_only=False)
# Create a new DuckDB database file
db_path = 'student_duck.db'
con = duckdb.connect(database=db_path, read_only=False)
# Verify the connection
print(f"Connected to DuckDB database at {db_path}")
---------------------------------------------------------------------------
IOException Traceback (most recent call last)
Cell In[39], line 3
1 # Create a new DuckDB database file
2 db_path = 'student_duck.db'
----> 3 con = duckdb.connect(database=db_path, read_only=True)
5 # Verify the connection
6 print(f"Connected to DuckDB database at {db_path}")
IOException: IO Error: The file "/home/rajaraman/tprojects/mlnotes/notebooks/duckdb/student_duck.db" exists, but it is not a valid DuckDB database file!
Score: 15
Category: duckdb