Csv2Duckdb-Persist

Sat 17 May 2025

import duckdb

# Location of the on-disk DuckDB database file
db_path = 'student_duck1.db'

# Open a read-write connection (read_only defaults to False)
con = duckdb.connect(db_path)

# Confirm which database file we are attached to
print(f"Connected to DuckDB database at {db_path}")
Connected to DuckDB database at student_duck1.db
# Path to your CSV file
csv_file_path = '../dataset/student_data.csv'

# Load the CSV file into a DuckDB table.
# CREATE OR REPLACE keeps this cell re-runnable: the database is a persistent
# file, so a plain CREATE TABLE fails on the second run because my_table
# already exists.
# Single quotes are doubled so the path stays a valid SQL string literal.
escaped_csv_path = csv_file_path.replace("'", "''")
con.execute(
    "CREATE OR REPLACE TABLE my_table AS "
    f"SELECT * FROM read_csv_auto('{escaped_csv_path}')"
)

# Read the first rows back to verify the load
result = con.execute("SELECT * FROM my_table LIMIT 5").fetchdf()
print(result)
   student_id   student_name  test_scores  attendance  participation  \
0           1       John Doe           85          90             80   
1           2     Jane Smith           78          85             75   
2           3    Bob Johnson           92          95             85   
3           4    Alice Brown           70          80             70   
4           5  Charlie Davis           88          92             83

   project_scores  got_job  
0              88        1  
1              80        0  
2              90        1  
3              75        0  
4              85        1
# Close the connection now that the table has been written to the database file
con.close()

# Report which database file holds the data
print(f"Data has been persisted to {db_path}")
Data has been persisted to student_duck1.db


# Reconnect to the persisted DuckDB database.
# The context manager closes the connection even if the query raises, so a
# failed cell does not leave the database file open.
with duckdb.connect(database=db_path, read_only=False) as con:
    # Query the table to verify the data survived the close/reopen cycle
    result = con.execute("SELECT * FROM my_table LIMIT 5").fetchdf()

print(result)
   student_id   student_name  test_scores  attendance  participation  \
0           1       John Doe           85          90             80   
1           2     Jane Smith           78          85             75   
2           3    Bob Johnson           92          95             85   
3           4    Alice Brown           70          80             70   
4           5  Charlie Davis           88          92             83

   project_scores  got_job  
0              88        1  
1              80        0  
2              90        1  
3              75        0  
4              85        1


Score: 5

Category: duckdb


Duck-Automobile

Sat 17 May 2025
# https://rajasgs.gitbook.io/pynotes/gcp#data-modeling
# https://github.com/tactlabs/student-hiring-prediction-mle/tree/main/dataset
import pyutil as pyu
pyu.get_local_pyinfo()
'conda env: ml311; pyv: 3.11.10 (main, Oct  3 2024, 07:29:13) [GCC 11.2.0]'
# !pip install duckdb
!pip show duckdb | grep "Version:"
Version: 1.1 …

Category: duckdb

Read More

Duck-Test1

Sat 17 May 2025
# https://rajasgs.gitbook.io/pynotes/gcp#data-modeling
# https://github.com/tactlabs/student-hiring-prediction-mle/tree/main/dataset
import pyutil as pyu
pyu.get_local_pyinfo()
'conda env: ml311; pyv: 3.11.10 (main, Oct  3 2024, 07:29:13) [GCC 11.2.0]'
# !pip install duckdb
!pip show duckdb | grep "Version:"
Version: 1.1 …

Category: duckdb

Read More

Duck-Test2

Sat 17 May 2025
# https://rajasgs.gitbook.io/pynotes/gcp#data-modeling
# https://github.com/tactlabs/student-hiring-prediction-mle/tree/main/dataset
import pyutil as pyu
pyu.get_local_pyinfo()
'conda env: ml311; pyv: 3.11.10 (main, Oct  3 2024, 07:29:13) [GCC 11.2.0]'
# !pip install duckdb
!pip show duckdb | grep "Version:"
Version: 1.1 …

Category: duckdb

Read More

Duck2Persistent

Sat 17 May 2025

import pyutil as pyu
pyu.get_local_pyinfo()
'conda env: ml311; pyv: 3.11.10 (main, Oct  3 2024, 07:29:13) [GCC 11.2.0]'
import duckdb
import pandas as pd
# Path to your CSV file
csv_file_path = '../dataset/student_data.csv'
# Create a DuckDB connection
con = duckdb.connect(database=':memory:', read_only=False …

Category: duckdb

Read More

List2Table-D-Pandas

Sat 17 May 2025

import pandas as pd
from IPython.display import display

# Example list of tuples
data = [
    (1, 'Alice', 25),
    (2, 'Bob', 30),
    (3, 'Charlie', 35)
]

# Convert the list of tuples to a DataFrame
df = pd.DataFrame(data, columns=['ID', 'Name', 'Age'])

# Set display options
pd.set_option('display.max_rows', 100)
pd.set_option('display …

Category: duckdb

Read More
Page 1 of 1