Duck-Test2
Sat 17 May 2025
# https://rajasgs.gitbook.io/pynotes/gcp#data-modeling
# https://github.com/tactlabs/student-hiring-prediction-mle/tree/main/dataset
import pyutil as pyu
pyu.get_local_pyinfo()
'conda env: ml311; pyv: 3.11.10 (main, Oct 3 2024, 07:29:13) [GCC 11.2.0]'
# !pip install duckdb
!pip show duckdb | grep "Version:"
Version: 1.1.3
import duckdb
# Connect to DuckDB
con = duckdb.connect()
# Load CSV file
con.execute("CREATE TABLE my_table AS SELECT * FROM '../dataset/student_data.parquet'")
<duckdb.duckdb.DuckDBPyConnection at 0x7f207c18c6b0>
# Perform SQL query
result = con.execute("SELECT * FROM my_table WHERE student_name = 'Patricia Hall'").fetchall()
result
[(18, 'Patricia Hall', 71, 75, 70, 73, 0)]
print(result)
[(18, 'Patricia Hall', 71, 75, 70, 73, 0)]
# type(result[0])
import pandas as pd
from IPython.display import display
def list2table(cdata, col_list = None):
first_row = cdata[0]
cols_count = len(first_row)
if not col_list:
col_list = []
for idx in range(cols_count):
col_list.append(f'col_{idx}')
# Convert the list of tuples to a DataFrame
df1 = pd.DataFrame(cdata, columns = col_list)
# Set display options
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 100)
# Apply custom styling
styled_df = df1.style.set_properties(**{'text-align': 'center'})
# Display the DataFrame
display(styled_df)
def q(query, col_list = None):
result = con.execute(query).fetchall()
# print(result)
return list2table(result, col_list)
q("SELECT * FROM my_table WHERE student_id = 1")
| col_0 | col_1 | col_2 | col_3 | col_4 | col_5 | col_6 | |
|---|---|---|---|---|---|---|---|
| 0 | 1 | John Doe | 85 | 90 | 80 | 88 | 1 |
q("""
SELECT
STUDENT_NAME
FROM my_table
limit 2
""", ["student_name"])
| student_name | |
|---|---|
| 0 | John Doe |
| 1 | Jane Smith |
Score: 15
Category: duckdb