Duck-Test1

Fri 14 November 2025

# https://rajasgs.gitbook.io/pynotes/gcp#data-modeling
# https://github.com/tactlabs/student-hiring-prediction-mle/tree/main/dataset

import pyutil as pyu
pyu.get_local_pyinfo()

'conda env: ml311; pyv: 3.11.10 (main, Oct  3 2024, 07:29:13) [GCC 11.2.0]'

# !pip install duckdb

!pip show duckdb | grep "Version:"

Version: 1.1.3

import duckdb

# Connect to DuckDB
con = duckdb.connect()

# Load CSV file
con.execute("CREATE TABLE my_table AS SELECT * FROM '../dataset/student_data.csv'")

<duckdb.duckdb.DuckDBPyConnection at 0x7ff46ea2adf0>

# Perform SQL query
result = con.execute("SELECT * FROM my_table WHERE student_name = 'Patricia Hall'").fetchall()

result

[(18, 'Patricia Hall', 71, 75, 70, 73, 0)]

print(result)

[(18, 'Patricia Hall', 71, 75, 70, 73, 0)]

# type(result[0])

import pandas as pd
from IPython.display import display

def list2table(cdata, col_list = None):

    first_row = cdata[0]
    cols_count = len(first_row)

    if not col_list:
        col_list = []
        for idx in range(cols_count):
            col_list.append(f'col_{idx}')

    # Convert the list of tuples to a DataFrame
    df1 = pd.DataFrame(cdata, columns = col_list)

    # Set display options
    pd.set_option('display.max_rows', 100)
    pd.set_option('display.max_columns', 100)

    # Apply custom styling
    styled_df = df1.style.set_properties(**{'text-align': 'center'})

    # Display the DataFrame
    display(styled_df)

def q(query, col_list = None):

    result = con.execute(query).fetchall()
    # print(result)

    return list2table(result, col_list)

q("SELECT * FROM my_table WHERE student_id = 1")

	col_0	col_1	col_2	col_3	col_4	col_5	col_6
0	1	John Doe	85	90	80	88	1

q("""
SELECT 
STUDENT_NAME
FROM my_table
limit 2
""", ["student_name"])

	student_name
0	John Doe
1	Jane Smith

q("""
SELECT 
AVG(test_scores)
FROM my_table
limit 2
""",
["one"])

	one
0	77.790000

q("""
SELECT 
"student_name"
FROM my_table
limit 2
""",
["one"])

	one
0	John Doe
1	Jane Smith

q("""
SELECT 
student_id
FROM my_table
WHERE student_name IN ('John Doe', 'Jane Smith')
""",
["one"])

	one
0	1
1	2

Score: 15

Category: duckdb