Duck-Test2

Sat 17 May 2025
# https://rajasgs.gitbook.io/pynotes/gcp#data-modeling
# https://github.com/tactlabs/student-hiring-prediction-mle/tree/main/dataset
import pyutil as pyu
pyu.get_local_pyinfo()
'conda env: ml311; pyv: 3.11.10 (main, Oct  3 2024, 07:29:13) [GCC 11.2.0]'
# !pip install duckdb
!pip show duckdb | grep "Version:"
Version: 1.1.3
import duckdb
# Connect to DuckDB
con = duckdb.connect()
# Load CSV file
con.execute("CREATE TABLE my_table AS SELECT * FROM '../dataset/student_data.parquet'")
<duckdb.duckdb.DuckDBPyConnection at 0x7f207c18c6b0>
# Perform SQL query
result = con.execute("SELECT * FROM my_table WHERE student_name = 'Patricia Hall'").fetchall()
result
[(18, 'Patricia Hall', 71, 75, 70, 73, 0)]
print(result)
[(18, 'Patricia Hall', 71, 75, 70, 73, 0)]
# type(result[0])
import pandas as pd
from IPython.display import display

def list2table(cdata, col_list = None):

    first_row = cdata[0]
    cols_count = len(first_row)

    if not col_list:
        col_list = []
        for idx in range(cols_count):
            col_list.append(f'col_{idx}')

    # Convert the list of tuples to a DataFrame
    df1 = pd.DataFrame(cdata, columns = col_list)

    # Set display options
    pd.set_option('display.max_rows', 100)
    pd.set_option('display.max_columns', 100)

    # Apply custom styling
    styled_df = df1.style.set_properties(**{'text-align': 'center'})

    # Display the DataFrame
    display(styled_df)
def q(query, col_list = None):

    result = con.execute(query).fetchall()
    # print(result)

    return list2table(result, col_list)
q("SELECT * FROM my_table WHERE student_id = 1")
  col_0 col_1 col_2 col_3 col_4 col_5 col_6
0 1 John Doe 85 90 80 88 1
q("""
SELECT 
STUDENT_NAME
FROM my_table
limit 2
""", ["student_name"])
  student_name
0 John Doe
1 Jane Smith




Score: 15

Category: duckdb