Duck-Automobile

Fri 14 November 2025

# https://rajasgs.gitbook.io/pynotes/gcp#data-modeling
# https://github.com/tactlabs/student-hiring-prediction-mle/tree/main/dataset

import pyutil as pyu
pyu.get_local_pyinfo()

'conda env: ml311; pyv: 3.11.10 (main, Oct  3 2024, 07:29:13) [GCC 11.2.0]'

# !pip install duckdb

!pip show duckdb | grep "Version:"

Version: 1.1.3

import duckdb

# Connect to DuckDB
con = duckdb.connect()

# Load CSV file
con.execute("CREATE TABLE automobile AS SELECT * FROM '../dataset/automobile_data.csv'")

<duckdb.duckdb.DuckDBPyConnection at 0x7f2df2f47770>

# Perform SQL query
result = con.execute("SELECT * FROM automobile").fetchall()

# result

# print(result)

# type(result[0])

import pandas as pd
from IPython.display import display

def list2table(cdata, col_list = None):

    first_row = cdata[0]
    cols_count = len(first_row)

    if not col_list:
        col_list = []
        for idx in range(cols_count):
            col_list.append(f'col_{idx}')

    # Convert the list of tuples to a DataFrame
    df1 = pd.DataFrame(cdata, columns = col_list)

    # Set display options
    pd.set_option('display.max_rows', 100)
    pd.set_option('display.max_columns', 100)

    # Apply custom styling
    styled_df = df1.style.set_properties(**{'text-align': 'center'})

    # Display the DataFrame
    display(styled_df)

def q(query, col_list = None):

    result = con.execute(query).fetchall()
    # print(result)

    return list2table(result, col_list)

q("SELECT * FROM automobile limit 2")

	col_0	col_1	col_2	col_3	col_4	col_5	col_6	col_7	col_8	col_9	col_10	col_11	col_12	col_13	col_14	col_15	col_16	col_17	col_18	col_19	col_20	col_21	col_22	col_23	col_24	col_25
0	3	?	alfa-romero	gas	std	two	convertible	rwd	front	88.600000	168.800000	64.100000	48.800000	2548	dohc	four	130	mpfi	3.47	2.68	9.000000	111	5000	21	27	13495
1	3	?	alfa-romero	gas	std	two	convertible	rwd	front	88.600000	168.800000	64.100000	48.800000	2548	dohc	four	130	mpfi	3.47	2.68	9.000000	111	5000	21	27	16500

q("""
SELECT 
    make, "num-of-doors", COUNT(*) AS door_count 
FROM automobile 

WHERE "num-of-doors" IN ('two', 'four')
GROUP BY 
    make,
    "num-of-doors"
LIMIT 2
""")

	col_0	col_1	col_2
0	audi	two	2
1	chevrolet	two	2

q("""
SELECT *
FROM automobile
WHERE 
CAST(horsepower as INT) > 150
and horsepower != '?'

limit 2
""")

	col_0	col_1	col_2	col_3	col_4	col_5	col_6	col_7	col_8	col_9	col_10	col_11	col_12	col_13	col_14	col_15	col_16	col_17	col_18	col_19	col_20	col_21	col_22	col_23	col_24	col_25
0	1	?	alfa-romero	gas	std	two	hatchback	rwd	front	94.500000	171.200000	65.500000	52.400000	2823	ohcv	six	152	mpfi	2.68	3.47	9.000000	154	5000	19	26	16500
1	0	?	audi	gas	turbo	two	hatchback	4wd	front	99.500000	178.200000	67.900000	52.000000	3053	ohc	five	131	mpfi	3.13	3.4	7.000000	160	5500	16	22	?

q("""
SELECT 
    ‘fuel-type’,
    make, 
    COUNT(*) AS number_cars
FROM automobile
WHERE 
'fuel-type' IN ('gas', 'diesel')

GROUP BY make, 'fuel-type'
""")

---------------------------------------------------------------------------

BinderException                           Traceback (most recent call last)

Cell In[35], line 1
----> 1 q("""
      2 SELECT 
      3     ‘fuel-type’,
      4     make, 
      5     COUNT(*) AS number_cars
      6 FROM automobile
      7 WHERE 
      8 'fuel-type' IN ('gas', 'diesel')
      9 
     10 GROUP BY make, 'fuel-type'
     11 """)


Cell In[19], line 3, in q(query, col_list)
      1 def q(query, col_list = None):
----> 3     result = con.execute(query).fetchall()
      4     # print(result)
      6     return list2table(result, col_list)


BinderException: Binder Error: Referenced column "‘fuel" not found in FROM clause!
Candidate bindings: "automobile.fuel-type", "automobile.fuel-system"
LINE 3:     ‘fuel-type’,
            ^

Score: 20

Category: duckdb