Duck-Automobile

Sat 17 May 2025
# https://rajasgs.gitbook.io/pynotes/gcp#data-modeling
# https://github.com/tactlabs/student-hiring-prediction-mle/tree/main/dataset
import pyutil as pyu
pyu.get_local_pyinfo()
'conda env: ml311; pyv: 3.11.10 (main, Oct  3 2024, 07:29:13) [GCC 11.2.0]'
# !pip install duckdb
!pip show duckdb | grep "Version:"
Version: 1.1.3
import duckdb
# Connect to DuckDB
con = duckdb.connect()
# Load CSV file
con.execute("CREATE TABLE automobile AS SELECT * FROM '../dataset/automobile_data.csv'")
<duckdb.duckdb.DuckDBPyConnection at 0x7f2df2f47770>
# Perform SQL query
result = con.execute("SELECT * FROM automobile").fetchall()
# result
# print(result)
# type(result[0])
import pandas as pd
from IPython.display import display

def list2table(cdata, col_list = None):

    first_row = cdata[0]
    cols_count = len(first_row)

    if not col_list:
        col_list = []
        for idx in range(cols_count):
            col_list.append(f'col_{idx}')

    # Convert the list of tuples to a DataFrame
    df1 = pd.DataFrame(cdata, columns = col_list)

    # Set display options
    pd.set_option('display.max_rows', 100)
    pd.set_option('display.max_columns', 100)

    # Apply custom styling
    styled_df = df1.style.set_properties(**{'text-align': 'center'})

    # Display the DataFrame
    display(styled_df)
def q(query, col_list = None):

    result = con.execute(query).fetchall()
    # print(result)

    return list2table(result, col_list)
q("SELECT * FROM automobile limit 2")
  col_0 col_1 col_2 col_3 col_4 col_5 col_6 col_7 col_8 col_9 col_10 col_11 col_12 col_13 col_14 col_15 col_16 col_17 col_18 col_19 col_20 col_21 col_22 col_23 col_24 col_25
0 3 ? alfa-romero gas std two convertible rwd front 88.600000 168.800000 64.100000 48.800000 2548 dohc four 130 mpfi 3.47 2.68 9.000000 111 5000 21 27 13495
1 3 ? alfa-romero gas std two convertible rwd front 88.600000 168.800000 64.100000 48.800000 2548 dohc four 130 mpfi 3.47 2.68 9.000000 111 5000 21 27 16500



q("""
SELECT 
    make, "num-of-doors", COUNT(*) AS door_count 
FROM automobile 

WHERE "num-of-doors" IN ('two', 'four')
GROUP BY 
    make,
    "num-of-doors"
LIMIT 2
""")
  col_0 col_1 col_2
0 audi two 2
1 chevrolet two 2
q("""
SELECT *
FROM automobile
WHERE 
CAST(horsepower as INT) > 150
and horsepower != '?'

limit 2
""")
  col_0 col_1 col_2 col_3 col_4 col_5 col_6 col_7 col_8 col_9 col_10 col_11 col_12 col_13 col_14 col_15 col_16 col_17 col_18 col_19 col_20 col_21 col_22 col_23 col_24 col_25
0 1 ? alfa-romero gas std two hatchback rwd front 94.500000 171.200000 65.500000 52.400000 2823 ohcv six 152 mpfi 2.68 3.47 9.000000 154 5000 19 26 16500
1 0 ? audi gas turbo two hatchback 4wd front 99.500000 178.200000 67.900000 52.000000 3053 ohc five 131 mpfi 3.13 3.4 7.000000 160 5500 16 22 ?

q("""
SELECT 
    ‘fuel-type’,
    make, 
    COUNT(*) AS number_cars
FROM automobile
WHERE 
'fuel-type' IN ('gas', 'diesel')

GROUP BY make, 'fuel-type'
""")
---------------------------------------------------------------------------

BinderException                           Traceback (most recent call last)

Cell In[35], line 1
----> 1 q("""
      2 SELECT 
      3     ‘fuel-type’,
      4     make, 
      5     COUNT(*) AS number_cars
      6 FROM automobile
      7 WHERE 
      8 'fuel-type' IN ('gas', 'diesel')
      9 
     10 GROUP BY make, 'fuel-type'
     11 """)


Cell In[19], line 3, in q(query, col_list)
      1 def q(query, col_list = None):
----> 3     result = con.execute(query).fetchall()
      4     # print(result)
      6     return list2table(result, col_list)


BinderException: Binder Error: Referenced column "‘fuel" not found in FROM clause!
Candidate bindings: "automobile.fuel-type", "automobile.fuel-system"
LINE 3:     fuel-type,
            ^


Score: 20

Category: duckdb