Custom-Parser-2

Fri 14 November 2025

!python --version

Python 3.12.4

from constants import OPENAI_API_KEY

!pip show langchain-openai | grep "Version:"

Version: 0.2.9

import os

# Export the key imported from `constants` as an environment variable
# (presumably where ChatOpenAI looks for it -- confirm against langchain-openai docs).
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

# https://python.langchain.com/docs/how_to/functions/

from langchain_openai import ChatOpenAI

# Chat model instance shared by the chains built below.
model = ChatOpenAI(model="gpt-4o-mini")

from langchain_core.runnables import RunnableLambda
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

from typing import Iterator, List

# Prompt template with a single `{animal}` placeholder that asks the model
# for a comma-separated list.
prompt = ChatPromptTemplate.from_template(
    "Write a comma-separated list of 5 animals similar to: {animal}. Do not include numbers"
)

# Compose prompt -> model -> StrOutputParser into one runnable chain.
str_chain = prompt | model | StrOutputParser()

# Stream the chain and print every chunk as it arrives; `end=""` keeps the
# chunks on one line so they read as a single continuous string.
for chunk in str_chain.stream({"animal": "bear"}):
    print(chunk, end="", flush=True)

wolf, fox, cougar, lynx, badger

# This is a custom parser that consumes an iterator of LLM tokens and
# yields each comma-separated item as its own single-element list
def split_into_list(input: Iterator[str]) -> Iterator[List[str]]:
    """Turn a stream of text fragments into a stream of comma-separated items.

    Fragments are accumulated until at least one comma has been seen. Each
    piece of text that is terminated by a comma is stripped of surrounding
    whitespace and yielded as a single-element list. Whatever text is left
    once the stream is exhausted is yielded the same way, so the generator
    always produces at least one list (``['']`` for an empty stream).

    Args:
        input: Iterator of string fragments, in order.

    Yields:
        One single-element list per comma-separated item.
    """
    # Text received so far that has not yet been closed off by a comma.
    pending = ""
    for fragment in input:
        # Everything but the last piece is comma-terminated and therefore
        # complete; the last piece may still grow with the next fragment.
        *finished, pending = (pending + fragment).split(",")
        for item in finished:
            yield [item.strip()]
    # The stream is done: flush the trailing, unterminated piece.
    yield [pending.strip()]


# Extend the string chain with the custom generator function defined above.
list_chain = str_chain | split_into_list

# Stream the extended chain; each printed chunk is a one-element list.
for chunk in list_chain.stream({"animal": "bear"}):
    print(chunk, flush=True)

['wolf']
['cougar']
['lynx']
['bison']
['moose']

# Run the same chain without streaming; the recorded output is one flat list.
list_chain.invoke({"animal": "bear"})

['wolf', 'cougar', 'bison', 'moose', 'elk']

Score: 10

Category: langchain