2024-05-08 Pulumi Webinar
A solution to limited context windows
and that's tricky
Turning words into numbers
# Starter pipeline: read the files under ./data, build a vector index from
# them, take a query engine from that index, run one query, print the result.
documents = SimpleDirectoryReader("./data").load_data()
index = VectorStoreIndex.from_documents(documents)
query_engine = index.as_query_engine()

question = "What's up?"
response = query_engine.query(question)
print(response)
part of LlamaCloud
from llama_parse import LlamaParse
from llama_index.core import SimpleDirectoryReader
# Parser configured to return its result as markdown.
parser = LlamaParse(result_type="markdown")

# Map the ".pdf" extension to the parser; the directory reader receives this
# mapping through its file_extractor argument.
file_extractor = {".pdf": parser}
reader = SimpleDirectoryReader("./data", file_extractor=file_extractor)

documents = reader.load_data()
...plus everything on Hugging Face!
# Post-processor keyed on the "window" metadata entry.
# NOTE(review): presumably this swaps each retrieved node's text for its
# "window" metadata value before synthesis -- confirm against LlamaIndex docs.
window_postprocessor = MetadataReplacementPostProcessor(
    target_metadata_key="window",
)

# Query engine limited to similarity_top_k=2, with the post-processor attached.
query_engine = index.as_query_engine(
    similarity_top_k=2,
    node_postprocessors=[window_postprocessor],
)

question = "What happened on August 3rd?"
response = query_engine.query(question)
print(response)
# Single exact-match condition: metadata key "year" must equal the string
# "2021" (note the value is a string, not an integer).
year_filter = ExactMatchFilter(key="year", value="2021")

# Query engine built with that one filter wrapped in a MetadataFilters object.
query_engine = index.as_query_engine(
    filters=MetadataFilters(filters=[year_filter]),
)

question = "What was the annual profit in 2021?"
response = query_engine.query(question)
print(response)
# Describe each metadata field separately, then assemble the store description.
year_info = MetadataInfo(
    name="year",
    description="The year the movie was released",
    type="integer",
)
director_info = MetadataInfo(
    name="director",
    description="The name of the movie director",
    type="string",
)

# Natural-language description of the stored content plus its metadata fields.
vector_store_info = VectorStoreInfo(
    content_info="Brief summary of a movie",
    metadata_info=[year_info, director_info],
)

# Auto-retriever over the existing index, given the description above.
# NOTE(review): presumably the descriptions let an LLM infer metadata filters
# from the query text -- confirm against the VectorIndexAutoRetriever docs.
retriever = VectorIndexAutoRetriever(
    index,
    vector_store_info=vector_store_info,
)
# Keyword arguments for a hybrid-mode query engine, gathered in one place.
# NOTE(review): alpha=0.5 presumably weights the two search modes evenly, and
# hybrid mode presumably needs a vector store that supports it -- confirm.
hybrid_search_options = {
    "vector_store_query_mode": "hybrid",
    "similarity_top_k": 2,
    "alpha": 0.5,
}
query_engine = index.as_query_engine(**hybrid_search_options)

question = "What did the author do growing up?"
response = query_engine.query(question)
# Connect to an in-memory SQLite database and wrap it so that only the
# city_stats table is included.
engine = create_engine("sqlite:///:memory:")
sql_database = SQLDatabase(engine, include_tables=["city_stats"])

# Natural-language SQL query engine restricted to the same table.
query_engine = NLSQLTableQueryEngine(
    sql_database=sql_database,
    tables=["city_stats"],
)

# Ask the question in plain English; the engine returns the response object.
query_str = "Which city has the highest population?"
response = query_engine.query(query_str)
# Mapping object built from the wrapped database, plus one schema object per
# table to be indexed (only city_stats here).
table_node_mapping = SQLTableNodeMapping(sql_database)
table_schema_objs = [SQLTableSchema(table_name="city_stats")]

# Index the table schema objects using VectorStoreIndex as the index class.
obj_index = ObjectIndex.from_objects(
    table_schema_objs,
    table_node_mapping,
    VectorStoreIndex,
)

# Retriever limited to similarity_top_k=1, handed to the SQL query engine.
table_retriever = obj_index.as_retriever(similarity_top_k=1)
query_engine = SQLTableRetrieverQueryEngine(sql_database, table_retriever)
# Free-text context attached to the city_stats table schema.
# NOTE(review): the text reads "'bar'corresponds" (missing space) and maps
# 'bar' to city; kept verbatim here because it is runtime prompt text.
city_stats_text = (
    "This table gives information regarding the population and country "
    "of a given city. "
    "The user will query with codewords, where 'foo' corresponds to "
    "population and 'bar'corresponds to city."
)

table_node_mapping = SQLTableNodeMapping(sql_database)

# Same schema object as a bare table reference, but carrying the context text.
city_stats_schema = SQLTableSchema(
    table_name="city_stats",
    context_str=city_stats_text,
)
table_schema_objs = [city_stats_schema]
def _build_year_query_engine(data_dir):
    """Load *data_dir*, index its documents, and return all three pieces.

    Args:
        data_dir: Directory passed to ``SimpleDirectoryReader``.

    Returns:
        A ``(documents, index, query_engine)`` tuple, so the caller can bind
        the same three names the unrolled, copy-pasted version bound.
    """
    docs = SimpleDirectoryReader(data_dir).load_data()
    year_index = VectorStoreIndex.from_documents(docs)
    return docs, year_index, year_index.as_query_engine()


# One index and one query engine per directory ("2020", "2021", "2022").
# `documents` is rebound on every line and ends up holding the "2022" load,
# exactly as it did when the three steps were written out by hand.
documents, index2020, query_engine_2020 = _build_year_query_engine("2020")
documents, index2021, query_engine_2021 = _build_year_query_engine("2021")
documents, index2022, query_engine_2022 = _build_year_query_engine("2022")
# Name and description attached to the 2020 query engine when it is wrapped
# as a tool.
facts_2020_metadata = ToolMetadata(
    name="2020_facts_tool",
    description=(
        "Contains facts about filings about the company from the year 2020"
    ),
)

# List of tools; only the 2020 entry is spelled out.
query_engine_tools = [
    QueryEngineTool(
        query_engine=query_engine_2020,
        metadata=facts_2020_metadata,
    ),
    # ... etc ...
]
# LLM handed to the agent below.
function_llm = OpenAI(model="gpt-4")

# Agent built from the query-engine tools. The system prompt is a plain
# string: it has no placeholders, so the former f-string prefix did nothing.
# The two sentences are joined by a single newline, with no trailing newline.
agent = OpenAIAgent.from_tools(
    query_engine_tools,
    llm=function_llm,
    system_prompt=(
        "You are a specialized agent designed to answer queries about financial filings.\n"
        "You must ALWAYS use at least one of the tools provided when answering a question; do NOT rely on prior knowledge."
    ),
)
– Shawn "swyx" Wang, Latent.Space podcast
Follow me on Twitter: @seldo
Python:
TypeScript: