# fin_data_qa.py
# {{{ imports
import logging
import sys
from IPython.display import Markdown, display
import os
from dotenv import load_dotenv
import pandas as pd
from llama_index.experimental.query_engine import PandasQueryEngine
from llama_index.llms.azure_openai import AzureOpenAI
from llama_index.llms.openai import OpenAI
from llama_index.core.query_pipeline import (
    QueryPipeline as QP,
    Link,
    InputComponent,
)
from llama_index.experimental.query_engine.pandas import (
    PandasInstructionParser,
)
from llama_index.core import PromptTemplate
from pyvis.network import Network
load_dotenv()
# Log to stdout via the root logger's basicConfig handler.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
# }}}
# {{{ env variables
AZURE_OPENAI_ENDPOINT = os.getenv('AZURE_OPENAI_ENDPOINT')
api_key = os.getenv('AZURE_OPENAI_API_KEY')
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
# }}}
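# Optional guard (added sketch, not part of the original flow): warn early if the
# Azure credentials were not loaded from .env, since the AzureOpenAI client below
# cannot work without them.
if not AZURE_OPENAI_ENDPOINT or not api_key:
    logging.warning('AZURE_OPENAI_ENDPOINT / AZURE_OPENAI_API_KEY not set; the AzureOpenAI client will fail.')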
# {{{ initialize llm
# Azure OpenAI client (configured here but not wired into the pipeline below).
azure_llm = AzureOpenAI(
    model='gpt-35-turbo-16k',
    deployment_name='RG210-openai-35turbo',
    api_key=api_key,
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
    # api_version = ,
    temperature=0.1,
)
# OpenAI client used as "llm1" and "llm2" in the query pipeline.
llm = OpenAI(model="gpt-3.5-turbo", temperature=0.1)
# }}}
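# Quick smoke test (illustrative; left commented out to avoid an API call on import):
# print(llm.complete("Reply with a single word."))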
# {{{ load data
cust_data = pd.read_csv(r'./artifacts/cat.csv')
user_data = pd.read_csv(r'./artifacts/user_info.csv')
user_data_dict = user_data.to_dict(orient='records')
# }}}
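# Lightweight sanity check (added sketch): log the shape and columns of each frame
# so an empty or missing CSV surfaces before the pipeline runs.
logging.info('cust_data shape: %s, columns: %s', cust_data.shape, list(cust_data.columns))
logging.info('user_data shape: %s, columns: %s', user_data.shape, list(user_data.columns))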
# query_engine = PandasQueryEngine(df=cust_data, verbose=True)
# response = query_engine.query('what is the data about')
# {{{ prompts
instruction_str = (
    "1. Convert the query to executable Python code using Pandas.\n"
    "2. The final line of code should be a Python expression that can be called with the `eval()` function.\n"
    "3. The code should represent a solution to the query.\n"
    "4. PRINT ONLY THE EXPRESSION.\n"
    "5. Do not quote the expression.\n"
)
pandas_prompt_str = (
    "You are working with a pandas dataframe in Python.\n"
    "The name of the dataframe is `df`.\n"
    "This is the result of `print(df.head())`:\n"
    "{df_str}\n\n"
    "Follow these instructions:\n"
    "{instruction_str}\n"
    "Query: {query_str}\n\n"
    "Expression:"
)
# user_data_response_synthesis_prompt_str = (
# "You are a skilled financial advisor, expert at deriving financial insights from user information, risk capacity and other factors.\n"
# ""
# )
response_synthesis_prompt_str = (
    "You are a highly experienced Indian financial advisor, skilled at analysing customer financial data and recommending informative insights for investment.\n"
    "This is the customer information:\n"
    "{user_data_dict}\n\n"
    "Given an input question, synthesize a response from the query results.\n"
    "Query: {query_str}\n\n"
    "Pandas Instructions (optional):\n{pandas_instructions}\n\n"
    "Pandas Output: {pandas_output}\n\n"
    "Response: "
)
pandas_prompt = PromptTemplate(pandas_prompt_str).partial_format(
    instruction_str=instruction_str, df_str=cust_data.head(5),
)
pandas_prompt_user_data = PromptTemplate(pandas_prompt_str).partial_format(
    instruction_str=instruction_str, df_str=user_data.head(5)
)
pandas_output_parser = PandasInstructionParser(cust_data)
# pandas_output_parser_user_data = PandasInstructionParser(user_data)
response_synthesis_prompt = PromptTemplate(response_synthesis_prompt_str).partial_format(
    user_data_dict=user_data_dict
)
# }}}
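# Example (hypothetical query, for inspection only): render the fully formatted
# pandas prompt that would be sent to "llm1"; format() fills the remaining
# {query_str} slot of the partially formatted template.
# print(pandas_prompt.format(query_str='What is the average value per category?'))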
# {{{ query pipeline
qp = QP(
    modules={
        "input": InputComponent(),
        "pandas_prompt": pandas_prompt,
        # "pandas_prompt2": pandas_prompt_user_data,
        "llm1": llm,
        "pandas_output_parser": pandas_output_parser,
        "response_synthesis_prompt": response_synthesis_prompt,
        "llm2": llm,
    },
    verbose=True,
)
qp.add_chain(["input", "pandas_prompt", "llm1", "pandas_output_parser"])
qp.add_links(
    [
        Link("input", "response_synthesis_prompt", dest_key="query_str"),
        Link("llm1", "response_synthesis_prompt", dest_key="pandas_instructions"),
        Link("pandas_output_parser", "response_synthesis_prompt", dest_key="pandas_output"),
    ]
)
qp.add_link("response_synthesis_prompt", "llm2")
# }}}
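# Resulting DAG, for reference:
#   input -> pandas_prompt -> llm1 -> pandas_output_parser
#   response_synthesis_prompt takes query_str (input), pandas_instructions (llm1)
#   and pandas_output (pandas_output_parser), then feeds llm2.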
# {{{ visualize dag
net = Network(notebook=True, cdn_resources='in_line', directed=True)
net.from_nx(qp.dag)
net.save_graph('query_pipeline_dag.html')
# }}}
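# To inspect the saved DAG, open query_pipeline_dag.html in a browser, e.g.:
# import webbrowser; webbrowser.open('query_pipeline_dag.html')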
# {{{ run pipeline
def analyze_financial_data(query):
    """Run the query pipeline for a natural-language query and return the final LLM response text."""
    response = qp.run(query_str=query)
    return response.message.content
# }}}
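# {{{ example usage
# Minimal usage sketch: the query below is a hypothetical example and assumes the
# CSVs under ./artifacts exist and the OPENAI_API_KEY is valid.
if __name__ == '__main__':
    sample_query = 'Summarise the spending pattern in the customer data.'
    print(analyze_financial_data(sample_query))
# }}}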