diff --git a/__pycache__/gather.cpython-311.pyc b/__pycache__/gather.cpython-311.pyc
new file mode 100644
index 0000000..8473cc1
Binary files /dev/null and b/__pycache__/gather.cpython-311.pyc differ
diff --git a/gather.py b/gather.py
new file mode 100644
index 0000000..681b50f
--- /dev/null
+++ b/gather.py
@@ -0,0 +1,9 @@
+import requests
+from bs4 import BeautifulSoup
+
+def gather(keyword):
+    url = "https://en.wikipedia.org/wiki/" + keyword
+    response = requests.get(url)
+    soup = BeautifulSoup(response.content, "html.parser")
+    details = soup.find_all('table', {'class': 'infobox'})
+    return details, soup('p')[:2]
\ No newline at end of file
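Note on gather(): it returns every table with class "infobox" plus the first two <p> tags on the page. On many Wikipedia articles the first <p> is an empty spacer element, so the returned summary can begin with a blank paragraph. A minimal smoke test, assuming network access (the keyword is only an example):

    from gather import gather

    # Fetch one page and inspect what gather() hands back.
    details, paragraphs = gather("Alan_Turing")
    print(len(details), "infobox table(s) found")
    for p in paragraphs:
        print(p.get_text(strip=True)[:80])  # first characters of each lead paragraph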
diff --git a/infohunt.py b/infohunt.py
new file mode 100644
index 0000000..562cf92
--- /dev/null
+++ b/infohunt.py
@@ -0,0 +1,32 @@
+import streamlit as st
+from gather import gather
+
+def main():
+    st.markdown(
+        '<div><img src="template/search.gif"></div>',
+        unsafe_allow_html=True
+    )
+    st.markdown('<div><h1>InFo_hUnT</h1></div>', unsafe_allow_html=True)
+    input_data = st.text_input("Enter your word")
+    if input_data:
+        keyword = "_".join(input_data.strip().split())
+        details, paragraphs = gather(keyword)
+
+        if details:
+            st.write("### Structured Data for '{}'".format(input_data))
+            for detail in details:
+                rows = detail.find_all('tr')
+                for row in rows:
+                    heading = row.find('th')
+                    value = row.find('td')
+                    if heading and value:
+                        st.write("**{}:** {}".format(heading.text.strip(), value.text.strip()))
+        else:
+            st.write("Structured data not yet available for '{}'".format(input_data))
+
+        st.write("### Summary for '{}'".format(input_data))
+        for paragraph in paragraphs:
+            st.write(paragraph.text.strip())
+
+if __name__ == "__main__":
+    main()
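One behavioral change against the deleted main.py below: the old script passed the query through string.capwords() before building the URL, while infohunt.py only strips whitespace and joins words with underscores. Wikipedia titles are case-sensitive past the first character, so multi-word lookups may now miss unless the user types the capitalization the article uses (redirects aside). A sketch of the difference, with purely illustrative input:

    import string

    raw = "alan turing"
    old_style = "_".join(string.capwords(raw).split())  # "Alan_Turing"
    new_style = "_".join(raw.strip().split())           # "alan_turing"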
diff --git a/main.py b/main.py
deleted file mode 100644
index aa6939c..0000000
--- a/main.py
+++ /dev/null
@@ -1,32 +0,0 @@
-from bs4 import BeautifulSoup
-import requests
-import string
-import streamlit
-
-input_data= input("search: ")
-u_i= string.capwords(input_data)
-lists= u_i.split()
-word= "_".join(lists)
-
-url= "https://en.wikipedia.org/wiki/"+word
-
-def wikifind(url):
-    url_open= requests.get(url)
-    soup= BeautifulSoup(url_open.content, "html.parser")
-    details= soup('table', {'class': 'infobox'})
-
-    for i in details:
-        h= i.find_all('tr')
-        for j in h:
-            heading= j.find_all('th')
-            detail= j.find_all('td')
-            if heading is not None and detail is not None:
-                for x,y in zip(heading, detail):
-                    print("{} :: {}".format(x.text, y.text))
-                    print("...................")
-
-    for i in range(1,3):
-        print(soup('p')[i].text)
-
-wikifind(url)
-
diff --git a/template/search.gif b/template/search.gif
new file mode 100644
index 0000000..bc7b873
Binary files /dev/null and b/template/search.gif differ
diff --git a/test.py b/test.py
deleted file mode 100644
index 26cc546..0000000
--- a/test.py
+++ /dev/null
@@ -1,37 +0,0 @@
-import streamlit as st
-import requests
-from bs4 import BeautifulSoup
-import string
-
-def get_wikipedia_data(keyword):
-    url = "https://en.wikipedia.org/wiki/" + keyword
-    response = requests.get(url)
-    soup = BeautifulSoup(response.content, "html.parser")
-    infobox = soup.find('table', {'class': 'infobox'})
-    if infobox:
-        details = {}
-        rows = infobox.find_all('tr')
-        for row in rows:
-            heading = row.find('th')
-            detail = row.find('td')
-            if heading and detail:
-                details[heading.text.strip()] = detail.text.strip()
-        return details
-    else:
-        return None
-
-def main():
-    st.title("Know more about your word")
-    input_data = st.text_input("Enter your word:")
-    if input_data:
-        keyword = "_".join(input_data.split()).capitalize()
-        wiki_data = get_wikipedia_data(keyword)
-        if wiki_data:
-            st.write("### {}".format(keyword))
-            for key, value in wiki_data.items():
-                st.write("**{}:** {}".format(key, value))
-        else:
-            st.write("No Wikipedia infobox found for {}".format(keyword))
-
-if __name__ == "__main__":
-    main()
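To try the new entry point, assuming the dependencies are installed, the app is started with Streamlit's standard CLI:

    pip install streamlit requests beautifulsoup4
    streamlit run infohunt.py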