import streamlit as st
import time
import concurrent.futures
import threading
import re
import urllib.request
import random
import requests
import pandas as pd
import plotly.express as px
from gensim.models import Word2Vec
st.set_page_config(
    page_title="Abstractalytics",
    page_icon=":microscope:",
    layout="wide",  # or "centered"
    initial_sidebar_state="auto",
    menu_items={
        'About': "Abstractalytics is a Natural Language Processing (NLP) web app that harnesses "
                 "Word2Vec to mine insights from PubMed abstracts. Created by Jimmie E. Fata, PhD"
    }
)
# Style the sidebar via injected CSS
st.markdown("""
    <style>
        [data-testid=stSidebar] {
            background-color: #99CCFF;
        }
    </style>
""", unsafe_allow_html=True)
| st.markdown(""" | |
| <style> | |
| body { | |
| background-color: #CCFFFF; | |
| # color: #ffffff; | |
| # font-size: 1px | |
| } | |
| .stApp { | |
| background-color: #CCFFFF; | |
| # color: #ffffff; | |
| # font-size: 1px | |
| } | |
| </style> | |
| """, unsafe_allow_html=True) | |
| st.header(":red[*Abstractalytics*]") | |
| st.subheader("*A web app designed to explore :red[*PubMed abstracts*] for deeper understanding and fresh insights, driven " | |
| "by Natural Language Processing (NLP) techniques.*") | |
def custom_subheader(text, identifier, font_size):
    st.markdown(f"<h3 id='{identifier}' style='font-size: {font_size}px;'>{text}</h3>",
                unsafe_allow_html=True)


custom_subheader("Welcome to our innovative Word2Vec web app designed to unlock the wealth of knowledge "
                 "and insights hidden within PubMed abstracts! To begin, simply select a corpus that "
                 "interests you. Next, enter a single keyword you wish to explore within the corpus. "
                 "Abstractalytics' powerful Natural Language Processing (NLP) algorithms will analyze "
                 "the chosen corpus and present you with a list of top words, genes, drugs, "
                 "phytochemicals, and compounds that are contextually and semantically related to your "
                 "input. This advanced text-mining technique enables you to explore and understand "
                 "complex relationships, uncovering new discoveries and connections in your field of "
                 "research across a massive number of abstracts. Dive in and enjoy the exploration! "
                 "More oncology-related corpora coming soon.", "unique-id", 18)
| st.markdown("---") | |
| #Define the correct password | |
| # CORRECT_PASSWORD = "123" | |
| # Define a function to check if the password is correct | |
| # def authenticate(password): | |
| # if password == CORRECT_PASSWORD: | |
| # return True | |
| # else: | |
| # return False | |
| # | |
| # # Create a Streamlit input field for the password | |
| # password = st.text_input("Enter password:", type="password") | |
| # | |
| # # If the password is correct, show the app content | |
| # if authenticate(password): | |
opt = st.sidebar.radio("Select a PubMed Corpus",
                       options=('Breast Cancer corpus', 'Lung Cancer corpus'))

# Additional corpora (currently disabled):
# if opt == "Clotting corpus":
#     model_used = "pubmed_model_clotting"
#     num_abstracts = 45493
#     database_name = "Clotting"
# if opt == "Neuroblastoma corpus":
#     model_used = "pubmed_model_neuroblastoma"
#     num_abstracts = 29032
#     database_name = "Neuroblastoma"
| if opt == "Breast Cancer corpus": | |
| model_used = ("pubmed_model_breast_cancer2") | |
| num_abstracts = 290320 | |
| database_name = "Breast_cancer" | |
| if opt == "Lung Cancer corpus": | |
| model_used = ("lung_cancer_pubmed_model") | |
| num_abstracts = 210320 | |
| database_name = "Lung_cancer" | |
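# A dict-based lookup is an equivalent, easier-to-extend alternative to the
# if-chain above (a sketch, not in the original; names match the code above):
# CORPORA = {
#     'Breast Cancer corpus': ("pubmed_model_breast_cancer2", 290320, "Breast_cancer"),
#     'Lung Cancer corpus': ("lung_cancer_pubmed_model", 210320, "Lung_cancer"),
# }
# model_used, num_abstracts, database_name = CORPORA[opt]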
| st.header(f":blue[{database_name} Pubmed corpus.]") | |
| text_input_value = st.text_input(f"Enter one term to search within the {database_name} corpus") | |
| query = text_input_value | |
| query = query.lower() | |
| query = re.sub("[,.?!&*;:]", "", query) | |
| query = re.sub(" ", "-", query) | |
| # matches = [" "] | |
| # if any([x in query for x in matches]): | |
| # st.write("Please only enter one term or a term without spaces") | |
| # # query = input ("Enter your keyword(s):") | |
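# For example, an input of "Breast Cancer!" normalizes to "breast-cancer",
# matching the hyphenated multi-word tokens assumed to exist in the corpus
# vocabulary:
# assert re.sub(" ", "-", re.sub("[,.?!&*;:]", "", "Breast Cancer!".lower())) == "breast-cancer"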
if query:
    bar = st.progress(0)
    time.sleep(.05)
    st.caption(f"Searching {num_abstracts} {database_name} PubMed abstracts covering 1990-2022")
    for i in range(10):
        bar.progress((i + 1) * 10)
        time.sleep(.1)

    model = Word2Vec.load(model_used)  # you can continue training with the loaded model!
    if query not in model.wv.key_to_index:
        st.error("Term occurrence is too low - please try another term")
        st.stop()
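    # Reloading the Word2Vec model from disk on every Streamlit rerun is
    # expensive. A minimal caching sketch, assuming a Streamlit version that
    # provides st.cache_resource (1.18+; not in the original code):
    # @st.cache_resource
    # def load_model(path):
    #     return Word2Vec.load(path)
    # model = load_model(model_used)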
    def get_compound_ids(compound_names):
        # Fetch KEGG compound IDs in parallel; executor.map preserves input order
        with concurrent.futures.ThreadPoolExecutor() as executor:
            compound_ids = list(executor.map(get_compound_id, compound_names))
        return compound_ids

    def get_compound_id(compound_name):
        # Query the KEGG REST "find" endpoint and return the first matching ID
        url = f"http://rest.kegg.jp/find/compound/{compound_name}"
        response = requests.get(url)
        if response.status_code == 200:
            result = response.text.split('\n')
            if result[0]:
                compound_id = result[0].split('\t')[0]
                return compound_id
        return None
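    # Example (illustrative; actual results depend on KEGG's live data):
    # get_compound_id("glucose") fetches http://rest.kegg.jp/find/compound/glucose,
    # whose first line looks like "cpd:C00031\tD-Glucose; Grape sugar; ...",
    # so the function would return the first tab-delimited field, "cpd:C00031".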
    st.markdown("---")

    table = model.wv.most_similar_cosmul(query, topn=10000)
    table = pd.DataFrame(table)
    table.index.name = 'Rank'
    table.columns = ['Word', 'SIMILARITY']
    pd.set_option('display.max_rows', None)
    table2 = table.copy()

    # Set the max number of words to display
    value_word = min(100, len(table2))

    st.markdown(
        f"<b><p style='font-family: Arial; font-size: 20px;'>Top <span style='color:red; font-style: italic;'>{value_word} "
        f"</span>words contextually and semantically similar to "
        f"<span style='color:red; font-style: italic;'>{query} </span>within the <span style='color:red; font-style: italic;'>{database_name}</span> corpus. "
        f"Click on a square to expand it, and follow the PubMed and Wikipedia links for more information about a word.</p></b>",
        unsafe_allow_html=True)

    short_table = table2.head(value_word).round(2)
    short_table.index += 1
    short_table.index = (1 / short_table.index) * 10   # reciprocal-rank tile sizes
    sizes = short_table.index.tolist()
    short_table.set_index('Word', inplace=True)
    table2["SIMILARITY"] = 'Similarity Score ' + table2.head(value_word)["SIMILARITY"].round(2).astype(str)
    rank_num = list(short_table.index.tolist())
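    # With the reciprocal-rank weights above, rank 1 -> 10.0, rank 2 -> 5.0,
    # rank 4 -> 2.5, so tile area decays with rank and the top hits dominate.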
    df = short_table
    try:
        df['text'] = short_table.index
        df['href'] = [f'https://pubmed.ncbi.nlm.nih.gov/?term={database_name}%5Bmh%5D+NOT+review%5Bpt%5D'
                      '+AND+english%5Bla%5D+AND+hasabstract+AND+1990:2022%5Bdp%5D+AND+' + c
                      for c in short_table.index]
        df['href2'] = ['https://en.wikipedia.org/wiki/' + c for c in short_table.index]
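        # The %5B/%5D sequences are URL-encoded brackets, so each PubMed link
        # expands to a query of the form:
        #   <database>[mh] NOT review[pt] AND english[la] AND hasabstract
        #   AND 1990:2022[dp] AND <word>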
        df.loc[:, 'database'] = database_name

        fig = px.treemap(df, path=[short_table.index], values=sizes,
                         custom_data=['href', 'text', 'database', 'href2'],
                         hover_name=(table2.head(value_word)['SIMILARITY']))
        fig.update(layout_coloraxis_showscale=False)
        fig.update_layout(autosize=True, paper_bgcolor="#CCFFFF", margin=dict(t=0, b=0, l=0, r=0))
        fig.update_annotations(visible=False)
        fig.update_traces(marker=dict(cornerradius=5), root_color="#CCFFFF", hovertemplate=None,
                          hoverlabel_bgcolor="lightblue", hoverlabel_bordercolor="#000000",
                          texttemplate="<br><span "
                                       "style='font-family: Arial; font-size: 20px;'>%{customdata[1]}<br><br>"
                                       "<a href='%{customdata[0]}'>PubMed</a><br><br>"
                                       "<a href='%{customdata[3]}'>Wikipedia</a></span>")
        fig.update_layout(uniformtext=dict(minsize=15), treemapcolorway=["lightgreen"])

        st.plotly_chart(fig, use_container_width=True)

        csv = table2.head(value_word).to_csv().encode('utf-8')
        st.download_button(label=f"download top {value_word} words (csv)", data=csv,
                           file_name=f'{database_name}_words.csv', mime='text/csv')
    except Exception:
        st.warning(
            f"This selection exceeds the number of similar words related to {query} within the "
            f"{database_name} corpus; please choose a lower number")
| st.markdown("---") | |
| df1 = table.copy() | |
| df2 = pd.read_csv('Human Genes.csv') | |
| m = df1.Word.isin(df2.symbol) | |
| df1 = df1[m] | |
| df1.rename(columns={'Word': 'Genes'}, inplace=True) | |
| df_len = len(df1) | |
| print(len(df1)) | |
    # Set the number of genes to display
    value_gene = min(df_len, 100)

    st.markdown(
        f"<b><p style='font-family: Arial; font-size: 20px;'>Top <span style='color:red; font-style: italic;'>{value_gene} "
        f"</span>human genes contextually and semantically similar to "
        f"<span style='color:red; font-style: italic;'>{query} </span>within the <span style='color:red; font-style: italic;'>{database_name} </span>corpus. "
        f"Click on a square to expand it, and follow the PubMed and GeneCards links for more gene information.</p></b>",
        unsafe_allow_html=True)
    df11 = df1.head(value_gene).copy()
    df11.index = (1 / df11.index) * 10000   # reciprocal-rank tile sizes
    sizes = df11.index.tolist()
    df11.set_index('Genes', inplace=True)

    df4 = df1.copy()
    df4["SIMILARITY"] = 'Similarity Score ' + df4.head(value_gene)["SIMILARITY"].round(2).astype(str)
    df4.reset_index(inplace=True)
    if value_gene <= df_len:
        # Define the `text` column for labels and `href` columns for links
        df11['text'] = df11.index
        df11['href'] = [f'https://pubmed.ncbi.nlm.nih.gov/?term={database_name}%5Bmh%5D+NOT+review%5Bpt%5D'
                        '+AND+english%5Bla%5D+AND+hasabstract+AND+1990:2022%5Bdp%5D+AND+' + c
                        for c in df11['text']]
        df11['href2'] = ['https://www.genecards.org/cgi-bin/carddisp.pl?gene=' + c for c in df11['text']]
        df11['database'] = database_name

        # Create the treemap using `px.treemap`
        fig = px.treemap(df11, path=[df11['text']], values=sizes,
                         custom_data=['href', 'database', 'href2', 'text'],
                         hover_name=(df4.head(value_gene)['SIMILARITY']))
        fig.update(layout_coloraxis_showscale=False)
        fig.update_layout(autosize=True, paper_bgcolor="#CCFFFF", margin=dict(t=0, b=0, l=0, r=0))
        fig.update_annotations(visible=False)
        fig.update_traces(marker=dict(cornerradius=5), root_color="#CCFFFF", hovertemplate=None,
                          hoverlabel_bgcolor="lightblue", hoverlabel_bordercolor="#000000",
                          texttemplate="<span style='font-family: Arial; font-size: 20px;'>%{customdata[3]}<br><br>"
                                       "<a href='%{customdata[0]}'>PubMed</a><br><br>"
                                       "<a href='%{customdata[2]}'>GeneCard</a></span>")
        fig.update_layout(uniformtext=dict(minsize=15), treemapcolorway=["LightPink"])
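        # Note: the %{customdata[i]} placeholders above map to the custom_data
        # columns by position: 0 -> PubMed href, 1 -> database, 2 -> GeneCards
        # href, 3 -> display text. The treemaps below reuse the same convention.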
        st.plotly_chart(fig, use_container_width=True)

        st.caption("Human gene designation and database provided by HUGO Gene Nomenclature "
                   "Committee (HGNC): https://www.genenames.org/")
        st.caption("Gene symbol matching includes the added exceptions: p21, p53, her2, her3")
        st.caption("Gene information provided by GeneCards: https://www.genecards.org/")

        csv = df1.head(value_gene).to_csv().encode('utf-8')
        st.download_button(label=f"download top {value_gene} genes (csv)", data=csv,
                           file_name=f'{database_name}_genes.csv', mime='text/csv')
    else:
        st.warning(
            f"This selection exceeds the number of similar genes related to {query} within the "
            f"{database_name} corpus; please choose a lower number")
| st.markdown("---") | |
| # print() | |
| # print("Human genes similar to " + str(query)) | |
| df1 = table.copy() | |
| df2 = pd.read_csv('kegg_drug_list_lowercase.csv') | |
| m = df1.Word.isin(df2.drugs) | |
| df1 = df1[m] | |
| df1.rename(columns={'Word': 'Drugs'}, inplace=True) | |
| df_len = len(df1) | |
| # print(len(df1)) | |
| # df1["Human Gene"] = df1["Human Gene"].str.upper() | |
| # print(df1.head(50)) | |
| # print() | |
| # df1.head(50).to_csv("clotting_sim2.csv", index=True, header=False) | |
| # time.sleep(2) | |
| # Create the slider with increments of 5 up to 100 | |
| # Remove the slider and set the value_compound to the minimum of the number of rows in the dataframe and 100 | |
| value_drug = min(df1.shape[0], 100) | |
    st.markdown(
        f"<b><p style='font-family: Arial; font-size: 20px;'>Top <span style='color:red; font-style: italic;'>{value_drug} "
        f"</span>drugs contextually and semantically similar to "
        f"<span style='color:red; font-style: italic;'>{query}</span> within the <span style='color:red; font-style: italic;'>{database_name}</span> corpus. "
        f"Click on a square to expand it, and follow the PubMed and Wikipedia links for more drug information.</p></b>",
        unsafe_allow_html=True)
    df13 = df1.head(value_drug).copy()
    df13.index = (1 / df13.index) * 10000   # reciprocal-rank tile sizes
    sizes = df13.index.tolist()
    df13.set_index('Drugs', inplace=True)

    df6 = df1.copy()
    df6["SIMILARITY"] = 'Similarity Score ' + df6.head(value_drug)["SIMILARITY"].round(2).astype(str)
    df6.reset_index(inplace=True)
    if value_drug <= df_len:
        # Restore spaces in the drug names for display (corpus tokens are hyphenated)
        df13.reset_index(inplace=True)
        df13['Drugs'] = df13['Drugs'].str.replace('-', ' ')
        df13.set_index('Drugs', inplace=True)

        df13['text'] = df13.index
        df13['href'] = [f'https://pubmed.ncbi.nlm.nih.gov/?term={database_name}%5Bmh%5D+NOT+review%5Bpt%5D'
                        '+AND+english%5Bla%5D+AND+hasabstract+AND+1990:2022%5Bdp%5D+AND+' + c
                        for c in df13['text']]
        df13['href2'] = ['https://en.wikipedia.org/wiki/' + c for c in df13['text']]
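        # Caveat: after the hyphen-to-space replacement, multi-word names reach
        # these URLs with literal spaces; browsers generally encode them as %20,
        # but Wikipedia article titles use underscores, so some links may land
        # on a search or redirect page rather than the exact article.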
        df13['database'] = database_name

        # Create the treemap using `px.treemap`
        fig = px.treemap(df13, path=[df13['text']], values=sizes,
                         custom_data=['href', 'database', 'href2', 'text'],
                         hover_name=(df6.head(value_drug)['SIMILARITY']))
        fig.update(layout_coloraxis_showscale=False)
        fig.update_layout(autosize=True, paper_bgcolor="#CCFFFF", margin=dict(t=0, b=0, l=0, r=0))
        fig.update_annotations(visible=False)
        fig.update_traces(marker=dict(cornerradius=5), root_color="#CCFFFF", hovertemplate=None,
                          hoverlabel_bgcolor="lightblue", hoverlabel_bordercolor="#000000",
                          texttemplate="<span style='font-family: Arial; font-size: 20px;'>%{customdata[3]}<br><br>"
                                       "<a href='%{customdata[0]}'>PubMed</a><br><br>"
                                       "<a href='%{customdata[2]}'>Wikipedia</a></span>")
        fig.update_layout(uniformtext=dict(minsize=15), treemapcolorway=["Thistle"])
        st.plotly_chart(fig, use_container_width=True)

        st.caption("Drug designation and database provided by KEGG: https://www.kegg.jp/kegg/drug/")

        csv = df1.head(value_drug).to_csv().encode('utf-8')
        st.download_button(label=f"download top {value_drug} drugs (csv)", data=csv,
                           file_name=f'{database_name}_drugs.csv', mime='text/csv')
    else:
        st.warning(
            f"This selection exceeds the number of similar drugs related to {query} within the "
            f"{database_name} corpus; please choose a lower number")

    st.markdown("---")
    # Disease treemap (work in progress, currently disabled):
    # st.markdown("---")
    # df1 = table.copy()
    # df2 = pd.read_csv('diseasesKegg.csv')
    # m = df1.Word.isin(df2.disease)
    # df1 = df1[m]
    # df1.rename(columns={'Word': 'Disease'}, inplace=True)
    # df_len = len(df1)
    # value_disease = min(df1.shape[0], 100)
    # st.markdown(
    #     f"<b><p style='font-family: Arial; font-size: 20px;'>Top <span style='color:red; font-style: italic;'>{value_disease} "
    #     f"</span>diseases contextually and semantically similar to "
    #     f"<span style='color:red; font-style: italic;'>{query}</span> within the <span style='color:red; font-style: italic;'>{database_name}</span> corpus. "
    #     f"Click on a square to expand it, and follow the PubMed and Wikipedia links for more disease information.</p></b>",
    #     unsafe_allow_html=True)
    # df14 = df1.head(value_disease).copy()
    # df14.index = (1 / df14.index) * 10000
    # sizes = df14.index.tolist()
    # df14.set_index('Disease', inplace=True)
    # df7 = df1.copy()
    # df7["SIMILARITY"] = 'Similarity Score ' + df7.head(value_disease)["SIMILARITY"].round(2).astype(str)
    # df7.reset_index(inplace=True)
    # if value_disease <= df_len:
    #     df14.reset_index(inplace=True)
    #     df14['Disease'] = df14['Disease'].str.replace('-', ' ')
    #     df14.set_index('Disease', inplace=True)
    #     df14['text'] = df14.index
    #     df14['href'] = [f'https://pubmed.ncbi.nlm.nih.gov/?term={database_name}%5Bmh%5D+NOT+review%5Bpt%5D'
    #                     '+AND+english%5Bla%5D+AND+hasabstract+AND+1990:2022%5Bdp%5D+AND+' + c
    #                     for c in df14['text']]
    #     df14['href2'] = ['https://en.wikipedia.org/wiki/' + c for c in df14['text']]
    #     df14['database'] = database_name
    #     fig = px.treemap(df14, path=[df14['text']], values=sizes,
    #                      custom_data=['href', 'database', 'href2', 'text'],
    #                      hover_name=(df7.head(value_disease)['SIMILARITY']))
    #     fig.update(layout_coloraxis_showscale=False)
    #     fig.update_layout(autosize=True, paper_bgcolor="#CCFFFF", margin=dict(t=0, b=0, l=0, r=0))
    #     fig.update_annotations(visible=False)
    #     fig.update_traces(marker=dict(cornerradius=5), root_color="#CCFFFF", hovertemplate=None,
    #                       hoverlabel_bgcolor="lightblue", hoverlabel_bordercolor="#000000",
    #                       texttemplate="<span style='font-family: Arial; font-size: 20px;'>%{customdata[3]}<br><br>"
    #                                    "<a href='%{customdata[0]}'>PubMed</a><br><br>"
    #                                    "<a href='%{customdata[2]}'>Wikipedia</a></span>")
    #     fig.update_layout(uniformtext=dict(minsize=15), treemapcolorway=["PaleGoldenRod"])
    #     st.plotly_chart(fig, use_container_width=True)
    #     st.caption("Disease designation and database provided by KEGG: https://www.genome.jp/kegg/disease/")
    #     csv = df1.head(value_disease).to_csv().encode('utf-8')
    #     st.download_button(label=f"download top {value_disease} diseases (csv)", data=csv,
    #                        file_name=f'{database_name}_disease.csv', mime='text/csv')
    # else:
    #     st.warning(
    #         f"This selection exceeds the number of similar diseases related to {query} within the "
    #         f"{database_name} corpus; please choose a lower number")
    # st.markdown("---")

    # Pathway treemap (work in progress, currently disabled):
    # st.markdown("---")
    # df1 = table.copy()
    # df2 = pd.read_csv('pathwaysKegg.csv')
    # m = df1.Word.isin(df2.pathway)
    # df1 = df1[m]
    # df1.rename(columns={'Word': 'Pathway'}, inplace=True)
    # df_len = len(df1)
    # value_pathway = min(df1.shape[0], 100)
    # st.markdown(
    #     f"<b><p style='font-family: Arial; font-size: 20px;'>Top <span style='color:red; font-style: italic;'>{value_pathway} "
    #     f"</span>pathways contextually and semantically similar to "
    #     f"<span style='color:red; font-style: italic;'>{query}</span> within the <span style='color:red; font-style: italic;'>{database_name}</span> corpus. "
    #     f"Click on a square to expand it, and follow the PubMed and Wikipedia links for more pathway information.</p></b>",
    #     unsafe_allow_html=True)
    # df16 = df1.head(value_pathway).copy()
    # df16.index = (1 / df16.index) * 10000
    # sizes = df16.index.tolist()
    # df16.set_index('Pathway', inplace=True)
    # df9 = df1.copy()
    # df9["SIMILARITY"] = 'Similarity Score ' + df9.head(value_pathway)["SIMILARITY"].round(2).astype(str)
    # df9.reset_index(inplace=True)
    # if value_pathway <= df_len:
    #     df16.reset_index(inplace=True)
    #     df16['Pathway'] = df16['Pathway'].str.replace('-', ' ')
    #     df16.set_index('Pathway', inplace=True)
    #     df16['text'] = df16.index
    #     df16['href'] = [f'https://pubmed.ncbi.nlm.nih.gov/?term={database_name}%5Bmh%5D+NOT+review%5Bpt%5D'
    #                     '+AND+english%5Bla%5D+AND+hasabstract+AND+1990:2022%5Bdp%5D+AND+' + c
    #                     for c in df16['text']]
    #     df16['href2'] = ['https://en.wikipedia.org/wiki/' + c for c in df16['text']]
    #     df16['database'] = database_name
    #     fig = px.treemap(df16, path=[df16['text']], values=sizes,
    #                      custom_data=['href', 'database', 'href2', 'text'],
    #                      hover_name=(df9.head(value_pathway)['SIMILARITY']))
    #     fig.update(layout_coloraxis_showscale=False)
    #     fig.update_layout(autosize=True, paper_bgcolor="#CCFFFF", margin=dict(t=0, b=0, l=0, r=0))
    #     fig.update_annotations(visible=False)
    #     fig.update_traces(marker=dict(cornerradius=5), root_color="#CCFFFF", hovertemplate=None,
    #                       hoverlabel_bgcolor="lightblue", hoverlabel_bordercolor="#000000",
    #                       texttemplate="<span style='font-family: Arial; font-size: 20px;'>%{customdata[3]}<br><br>"
    #                                    "<a href='%{customdata[0]}'>PubMed</a><br><br>"
    #                                    "<a href='%{customdata[2]}'>Wikipedia</a></span>")
    #     fig.update_layout(uniformtext=dict(minsize=15), treemapcolorway=["FloralWhite"])
    #     st.plotly_chart(fig, use_container_width=True)
    #     st.caption("Pathway designation and database provided by KEGG: https://www.genome.jp/kegg/pathway.html")
    #     csv = df1.head(value_pathway).to_csv().encode('utf-8')
    #     st.download_button(label=f"download top {value_pathway} pathways (csv)", data=csv,
    #                        file_name=f'{database_name}_pathways.csv', mime='text/csv')
    # else:
    #     st.warning(
    #         f"This selection exceeds the number of similar pathways related to {query} within the "
    #         f"{database_name} corpus; please choose a lower number")
    # st.markdown("---")
| st.markdown("---") | |
| # print() | |
| # print("Human genes similar to " + str(query)) | |
| df1 = table.copy() | |
| df2 = pd.read_csv('phytochemicals.csv') | |
| m = df1.Word.isin(df2.phyto) | |
| df1 = df1[m] | |
| df1.rename(columns={'Word': 'Phytochemical'}, inplace=True) | |
| df_len = len(df1) | |
| # print(len(df1)) | |
| # df1["Human Gene"] = df1["Human Gene"].str.upper() | |
| # print(df1.head(50)) | |
| # print() | |
| # df1.head(50).to_csv("clotting_sim2.csv", index=True, header=False) | |
| # time.sleep(2) | |
| # Create the slider with increments of 5 up to 100 | |
| # Remove the slider and set the value_compound to the minimum of the number of rows in the dataframe and 100 | |
| value_phyto = min(df1.shape[0], 100) | |
    st.markdown(
        f"<b><p style='font-family: Arial; font-size: 20px;'>Top <span style='color:red; font-style: italic;'>{value_phyto} "
        f"</span>phytochemicals contextually and semantically similar to "
        f"<span style='color:red; font-style: italic;'>{query}</span> within the <span style='color:red; font-style: italic;'>{database_name}</span> corpus. "
        f"Click on a square to expand it, and follow the PubMed and Wikipedia links for more phytochemical information.</p></b>",
        unsafe_allow_html=True)
    df15 = df1.head(value_phyto).copy()
    df15.index = (1 / df15.index) * 10000   # reciprocal-rank tile sizes
    sizes = df15.index.tolist()
    df15.set_index('Phytochemical', inplace=True)

    df8 = df1.copy()
    df8["SIMILARITY"] = 'Similarity Score ' + df8.head(value_phyto)["SIMILARITY"].round(2).astype(str)
    df8.reset_index(inplace=True)
    if value_phyto <= df_len:
        # Restore spaces in the phytochemical names for display
        df15.reset_index(inplace=True)
        df15['Phytochemical'] = df15['Phytochemical'].str.replace('-', ' ')
        df15.set_index('Phytochemical', inplace=True)

        df15['text'] = df15.index
        df15['href'] = [f'https://pubmed.ncbi.nlm.nih.gov/?term={database_name}%5Bmh%5D+NOT+review%5Bpt%5D'
                        '+AND+english%5Bla%5D+AND+hasabstract+AND+1990:2022%5Bdp%5D+AND+' + c
                        for c in df15['text']]
        df15['href2'] = ['https://en.wikipedia.org/wiki/' + c for c in df15['text']]
        df15['database'] = database_name

        # Create the treemap using `px.treemap`
        fig = px.treemap(df15, path=[df15['text']], values=sizes,
                         custom_data=['href', 'database', 'href2', 'text'],
                         hover_name=(df8.head(value_phyto)['SIMILARITY']))
        fig.update(layout_coloraxis_showscale=False)
        fig.update_layout(autosize=True, paper_bgcolor="#CCFFFF", margin=dict(t=0, b=0, l=0, r=0))
        fig.update_annotations(visible=False)
        fig.update_traces(marker=dict(cornerradius=5), root_color="#CCFFFF", hovertemplate=None,
                          hoverlabel_bgcolor="lightblue", hoverlabel_bordercolor="#000000",
                          texttemplate="<span style='font-family: Arial; font-size: 20px;'>%{customdata[3]}<br><br>"
                                       "<a href='%{customdata[0]}'>PubMed</a><br><br>"
                                       "<a href='%{customdata[2]}'>Wikipedia</a></span>")
        fig.update_layout(uniformtext=dict(minsize=15), treemapcolorway=["LightSeaGreen"])
        st.plotly_chart(fig, use_container_width=True)

        st.caption("Phytochemical designation and database provided by PhytoHub: https://phytohub.eu/")

        csv = df1.head(value_phyto).to_csv().encode('utf-8')
        st.download_button(label=f"download top {value_phyto} phytochemicals (csv)", data=csv,
                           file_name=f'{database_name}_phytochemicals.csv', mime='text/csv')
    else:
        st.warning(
            f"This selection exceeds the number of similar phytochemicals related to {query} within the "
            f"{database_name} corpus; please choose a lower number")

    st.markdown("---")
    df1 = table.copy()
    df2 = pd.read_csv('kegg_compounds_lowercase.csv')
    m = df1.Word.isin(df2.compound)
    df1 = df1[m]
    df1.rename(columns={'Word': 'Compounds'}, inplace=True)
    df_len = len(df1)

    # Set value_compound to the minimum of the number of rows in the dataframe and 100
    value_compound = min(df1.shape[0], 100)
    st.markdown(
        f"<b><p style='font-family: Arial; font-size: 20px;'>Top <span style='color:red; font-style: italic;'>{value_compound} "
        f"</span>compounds contextually and semantically similar to "
        f"<span style='color:red; font-style: italic;'>{query}</span> within the <span style='color:red; font-style: italic;'>{database_name}</span> corpus. "
        f"Click on a square to expand it, and follow the PubMed, Wikipedia, and KEGG links for more compound information "
        f"(may take time to load).</p></b>",
        unsafe_allow_html=True)
    df12 = df1.head(value_compound).copy()
    df12.index = (1 / df12.index) * 10000   # reciprocal-rank tile sizes
    sizes = df12.index.tolist()
    df12.set_index('Compounds', inplace=True)

    df5 = df1.copy()
    df5["SIMILARITY"] = 'Similarity Score ' + df5.head(value_compound)["SIMILARITY"].round(2).astype(str)
    df5.reset_index(inplace=True)
    if value_compound <= df_len:
        # Restore spaces in the compound names for display
        df12.reset_index(inplace=True)
        df12['Compounds'] = df12['Compounds'].str.replace('-', ' ')
        df12.set_index('Compounds', inplace=True)

        df12['text'] = df12.index
        df12['href'] = [f'https://pubmed.ncbi.nlm.nih.gov/?term={database_name}%5Bmh%5D+NOT+review%5Bpt%5D'
                        '+AND+english%5Bla%5D+AND+hasabstract+AND+1990:2022%5Bdp%5D+AND+' + c
                        for c in df12['text']]
        df12['href2'] = ['https://en.wikipedia.org/wiki/' + c for c in df12['text']]
        df12['href3'] = [f'https://www.genome.jp/entry/{compound_id}'
                         for compound_id in get_compound_ids(df12['text'])]
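        # get_compound_ids issues one KEGG REST request per compound (up to 100
        # here), which is why the header above warns that this treemap may take
        # time to load; failed lookups return None and produce a dead KEGG link.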
        df12['database'] = database_name

        # Create the treemap using `px.treemap`
        fig = px.treemap(df12, path=[df12['text']], values=sizes,
                         custom_data=['href', 'database', 'href2', 'text', 'href3'],
                         hover_name=(df5.head(value_compound)['SIMILARITY']))
        fig.update(layout_coloraxis_showscale=False)
        fig.update_layout(autosize=True, paper_bgcolor="#CCFFFF", margin=dict(t=0, b=0, l=0, r=0))
        fig.update_annotations(visible=False)
        fig.update_traces(marker=dict(cornerradius=5), root_color="#CCFFFF", hovertemplate=None,
                          hoverlabel_bgcolor="lightblue", hoverlabel_bordercolor="#000000",
                          texttemplate="<span style='font-family: Arial; font-size: 20px;'>%{customdata[3]}<br><br>"
                                       "<a href='%{customdata[0]}'>PubMed</a><br><br>"
                                       "<a href='%{customdata[2]}'>Wikipedia</a><br><br>"
                                       "<a href='%{customdata[4]}'>KEGG Compound Page</a></span>")
        fig.update_layout(uniformtext=dict(minsize=15), treemapcolorway=["LightYellow"])
        st.plotly_chart(fig, use_container_width=True)

        st.caption("Compound designation and database provided by KEGG: https://www.kegg.jp/kegg/compound/")

        csv = df1.head(value_compound).to_csv().encode('utf-8')
        st.download_button(label=f"download top {value_compound} compounds (csv)", data=csv,
                           file_name=f'{database_name}_compounds.csv', mime='text/csv')
    else:
        st.warning(
            f"This selection exceeds the number of similar compounds related to {query} within the "
            f"{database_name} corpus; please choose a lower number")

    st.markdown("---")
def save_comment(comment):
    # Append the comment to a local text file
    with open('comments.txt', 'a') as f:
        f.write(f'{comment}\n')


def save_comment_threaded(comment):
    # Write in a background thread so the Streamlit rerun is not blocked
    t = threading.Thread(target=save_comment, args=(comment,))
    t.start()
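# Appending from a background thread keeps the UI responsive, but concurrent
# submissions could interleave writes. A minimal thread-safe sketch (an
# alternative, not in the original):
# _comment_lock = threading.Lock()
# def save_comment(comment):
#     with _comment_lock:
#         with open('comments.txt', 'a') as f:
#             f.write(f'{comment}\n')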
st.title("Abstractalytics Web App")
st.write("We appreciate your feedback!")

user_comment = st.text_area("Please send us your anonymous remarks/suggestions about the "
                            "Abstractalytics Web App:")

if st.button("Submit"):
    if user_comment:
        save_comment_threaded(user_comment)
        st.success("Your comment has been saved. Thank you for your feedback!")
    else:
        st.warning("Please enter a comment before submitting.")

st.markdown("---")
st.subheader("Cancer-related videos")
if query:
    # Scrape cancer-related video IDs from several curated YouTube channels
    channels = [
        "https://www.youtube.com/@NCIgov/search?query=cancer",
        "https://www.youtube.com/@CancerCenter/search?query=cancer",
        "https://www.youtube.com/@NorthwesternMedicine/search?query=cancer",
        "https://www.youtube.com/@TEDEd/search?query=cancer",
        "https://www.youtube.com/@CancerResearchUK/search?query=cancer",
    ]
    video_ids = []
    for url in channels:
        html = urllib.request.urlopen(url)
        video_ids.extend(re.findall(r"watch\?v=(\S{11})", html.read().decode()))
    random.shuffle(video_ids)
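    # The same video can appear on more than one channel page; de-duplicating
    # before sampling (an optional tweak, not in the original) avoids repeats:
    # video_ids = list(dict.fromkeys(video_ids))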
    c1, c2, c3 = st.columns(3)
    with c1:
        st.video("https://www.youtube.com/watch?v=" + video_ids[0])
    with c2:
        st.video("https://www.youtube.com/watch?v=" + video_ids[1])
    with c3:
        st.video("https://www.youtube.com/watch?v=" + video_ids[2])

st.markdown("---")

# else:
#     st.error("The password you entered is incorrect.")