Spaces:
Running
Running
| import streamlit as st | |
| import time | |
| import json | |
| from gensim.models import Word2Vec | |
| import pandas as pd | |
| from datasets import load_dataset | |
| from datasets import Dataset | |
# Page banner: a black, padded strip containing the centred white title.
# The markup is assembled line by line; the leading and trailing newlines
# mirror a triple-quoted literal that opens and closes on its own lines.
html_temp = (
    "\n"
    '<div style="background-color:black;padding:10px">\n'
    '<h1 style="color:white;text-align:center;">'
    "My Streamlit App with HTML and CSS"
    "</h1>\n"
    "</div>\n"
)

# unsafe_allow_html=True lets Streamlit render the tags instead of escaping them.
st.markdown(html_temp, unsafe_allow_html=True)

# One introductory sentence displayed below the banner.
st.write("This is my Streamlit app with HTML and CSS formatting.")
def _publish_ranking(frame, query, noun, stem, log_title):
    """Offer the top 50 rows of *frame* for download, save them, and show them.

    The same routine serves both result sections (all similar words, and the
    subset that are human gene symbols) so their labels and file names cannot
    drift apart.

    Args:
        frame: Ranked similarity table; only ``frame.head(50)`` is published.
        query: The search term, interpolated into labels and headings.
        noun: ``"words"`` or ``"genes"``; used in button labels and the header.
        stem: Base file name (without extension) used for the downloads and
            for the copies written to the working directory.
        log_title: Text printed to stdout immediately before the query.
    """
    top = frame.head(50)

    print()
    print(log_title + str(query))

    # In-memory CSV for the browser download (rank index omitted).
    csv_bytes = top.to_csv(index=False).encode("utf-8")
    st.download_button(
        label=f"Download {noun} similar to {query} in .csv format",
        data=csv_bytes,
        file_name=f"{stem}.csv",
        mime="text/csv",
    )

    # The payload is JSON even though the file keeps the historical ".js"
    # extension; "application/json" is the registered MIME type for it.
    json_bytes = top.to_json(index=True).encode("utf-8")
    st.download_button(
        label=f"Download {noun} similar to {query} in .js format",
        data=json_bytes,
        file_name=f"{stem}.js",
        mime="application/json",
    )

    print(frame.head(10))

    # Copies on disk next to the script (these include the rank index).
    top.to_csv(f"{stem}.csv", index=True)
    top.to_json(f"{stem}.js", index=True)

    st.header(f"Similar {noun.capitalize()} to {query}")
    st.write(top)


query = st.text_input("Enter a word")
# query = input ("Enter your keyword(s):")
# Normalise the input: surrounding whitespace would otherwise make a valid
# word look like it is missing from the vocabulary.
query = query.strip().lower()

if query:
    # NOTE: the model is loaded again on every rerun that has a non-empty query.
    model = Word2Vec.load("pubmed_model_clotting")  # you can continue training with the loaded model!

    if query not in model.wv.key_to_index:
        # Looking up an unknown key raises KeyError; tell the user instead of
        # letting the app show a traceback.
        st.warning(f"'{query}' is not in the model vocabulary. Please try another word.")
    else:
        pd.set_option('display.max_rows', None)

        # Up to 10000 nearest neighbours as (word, score) pairs, best first.
        neighbours = model.wv.most_similar_cosmul(query, topn=10000)
        table = pd.DataFrame(neighbours, columns=['Word', 'SIMILARITY'])
        table.index.name = 'Rank'
        _publish_ranking(table, query, "words", "clotting_sim1", "Similarity to ")

        # Keep only the neighbours listed in the 'symbol' column of
        # Human_Genes.csv. rename() returns a new frame, so the filtered
        # slice is never modified in place.
        human_genes = pd.read_csv('Human_Genes.csv')
        is_gene = table['Word'].isin(human_genes['symbol'])
        gene_table = table[is_gene].rename(columns={'Word': 'Human Gene'})
        _publish_ranking(gene_table, query, "genes", "clotting_sim2", "Human genes similar to ")
        print()

# NOTE: unfinished experiment kept from the original script (never executed):
# arrow_dataset = Dataset.from_pandas(df1.head(50))
# arrow_dataset.save_to_disk("https://huggingface.co/datasets/jfataphd/word2vec_dataset/sim2")
# arrow_dataset_reloaded = load_from_disk('sim2.js')
# arrow_dataset_reloaded