# Home.py - landing page of the FreeTxt / TestunRhydd Streamlit app.
import os
import string
import random
import streamlit as st
import base64
from PIL import Image
from labels import MESSAGES
from streamlit_lottie import st_lottie
##multilingual
import gettext
_ = gettext.gettext
#################################################################################
# Use local CSS
def local_css(file_name):
    """Inject a local stylesheet into the current Streamlit page.

    The content of ``file_name`` is wrapped in a ``<style>`` tag and emitted
    through ``st.markdown`` with ``unsafe_allow_html=True``.

    Args:
        file_name: Path of the CSS file to load.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(file_name, encoding="utf-8") as css_file:
        st.markdown(f"<style>{css_file.read()}</style>", unsafe_allow_html=True)
#local_css("style/style.css")
@st.cache(allow_output_mutation=True)
def get_base64_of_bin_file(png_file):
    """Return the content of ``png_file`` as base64 text.

    The file is read in binary mode, base64-encoded and decoded to ``str``.
    The function is wrapped in ``st.cache(allow_output_mutation=True)``.

    Args:
        png_file: Path of the file to encode.

    Returns:
        The base64 representation of the file content as a string.
    """
    with open(png_file, "rb") as handle:
        encoded = base64.b64encode(handle.read())
    return encoded.decode()
def build_markup_for_logo(
    png_file,
    background_position="50% 10%",
    margin_top="10%",
    image_width="60%",
    image_height="",
):
    """Build the ``<style>`` snippet that sets a logo on the sidebar navigation.

    The image is embedded as a base64 ``data:`` URL in a CSS rule targeting
    ``[data-testid="stSidebarNav"]``.

    Args:
        png_file: Path of the image, encoded via ``get_base64_of_bin_file``.
        background_position: Value for the CSS ``background-position``.
        margin_top: Value for the CSS ``margin-top``.
        image_width: First component of the CSS ``background-size``.
        image_height: Second component of the CSS ``background-size``.

    Returns:
        The HTML/CSS markup as a string.
    """
    # Values are listed in the order the "%s" placeholders appear below.
    css_values = (
        get_base64_of_bin_file(png_file),
        background_position,
        margin_top,
        image_width,
        image_height,
    )
    return """
<style>
[data-testid="stSidebarNav"] {
background-image: url("data:image/png;base64,%s");
background-repeat: no-repeat;
background-position: %s;
margin-top: %s;
background-size: %s %s;
}
</style>
""" % css_values
def add_logo(png_file):
    """Render ``png_file`` as the logo of the sidebar navigation.

    The markup comes from ``build_markup_for_logo`` and is emitted with
    ``st.markdown`` using ``unsafe_allow_html=True``.

    Args:
        png_file: Path of the logo image.
    """
    st.markdown(build_markup_for_logo(png_file), unsafe_allow_html=True)
# ----------------
# Page-level configuration: title, icon, wide layout, expanded sidebar and
# the entries of the app menu (help link, bug-report link, About text).
st.set_page_config(
    page_title='Welsh FreeTxt Tool',
    page_icon='🌐',
    layout="wide",
    initial_sidebar_state="expanded",
    menu_items={
        'Get Help': "https://ucrel.lancs.ac.uk/freetxt/",
        # The previous value "https:/UCREL/welsh-freetxt-app/issues" had a
        # single slash and no host, so it was not a usable URL.
        # NOTE(review): host restored as github.com - confirm the repository.
        'Report a bug': "https://github.com/UCREL/welsh-freetxt-app/issues",
        'About': '''## The FreeTxt/TestunRhydd tool
FreeTxt was developed as part of an AHRC funded collaborative
FreeTxt supporting bilingual free-text survey
and questionnaire data analysis
research project involving colleagues from
Cardiff University and Lancaster University (Grant Number AH/W004844/1).
The team included PI - Dawn Knight;
CIs - Paul Rayson, Mo El-Haj;
RAs - Ignatius Ezeani, Nouran Khallaf and Steve Morris.
The Project Advisory Group included representatives from
National Trust Wales, Cadw, National Museum Wales,
CBAC | WJEC and National Centre for Learning Welsh.
-------------------------------------------------------
Datblygwyd TestunRhydd fel rhan o brosiect ymchwil
cydweithredol a gyllidwyd gan yr AHRC
‘TestunRhydd: yn cefnogi dadansoddi data arolygon testun
rhydd a holiaduron dwyieithog’ sy’n cynnwys cydweithwyr
o Brifysgol Caerdydd a Phrifysgol Caerhirfryn (Rhif y
Grant AH/W004844/1).
Roedd y tîm yn cynnwys PY – Dawn Knight;
CYwyr – Paul Rayson, Mo El-Haj; CydY
– Ignatius Ezeani, Nouran Khallaf a Steve Morris.
Roedd Grŵp Ymgynghorol y Prosiect yn cynnwys cynrychiolwyr
o Ymddiriedolaeth Genedlaethol Cymru, Amgueddfa Cymru,
CBAC a’r Ganolfan Dysgu Cymraeg Genedlaethol.
'''
    }
)
# Language picker in the sidebar: 'en' (English) or 'cy' (Welsh).
language = st.sidebar.selectbox('', ['en', 'cy'])
try:
    localizator = gettext.translation('base', localedir='locales', languages=[language])
except OSError:
    # No usable catalogue for this language: stay on the `_` that the file
    # binds to `gettext.gettext` next to its imports. Only OSError is
    # swallowed, so unrelated failures are no longer hidden.
    pass
else:
    localizator.install()
    _ = localizator.gettext

# Page heading, sidebar logo and a divider before the feature sections.
st.markdown(_("# FreeTxt Text Analysis"))
add_logo("img/FreeTxt_logo.png")
st.write("---")
# ---- Read a GIF from a local file ----
def read_gif(name):
    """Return the content of the file ``name`` as a base64-encoded string.

    Args:
        name: Path of the file to read; it is opened in binary mode.

    Returns:
        The base64 text of the file content (an empty string for an empty
        file).

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # The context manager closes the file even when reading fails.
    with open(name, "rb") as gif_file:
        return base64.b64encode(gif_file.read()).decode("utf-8")
#######################
# ---- Section 1: reviews analysis and illustrations (text left, GIF right) ----
with st.container():
    text_col, gif_col = st.columns([1, 1])
    with text_col:
        st.subheader(_("[Reviews analysis and illustrations](https://ucrel-welsh-freetxt-app-home-rvqet7.streamlit.app/Reviews_analysis_and_Illustrations)"))
        st.write(_('''This tool has three components:
1. Data View: to select, view and filter columns from a data file
2. Word Cloud: creates a word cloud from content in the selected columns of a file
3. Context and Collocation: extracts the most frequent words that appear in the selected columns of your file, illustrating how they appear in sentences. It also shows the words which most often co-occur with these most frequent words'''))
    with gif_col:
        data_url = read_gif("img/visualization.gif")
        # The GIF is embedded as a base64 data URL and wrapped in a link.
        gif_html = f'<p style="text-align: center; color: grey;"> <a href="https://ucrel-welsh-freetxt-app-home-rvqet7.streamlit.app/Reviews_analysis_and_Illustrations"><img width="200" height="200" src="data:image/gif;base64,{data_url} "></a></p>'
        st.markdown(gif_html, unsafe_allow_html=True)
st.write("---")

# ---- Section 2: positive and negative reviews (GIF left, text right) ----
with st.container():
    gif_col, text_col = st.columns([1, 1])
    with gif_col:
        data_url_2 = read_gif("img/reviews.gif")
        gif_html = f'<p style="text-align: center; color: grey;"><a href="https://ucrel-welsh-freetxt-app-home-rvqet7.streamlit.app/Positive_and_Negative_Reviews"><img width="200" height="200" src="data:image/gif;base64,{data_url_2} "></a></p>'
        st.markdown(gif_html, unsafe_allow_html=True)
    with text_col:
        st.subheader(_("[Positive and Negative reviews](https://ucrel-welsh-freetxt-app-home-rvqet7.streamlit.app/Positive_and_Negative_Reviews)"))
        st.write(_("This feature performs sentiment classification on reviews from selected column(s) and displays a pie chart to visualize the output"))
st.write("---")

# ---- Section 3: summary generation (text left, GIF right) ----
with st.container():
    text_col, gif_col = st.columns([1, 1])
    with text_col:
        st.subheader(_("[Generate_a_summary](https://ucrel-welsh-freetxt-app-home-rvqet7.streamlit.app/Generate_a_Summary)"))
        st.write(_('This tool, adapted from the Welsh Summarization project, produces a basic extractive summary of the review text from the selected columns.'))
    with gif_col:
        data_url = read_gif("img/summary.gif")
        gif_html = f'<p style="text-align: center; color: grey;"><a href="https://ucrel-welsh-freetxt-app-home-rvqet7.streamlit.app/Generate_a_Summary"><img width="400" height="400" src="data:image/gif;base64,{data_url} "></a></p>'
        st.markdown(gif_html, unsafe_allow_html=True)
st.write("---")

# ---- Section 4: word types and relations (GIF left, text right) ----
with st.container():
    gif_col, text_col = st.columns([1, 1])
    with gif_col:
        data_url_2 = read_gif("img/semantic.gif")
        gif_html = f'<p style="text-align: center; color: grey;"><a href="https://ucrel-welsh-freetxt-app-home-rvqet7.streamlit.app/Word_Types_and_Relations"><img width="500" height="500" src="data:image/gif;base64,{data_url_2} "></a></p>'
        st.markdown(gif_html, unsafe_allow_html=True)
    with text_col:
        st.subheader(_("[Word_Types_and_Relations](https://ucrel-welsh-freetxt-app-home-rvqet7.streamlit.app/Word_Types_and_Relations)"))
        st.write(_('This feature uses the PyMUSAS pipeline on Spacy to generate and display POS (CyTag) tags as well as semantic (USAS) tags. It currently works on the Ucrel-freetxt-VM as setting up Docker on the Streamlit cloud is a bit complex'))

# Sample Welsh text; nothing in the visible part of this file reads it.
text = "Sefydliad cyllidol yw bancwr neu fanc sy'n actio fel asiant talu ar gyfer cwsmeriaid, ac yn rhoi benthyg ac yn benthyg arian. Yn rhai gwledydd, megis yr Almaen a Siapan, mae banciau'n brif berchenogion corfforaethau diwydiannol, tra mewn gwledydd eraill, megis yr Unol Daleithiau, mae banciau'n cael eu gwahardd rhag bod yn berchen ar gwmniau sydd ddim yn rhai cyllidol. Adran Iechyd Cymru."
# ---- HIDE STREAMLIT STYLE ----
#hide_st_style = """
# <style>
#MainMenu {visibility: hidden;}
# footer {visibility: hidden;}
# #header {visibility: hidden;}
# </style>
# """
#st.markdown(hide_st_style, unsafe_allow_html=True)