tahamueed23 commited on
Commit
cfae69a
·
verified ·
1 Parent(s): 0780c88

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +369 -126
app.py CHANGED
@@ -1,76 +1,256 @@
1
  import gradio as gr
2
- from transformers import pipeline
3
  import pandas as pd
4
  import os
5
  import re
6
  from filelock import FileLock
 
 
7
 
8
  # -----------------------------
9
- # Load Transformer Models
10
  # -----------------------------
11
- english_model = pipeline(
12
- "sentiment-analysis",
13
- model="siebert/sentiment-roberta-large-english"
14
- )
15
-
16
- urdu_model = pipeline(
17
- "sentiment-analysis",
18
- model="tahamueed23/fine_tuned_cardiffnlp_urdu_and_roman-urdu"
19
- )
20
-
21
- roman_urdu_model = pipeline(
22
- "sentiment-analysis",
23
- model="tahamueed23/fine_tuned_cardiffnlp_urdu_and_roman-urdu"
24
- )
25
-
26
- # -----------------------------
27
- # CSV Setup
28
- # -----------------------------
29
- SAVE_FILE = "sentiment_logs.csv"
30
- LOCK_FILE = SAVE_FILE + ".lock"
31
-
32
- if not os.path.exists(SAVE_FILE):
33
- pd.DataFrame(columns=["Sentence", "Language", "Sentiment", "Confidence"]).to_csv(
34
- SAVE_FILE, index=False, encoding="utf-8-sig"
35
  )
 
 
 
 
36
 
37
  # -----------------------------
38
- # Improved Language Detection
39
  # -----------------------------
40
- def detect_language(text):
41
- urdu_script = re.compile(r"[\u0600-\u06FF]")
42
- if urdu_script.search(text):
43
- return "Urdu"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
45
- roman_urdu_patterns = [
46
- r"\b(hai|hain|tha|thi|parhta|parhai|acha|bura|bohot|zabardast)\b",
47
- r"\b(sir|madam|ustad|class|parh|samajh)\b",
48
- ]
49
 
50
- text_l = text.lower()
51
- for p in roman_urdu_patterns:
52
- if re.search(p, text_l):
53
- return "Roman Urdu"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  return "English"
56
 
57
  # -----------------------------
58
- # Roman Urdu Normalization
59
  # -----------------------------
60
- def normalize_roman_urdu(text):
61
- text = text.lower()
62
- text = text.replace("hy", "hai").replace("h", "hai")
63
- text = re.sub(r"\bnhi\b|\bnai\b|\bnhi\b", "nahi", text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  return text
65
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  # -----------------------------
67
  # Normalize Labels
68
  # -----------------------------
69
  def normalize_label(label):
70
- label = label.lower()
71
- if "pos" in label or "positive" in label:
 
 
72
  return "Positive"
73
- elif "neg" in label or "negative" in label:
74
  return "Negative"
75
  else:
76
  return "Neutral"
@@ -78,117 +258,180 @@ def normalize_label(label):
78
  # -----------------------------
79
  # Polarity Explanation
80
  # -----------------------------
81
- def polarity_explanation(text, sentiment):
 
 
 
82
  explanations = {
83
- "Positive": "Contains praise words or positive evaluation.",
84
- "Negative": "Contains criticism or negative expressions.",
85
- "Neutral": "Factual statement or balanced observation."
 
 
 
 
 
 
 
 
 
 
 
 
86
  }
87
- return explanations.get(sentiment, "")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
 
89
  # -----------------------------
90
- # Ensemble Roman Urdu + Urdu
91
  # -----------------------------
92
- def ensemble_roman_urdu(text):
93
- ru = roman_urdu_model(text)[0]
94
- ur = urdu_model(text)[0]
95
-
96
- ru_sent, ur_sent = normalize_label(ru["label"]), normalize_label(ur["label"])
97
-
98
- if ru_sent == ur_sent:
99
- return ru if ru["score"] >= ur["score"] else ur
100
-
101
- # Weight Roman Urdu higher for Roman Urdu input
102
- weight_ru = ru["score"] * 1.25
103
- weight_ur = ur["score"]
104
- return ru if weight_ru >= weight_ur else ur
105
 
106
- # -----------------------------
107
- # Adjust sentiment if low intensity
108
- # -----------------------------
109
- def adjust_for_neutral(text, sentiment, score):
110
- if sentiment in ["Positive", "Negative"] and score < 0.7:
111
- return "Neutral", score
112
- return sentiment, score
113
 
114
  # -----------------------------
115
  # Main Analysis Function
116
  # -----------------------------
117
- def analyze_sentiment(text, lang_hint):
 
118
  if not text.strip():
119
- return "⚠️ Please enter a sentence.", "", "", SAVE_FILE
120
 
121
- lang = lang_hint if lang_hint != "Auto Detect" else detect_language(text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
 
123
- if lang == "English":
124
- result = english_model(text)[0]
125
- elif lang == "Urdu":
126
- result = urdu_model(text)[0]
127
- else:
128
- text = normalize_roman_urdu(text)
129
- result = ensemble_roman_urdu(text)
130
-
131
- sentiment = normalize_label(result["label"])
132
- score = round(float(result["score"]), 3)
133
- sentiment, score = adjust_for_neutral(text, sentiment, score)
134
- explanation = polarity_explanation(text, sentiment)
135
-
136
- # Save logs
137
- with FileLock(LOCK_FILE):
138
- df = pd.read_csv(SAVE_FILE, encoding="utf-8-sig") \
139
- if os.path.exists(SAVE_FILE) else pd.DataFrame(columns=["Sentence", "Language", "Sentiment", "Confidence"])
140
- new_row = pd.DataFrame([[text, lang, sentiment, score]],
141
- columns=["Sentence", "Language", "Sentiment", "Confidence"])
142
- df = pd.concat([df, new_row], ignore_index=True)
143
- df.to_csv(SAVE_FILE, index=False, encoding="utf-8-sig")
144
-
145
- return sentiment, str(score), explanation, SAVE_FILE
146
 
147
  # -----------------------------
148
  # Show Logs
149
  # -----------------------------
150
  def show_logs():
151
  if os.path.exists(SAVE_FILE):
152
- return pd.read_csv(SAVE_FILE, encoding="utf-8-sig")
 
153
  else:
154
- return pd.DataFrame(columns=["Sentence", "Language", "Sentiment", "Confidence"])
155
 
156
  # -----------------------------
157
- # Gradio UI
 
 
 
 
 
 
158
  # -----------------------------
159
- with gr.Blocks() as demo:
 
 
160
  gr.Markdown(
161
- "## 🌍 Multilingual Sentiment Analysis (English • Urdu • Roman Urdu)\n"
162
- "Detect **Positive**, **Negative**, or **Neutral** tone with confidence score.\n\n"
163
- "🪶 Improved Roman Urdu normalization + ensemble + polarity explanation.\n"
 
 
 
 
 
 
 
164
  )
165
 
166
  with gr.Row():
167
- with gr.Column():
168
- user_text = gr.Textbox(label="✍️ Enter text", placeholder="Type English, Urdu, or Roman Urdu...")
 
 
 
 
169
  lang_dropdown = gr.Dropdown(
170
  ["Auto Detect", "English", "Urdu", "Roman Urdu"],
171
- value="Auto Detect", label="🌐 Language"
 
172
  )
173
- btn_analyze = gr.Button("🔍 Analyze Sentiment")
174
- btn_show = gr.Button("📂 Show Saved Logs")
175
-
176
- with gr.Column():
177
- out_sent = gr.Textbox(label="Sentiment")
178
- out_conf = gr.Textbox(label="Confidence (0–1)")
179
- out_exp = gr.Textbox(label="Polarity Explanation")
180
- out_file = gr.File(label="⬇️ Download Logs (.csv)", type="filepath")
181
-
182
- logs_df = gr.Dataframe(
183
- headers=["Sentence", "Language", "Sentiment", "Confidence"],
184
- label="🧾 Sentiment Logs", interactive=False
185
- )
186
 
187
- btn_analyze.click(analyze_sentiment,
188
- inputs=[user_text, lang_dropdown],
189
- outputs=[out_sent, out_conf, out_exp, out_file])
 
 
 
190
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191
  btn_show.click(show_logs, outputs=[logs_df])
 
192
 
193
  if __name__ == "__main__":
194
- demo.launch()
 
1
  import gradio as gr
2
+ from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
3
  import pandas as pd
4
  import os
5
  import re
6
  from filelock import FileLock
7
+ import torch
8
+ import numpy as np
9
 
10
  # -----------------------------
11
+ # Load Models with Error Handling
12
  # -----------------------------
13
# Load every pipeline once at import time; any failure aborts startup so the
# app never serves requests with a partially-loaded model set.
try:
    # English model: binary sentiment RoBERTa (tokenizer pinned explicitly).
    english_model = pipeline(
        "sentiment-analysis",
        model="siebert/sentiment-roberta-large-english",
        tokenizer="siebert/sentiment-roberta-large-english"
    )

    # Urdu model (fine-tuned cardiffnlp checkpoint covering Urdu + Roman Urdu).
    urdu_model = pipeline(
        "sentiment-analysis",
        model="tahamueed23/fine_tuned_cardiffnlp_urdu_and_roman-urdu"
    )

    # Roman Urdu model.
    # NOTE(review): this is a different checkpoint id than urdu_model —
    # confirm both repos exist; a typo here only surfaces at startup.
    roman_urdu_model = pipeline(
        "sentiment-analysis",
        model="tahamueed23/urdu-roman-urdu-sentiment-cardiffnlp"
    )

    # Language detection model (multilingual XLM-R classifier; emits ISO
    # codes such as 'en' / 'ur' as labels).
    lang_detector = pipeline(
        "text-classification",
        model="papluca/xlm-roberta-base-language-detection"
    )

except Exception as e:
    print(f"Error loading models: {e}")
    raise  # re-raise: the app cannot run without its models
42
 
43
  # -----------------------------
44
+ # Enhanced Language Detection
45
  # -----------------------------
46
+ # Core Roman Urdu keywords (expanded list)
47
# Core Roman Urdu keywords used by both the model-assisted and the fallback
# language detectors. Matching any of these (at word boundaries) counts as a
# "core hit" toward classifying the text as Roman Urdu.
# NOTE(review): "hain" appears twice in this list — harmless in an
# alternation, but worth deduplicating.
roman_urdu_core = [
    "acha", "achy", "achay", "khali", "aain", "aram", "aate", "achi", "aik", "asaani",
    "aur", "aj", "aya", "baat", "behas", "behtar", "bohot", "chal", "deh", "dala",
    "dali", "dalta", "deen", "detay", "deta", "deti", "dostana", "di", "diya", "diye",
    "dilchasp", "fori", "gaya", "ganda", "gaye", "hain", "hai", "hi", "hoslaafzai",
    "hoti", "hotay", "hua", "huay", "hue", "hosla", "huin", "hal", "hain", "hui",
    "imtihaan", "ja", "kab", "kabhi", "ka", "kam", "karta", "ke", "kesy", "khrab",
    "kharab", "kiya", "kun", "ki", "kamzor", "ko", "kuch", "lamba", "lambe", "liye",
    "madad", "madadgar", "maine", "mehdood", "mein", "mera", "meri", "munsifana",
    "mutaharrik", "munazzam", "mufeed", "mushkil", "mukhtasir", "mutasir", "mukammal",
    "na", "namukammal", "nishistain", "naqis", "nahi", "ne", "nisab", "par", "pasand",
    "paya", "py", "pursukoon", "purani", "purana", "purany", "raha", "roshan", "rakhi",
    "saka", "samajh", "sarah", "se", "shandaar", "seekha", "sust", "saaf", "suthri",
    "tareef", "targheeb", "tez", "tha", "thay", "theen", "tulaba", "thein", "thin",
    "thi", "tor", "tumne", "uljha", "ur", "usne", "ustad", "waqfa", "wala", "wazeh",
    "zyada", "zabardast", "bohat", "kya", "main", "tum", "wo", "ye", "unhon", "inhon"
]

# Compile the word-boundary alternation once at import time; it is reused on
# every detection call.
roman_urdu_pattern_core = re.compile(r'\b(' + "|".join(roman_urdu_core) + r')\b', re.IGNORECASE)
 
 
67
 
68
def detect_language_enhanced(text):
    """Classify *text* as "English", "Urdu", or "Roman Urdu".

    Strategy, in order:
      1. Any Arabic/Urdu-script character is decisive -> "Urdu".
      2. Otherwise ask the XLM-R language detector; if it says English or
         Urdu, still re-check for Roman Urdu via keyword density, since the
         detector has no "Roman Urdu" class.
      3. If the model call raises, fall back to pure rules
         (detect_language_fallback).

    Empty/whitespace input defaults to "English".
    """
    if not text.strip():
        return "English"

    text_clean = str(text).strip()

    # Step 1: Urdu script detection (most reliable)
    if re.search(r'[\u0600-\u06FF]', text_clean):
        return "Urdu"

    # Step 2: Use transformer model for language detection
    try:
        lang_result = lang_detector(text_clean[:512])[0]  # Limit text length
        lang_label = lang_result['label']
        lang_score = lang_result['score']

        if lang_label == 'ur' and lang_score > 0.7:
            return "Urdu"
        elif lang_label in ['en', 'ur'] and lang_score > 0.6:
            # Further check for Roman Urdu: the detector cannot see Roman
            # Urdu, so count core-keyword hits against total tokens.
            core_hits = len(re.findall(roman_urdu_pattern_core, text_clean.lower()))
            tokens = re.findall(r'\b\w+\b', text_clean)
            total_tokens = len(tokens)

            # Strong Roman Urdu indicators
            if core_hits >= 2:
                return "Roman Urdu"
            elif core_hits >= 1 and total_tokens <= 6:
                return "Roman Urdu"
            elif core_hits / max(total_tokens, 1) > 0.3:  # 30% Roman Urdu words
                return "Roman Urdu"

        # Any non-English label (low-confidence or other language) maps to
        # "Urdu" here — NOTE(review): e.g. French input would be labeled
        # Urdu; confirm this is intended.
        return "English" if lang_label == 'en' else "Urdu"

    except Exception as e:
        print(f"Language detection error: {e}")

    # Only reached when the detector raised: rule-based fallback.
    return detect_language_fallback(text_clean)
108
 
109
def detect_language_fallback(text):
    """Rule-based language guess used when the detector model fails.

    Checks, in order: Urdu script (decisive), Roman Urdu keyword density,
    otherwise English.
    """
    # Any Arabic/Urdu-script character settles it immediately.
    if re.search(r'[\u0600-\u06FF]', text):
        return "Urdu"

    lowered = text.lower()
    hits = len(roman_urdu_pattern_core.findall(lowered))
    word_count = len(re.findall(r'\b\w+\b', lowered))

    # Roman Urdu if: two core keywords anywhere, one keyword in a very short
    # message, or more than a quarter of all tokens are core keywords.
    looks_roman_urdu = (
        hits >= 2
        or (hits >= 1 and word_count <= 5)
        or hits / max(word_count, 1) > 0.25
    )
    return "Roman Urdu" if looks_roman_urdu else "English"
131
 
132
  # -----------------------------
133
+ # Enhanced Roman Urdu Normalization
134
  # -----------------------------
135
def normalize_roman_urdu_enhanced(text):
    """Normalize common Roman Urdu spelling variants to canonical forms.

    Lowercases and strips the input, then rewrites frequent shorthand
    spellings (e.g. "hy"/"h" -> "hai", "nhi"/"nai"/"na" -> "nahi") so the
    sentiment models see consistent tokens.

    Fix over the previous revision: the replacement table contained five
    identity mappings ("zyada"->"zyada", "acha"->"acha", "achay"->"achay",
    "thi"->"thi", "tha"->"tha") that burned a regex pass each while changing
    nothing; they are removed. Behavior is otherwise identical.
    """
    text = text.lower().strip()

    # Patterns are applied in insertion order; each is a whole-word match.
    replacements = {
        r'\bhy\b': 'hai',
        r'\bh\b': 'hai',      # bare "h" as shorthand for "hai"
        r'\bnhi\b': 'nahi',
        r'\bnai\b': 'nahi',
        r'\bna\b': 'nahi',    # NOTE(review): "na" can also mean plain negation particle — confirm this mapping is wanted
        r'\bboht\b': 'bohot',
        r'\bbhot\b': 'bohot',
        r'\bzada\b': 'zyada',
        r'\bthy\b': 'thay',
    }

    for pattern, replacement in replacements.items():
        text = re.sub(pattern, replacement, text)

    return text
161
 
162
+ # -----------------------------
163
+ # Sentiment Analysis Enhancement
164
+ # -----------------------------
165
def get_strong_words(text, language):
    """Collect strong sentiment-bearing words found in *text*.

    Despite the patterns mixing praise and criticism terms, the result is
    simply a flat list of every match, in pattern-list order, for the
    pattern set of the given language.
    """
    lowered = text.lower()

    # One pattern list per supported language bucket.
    strong_patterns = {
        'english': [r'excellent', r'outstanding', r'amazing', r'wonderful', r'perfect',
                    r'brilliant', r'fantastic', r'superb', r'terrible', r'awful',
                    r'horrible', r'disappointing', r'poor', r'bad'],
        'urdu': [r'زبردست', r'شاندار', r'عمدہ', r'بہترین', r'خراب', r'برا', r'مایوس کن'],
        'roman_urdu': [r'zabardast', r'shandaar', r'umdah', r'behtareen', r'kharab',
                       r'bura', r'mayus', r'kamaal']
    }

    # Anything that is neither English nor Urdu falls into the Roman Urdu bucket.
    if language == 'English':
        bucket = 'english'
    elif language == 'Urdu':
        bucket = 'urdu'
    else:
        bucket = 'roman_urdu'

    found = []
    for pattern in strong_patterns[bucket]:
        found.extend(re.findall(pattern, lowered, re.IGNORECASE))
    return found
187
+
188
def adjust_sentiment_with_context(text, sentiment, score, language):
    """Second-pass sentiment correction driven by strong cue words.

    When the model's confidence is below 0.8 and a strong cue contradicts
    its verdict, flip the sentiment toward the cue and bump the score by
    0.2 (capped at 0.95). Independently, anything still under 0.6
    confidence is demoted to Neutral at 0.5.
    """
    found = get_strong_words(text, language)

    negative_cues = ['terrible', 'awful', 'horrible', 'disappointing', 'poor', 'bad',
                     'خراب', 'برا', 'مایوس کن', 'kharab', 'bura', 'mayus']

    positive_cues = ['excellent', 'outstanding', 'amazing', 'wonderful', 'perfect',
                     'brilliant', 'fantastic', 'superb', 'زبردست', 'شاندار', 'عمدہ',
                     'zabardast', 'shandaar', 'umdah']

    has_negative = any(cue in found for cue in negative_cues)
    has_positive = any(cue in found for cue in positive_cues)

    # Cue-driven flips only apply below the 0.8 confidence threshold;
    # negative cues take precedence when both kinds are present.
    if score < 0.8:
        if has_negative and sentiment in ("Positive", "Neutral"):
            return "Negative", min(score + 0.2, 0.95)
        if has_positive and sentiment in ("Negative", "Neutral"):
            return "Positive", min(score + 0.2, 0.95)

    # No flip happened: low residual confidence collapses to Neutral.
    if score < 0.6:
        return "Neutral", 0.5

    return sentiment, score
214
+
215
+ # -----------------------------
216
+ # Enhanced Ensemble Method
217
+ # -----------------------------
218
def ensemble_roman_urdu_enhanced(text):
    """Score Roman Urdu text with both the Roman Urdu and Urdu pipelines.

    Returns the winning pipeline's raw result dict ({"label", "score"}).
    When both models agree on the normalized label, the higher-confidence
    result wins; on disagreement, the Roman Urdu model's score is
    up-weighted by 1.3 before comparison, since the input was already
    detected as Roman Urdu.
    """
    normalized_text = normalize_roman_urdu_enhanced(text)

    try:
        ru_result = roman_urdu_model(normalized_text)[0]
        ur_result = urdu_model(normalized_text)[0]

        ru_sent = normalize_label(ru_result["label"])
        ur_sent = normalize_label(ur_result["label"])

        # If both agree, return the higher confidence one
        if ru_sent == ur_sent:
            return ru_result if ru_result["score"] >= ur_result["score"] else ur_result

        # Weight Roman Urdu model higher for Roman Urdu text
        ru_weight = ru_result["score"] * 1.3  # Increased weight
        ur_weight = ur_result["score"]

        return ru_result if ru_weight >= ur_weight else ur_result

    except Exception as e:
        print(f"Ensemble error: {e}")
        # Fallback to Roman Urdu model alone.
        # NOTE(review): if roman_urdu_model itself was the failing call, this
        # retry will raise out of the function — confirm callers handle it
        # (analyze_sentiment_enhanced wraps its call in try/except).
        return roman_urdu_model(normalized_text)[0]
243
+
244
  # -----------------------------
245
  # Normalize Labels
246
  # -----------------------------
247
def normalize_label(label):
    """Map a model-specific sentiment label onto Positive/Negative/Neutral.

    Different checkpoints emit different label strings ("POSITIVE", "Neg",
    "negative", ...), so matching is done on a lowercase substring.

    Fix over the previous revision: the positive branch also matched the
    substring "lab", which mapped EVERY generic "LABEL_N" output (including
    LABEL_0, typically the negative class) to "Positive" regardless of the
    actual class; it also carried duplicate list entries. "LABEL_N" style
    outputs now fall through to "Neutral" rather than being silently
    misread as Positive.
    """
    label = str(label).lower()

    if "pos" in label:        # covers "pos", "positive", "POSITIVE", ...
        return "Positive"
    elif "neg" in label:      # covers "neg", "negative", ...
        return "Negative"
    else:
        return "Neutral"
 
258
  # -----------------------------
259
  # Polarity Explanation
260
  # -----------------------------
261
def polarity_explanation_enhanced(text, sentiment, score, language):
    """Build a human-readable explanation for the predicted sentiment.

    Selects a canned sentence keyed by (sentiment, confidence tier) and,
    when strong cue words were found in the text, appends up to three of
    them. *sentiment* must be "Positive", "Negative", or "Neutral".
    """
    cue_words = get_strong_words(text, language)

    templates = {
        "Positive": {
            "high": "Strong positive sentiment with clear praise words.",
            "medium": "Moderately positive with some favorable expressions.",
            "low": "Slightly positive tone."
        },
        "Negative": {
            "high": "Strong negative sentiment with clear criticism.",
            "medium": "Moderately negative with some critical expressions.",
            "low": "Slightly negative tone."
        },
        "Neutral": {
            "high": "Clearly neutral or factual statement.",
            "medium": "Mostly neutral with balanced perspective.",
            "low": "Weak sentiment leaning neutral."
        }
    }

    # Fold the numeric confidence into one of three tier names.
    tier = "high" if score >= 0.8 else ("medium" if score >= 0.6 else "low")

    message = templates[sentiment][tier]
    if cue_words:
        message += f" Key words: {', '.join(cue_words[:3])}."
    return message
297
 
298
  # -----------------------------
299
+ # CSV Setup
300
  # -----------------------------
301
# Log file shared by all requests; LOCK_FILE is the filelock path that
# serializes concurrent writers.
SAVE_FILE = "sentiment_logs.csv"
LOCK_FILE = SAVE_FILE + ".lock"

# Seed the CSV with just the header row on first launch so later reads never
# hit a missing/headerless file. utf-8-sig keeps Urdu text readable in Excel.
if not os.path.exists(SAVE_FILE):
    pd.DataFrame(columns=["Sentence", "Language", "Sentiment", "Confidence", "Strong_Words"]).to_csv(
        SAVE_FILE, index=False, encoding="utf-8-sig"
    )
 
 
 
308
 
309
  # -----------------------------
310
  # Main Analysis Function
311
  # -----------------------------
312
def analyze_sentiment_enhanced(text, lang_hint):
    """End-to-end analysis pipeline behind the "Analyze" button.

    Returns a 5-tuple matching the Gradio outputs:
    (sentiment, score-as-string, explanation, log-file path, strong words).
    On any internal failure returns ("Error", "0", message, path, "")
    instead of raising, so the UI always gets a well-formed response.
    """
    if not text.strip():
        return "⚠️ Please enter a sentence.", "", "", SAVE_FILE, ""

    # Respect an explicit language choice; otherwise auto-detect.
    lang = lang_hint if lang_hint != "Auto Detect" else detect_language_enhanced(text)

    try:
        # Route to the model matching the detected/selected language.
        if lang == "English":
            result = english_model(text[:512])[0]  # Limit text length
        elif lang == "Urdu":
            result = urdu_model(text[:512])[0]
        else:  # Roman Urdu: two-model ensemble on normalized text
            result = ensemble_roman_urdu_enhanced(text)

        sentiment = normalize_label(result["label"])
        score = round(float(result["score"]), 3)

        # Context-aware sentiment adjustment (strong-word overrides,
        # low-confidence demotion to Neutral).
        sentiment, score = adjust_sentiment_with_context(text, sentiment, score, lang)

        # Strong cue words and the human-readable explanation.
        strong_words = get_strong_words(text, lang)
        explanation = polarity_explanation_enhanced(text, sentiment, score, lang)
        strong_words_str = ", ".join(strong_words[:5]) if strong_words else "None"

        # Append this result to the shared CSV under the file lock (several
        # Gradio workers may write concurrently).
        with FileLock(LOCK_FILE):
            df = pd.read_csv(SAVE_FILE, encoding="utf-8-sig") \
                if os.path.exists(SAVE_FILE) else pd.DataFrame(
                    columns=["Sentence", "Language", "Sentiment", "Confidence", "Strong_Words"]
                )
            new_row = pd.DataFrame([[text, lang, sentiment, score, strong_words_str]],
                                   columns=["Sentence", "Language", "Sentiment", "Confidence", "Strong_Words"])
            df = pd.concat([df, new_row], ignore_index=True)
            df.to_csv(SAVE_FILE, index=False, encoding="utf-8-sig")

        return sentiment, str(score), explanation, SAVE_FILE, strong_words_str

    except Exception as e:
        # Surface the failure in the UI rather than crashing the request.
        error_msg = f"Analysis error: {str(e)}"
        return "Error", "0", error_msg, SAVE_FILE, ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
356
 
357
  # -----------------------------
358
  # Show Logs
359
  # -----------------------------
360
def show_logs():
    """Return the 20 most recent log rows (or an empty frame if no log exists)."""
    columns = ["Sentence", "Language", "Sentiment", "Confidence", "Strong_Words"]
    if not os.path.exists(SAVE_FILE):
        return pd.DataFrame(columns=columns)
    # Keep the table small in the UI: last 20 entries only.
    return pd.read_csv(SAVE_FILE, encoding="utf-8-sig").tail(20)
366
 
367
  # -----------------------------
368
+ # Clear Logs
369
+ # -----------------------------
370
def clear_logs():
    """Reset the saved logs and return an empty frame for the UI table.

    Fix over the previous revision: the file was deleted outright, which
    left the "Download Complete Logs" File component pointing at a
    nonexistent path and dropped the CSV header row until the next
    analysis. The file is now rewritten with only its header row instead,
    so the download link stays valid. Return value (an empty DataFrame with
    the standard columns) is unchanged.
    """
    columns = ["Sentence", "Language", "Sentiment", "Confidence", "Strong_Words"]
    empty = pd.DataFrame(columns=columns)
    empty.to_csv(SAVE_FILE, index=False, encoding="utf-8-sig")
    return empty
374
+
375
  # -----------------------------
376
+ # Enhanced Gradio UI
377
+ # -----------------------------
378
# Two-column layout: inputs + action buttons on the left, result fields on
# the right, with the recent-log table underneath.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
# 🌍 Enhanced Multilingual Sentiment Analysis
**English Urdu Roman Urdu**

Advanced sentiment detection with:
- 🤖 Transformer-based language detection
- 🔍 Context-aware sentiment analysis
- 💪 Strong word extraction
- 🎯 Enhanced Roman Urdu processing
"""
    )

    with gr.Row():
        # Left column: text input, language override, action buttons.
        with gr.Column(scale=1):
            user_text = gr.Textbox(
                label="✍️ Enter Text",
                placeholder="Type in English, Urdu, or Roman Urdu...",
                lines=3
            )
            lang_dropdown = gr.Dropdown(
                ["Auto Detect", "English", "Urdu", "Roman Urdu"],
                value="Auto Detect",
                label="🌐 Language Selection"
            )

            with gr.Row():
                btn_analyze = gr.Button("🔍 Analyze Sentiment", variant="primary")
                btn_show = gr.Button("📂 Show Recent Logs")
                btn_clear = gr.Button("🗑️ Clear Logs", variant="secondary")

        # Right column: analysis outputs and the CSV download link.
        with gr.Column(scale=1):
            out_sent = gr.Textbox(label="🎭 Sentiment")
            out_conf = gr.Textbox(label="📊 Confidence Score")
            out_exp = gr.Textbox(label="💡 Analysis Explanation")
            out_strong = gr.Textbox(label="💪 Strong Words Detected")
            out_file = gr.File(label="⬇️ Download Complete Logs", type="filepath")

    with gr.Row():
        logs_df = gr.Dataframe(
            headers=["Sentence", "Language", "Sentiment", "Confidence", "Strong_Words"],
            label="📋 Recent Sentiment Logs",
            interactive=False,
            wrap=True,
            max_height=400
        )

    # Event handlers: output order must match analyze_sentiment_enhanced's
    # 5-tuple (sentiment, score, explanation, file path, strong words).
    btn_analyze.click(
        analyze_sentiment_enhanced,
        inputs=[user_text, lang_dropdown],
        outputs=[out_sent, out_conf, out_exp, out_file, out_strong]
    )

    btn_show.click(show_logs, outputs=[logs_df])
    btn_clear.click(clear_logs, outputs=[logs_df])

if __name__ == "__main__":
    demo.launch(share=False)