Anirban0011's picture
upd
fcd2005
raw
history blame contribute delete
314 Bytes
import re
import string
from unidecode import unidecode
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
# Combined English + Indonesian stopword set (NLTK corpora must be downloaded).
# NOTE(review): not referenced by clean_text below — presumably consumed by code
# outside this file/chunk; verify before removing.
stop_words = set(stopwords.words("english")) | set(stopwords.words("indonesian"))
def clean_text(text):
    """Normalize raw text for downstream processing.

    Transliterates any non-ASCII characters to their closest ASCII
    equivalents (via ``unidecode``) and lowercases the result.
    Note: does not tokenize or strip stopwords.
    """
    ascii_text = unidecode(text)
    return ascii_text.lower()