Spaces:
Sleeping
Sleeping
Add reported phishing to dataset
Browse files- app.py +15 -3
- phishing_datasets.py +28 -0
- requirements.txt +3 -1
app.py
CHANGED
|
@@ -3,6 +3,7 @@ from fastapi.responses import JSONResponse
|
|
| 3 |
from pydantic import BaseModel
|
| 4 |
from enum import Enum
|
| 5 |
from transformers import pipeline
|
|
|
|
| 6 |
|
| 7 |
app = FastAPI()
|
| 8 |
|
|
@@ -42,6 +43,7 @@ class OutputModel(BaseModel):
|
|
| 42 |
|
| 43 |
pipe = pipeline(task="text-classification", model="mrm8488/bert-tiny-finetuned-sms-spam-detection")
|
| 44 |
|
|
|
|
| 45 |
@app.get("/.well-known/apple-app-site-association", include_in_schema=False)
|
| 46 |
def get_well_known_aasa():
|
| 47 |
return JSONResponse(
|
|
@@ -57,9 +59,19 @@ def get_well_known_aasa():
|
|
| 57 |
)
|
| 58 |
|
| 59 |
@app.post("/predict")
|
| 60 |
-
def predict(
|
| 61 |
-
|
|
|
|
| 62 |
if label[0]['label'] == 'LABEL_1':
|
|
|
|
| 63 |
return OutputModel(action=ActionModel.JUNK, sub_action=SubActionModel.NONE)
|
| 64 |
else:
|
| 65 |
-
return OutputModel(action=ActionModel.NONE, sub_action=SubActionModel.NONE)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
from pydantic import BaseModel
|
| 4 |
from enum import Enum
|
| 5 |
from transformers import pipeline
|
| 6 |
+
from phishing_datasets import submit_entry
|
| 7 |
|
| 8 |
app = FastAPI()
|
| 9 |
|
|
|
|
| 43 |
|
| 44 |
pipe = pipeline(task="text-classification", model="mrm8488/bert-tiny-finetuned-sms-spam-detection")
|
| 45 |
|
| 46 |
+
|
| 47 |
@app.get("/.well-known/apple-app-site-association", include_in_schema=False)
|
| 48 |
def get_well_known_aasa():
|
| 49 |
return JSONResponse(
|
|
|
|
| 59 |
)
|
| 60 |
|
| 61 |
@app.post("/predict")
def predict(model: InputModel) -> OutputModel:
    """Classify the text of an incoming message and choose a filter action.

    The message text is run through the module-level ``pipe`` classifier and
    only the first result is inspected. When its label is ``LABEL_1``
    (presumably the spam class of the loaded model -- confirm against the
    model card) the sender and text are handed to ``submit_entry`` and the
    message is marked as junk; any other label leaves the message alone.

    Args:
        model: Request payload; ``query.sender`` and ``query.message.text``
            are the fields read here.

    Returns:
        An ``OutputModel`` whose action is ``ActionModel.JUNK`` for
        ``LABEL_1`` and ``ActionModel.NONE`` otherwise. The sub-action is
        always ``SubActionModel.NONE``.
    """
    text = model.query.message.text
    top_result = pipe(text)[0]

    # Guard clause: anything that is not LABEL_1 passes through untouched.
    if top_result['label'] != 'LABEL_1':
        return OutputModel(action=ActionModel.NONE, sub_action=SubActionModel.NONE)

    # Flagged messages are also recorded in the dataset before answering.
    submit_entry(model.query.sender, text)
    return OutputModel(action=ActionModel.JUNK, sub_action=SubActionModel.NONE)
|
| 70 |
+
|
| 71 |
+
class ReportModel(BaseModel):
    """Request body accepted by the ``/report`` endpoint."""

    # Sender of the reported message, as supplied by the client.
    sender: str
    # Text of the reported message.
    message: str
|
| 74 |
+
|
| 75 |
+
@app.post("/report")
def report(model: ReportModel):
    """Forward a client-reported message to ``submit_entry``.

    Args:
        model: Report payload carrying the ``sender`` and ``message`` strings.

    Returns:
        Nothing; the function has no explicit return value.
    """
    sender, message = model.sender, model.message
    submit_entry(sender, message)
|
phishing_datasets.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
from datasets import load_dataset, Dataset
|
| 3 |
+
import os
|
| 4 |
+
|
| 5 |
+
# Hub dataset repository id, supplied through the environment.
DATASET_NAME = os.getenv("DATASET_NAME")
if not DATASET_NAME:
    # os.getenv returns None when the variable is unset; stop here with an
    # actionable message rather than handing None to load_dataset.
    raise RuntimeError(
        "The DATASET_NAME environment variable must be set to the dataset "
        "repository id."
    )

# Load the "train" split once at import time. `df` is the in-memory copy that
# submit_entry checks for duplicates and extends with new reports.
dataset = load_dataset(DATASET_NAME, split="train")
df = pd.DataFrame(dataset)
|
| 9 |
+
|
| 10 |
+
def submit_entry(sender, message):
    """Add a new SMS phishing report if it's not already in the dataset.

    The sender is stripped and has every space removed, and the message is
    stripped, before being compared against the module-level ``df``. A report
    that is not yet present is appended and the whole frame is pushed to the
    Hub dataset named by ``DATASET_NAME``.

    Args:
        sender: Sender string of the reported SMS.
        message: Text of the reported SMS.

    Returns:
        A status string: a warning when the entry already exists (nothing is
        appended or pushed), otherwise a success message after the push.

    Raises:
        Whatever ``Dataset.push_to_hub`` raises on failure; in that case the
        in-memory ``df`` is left unchanged.
    """
    global df

    sender = sender.strip().replace(" ", "")  # Remove all spaces inside sender
    message = message.strip()

    # Check for duplicates and stop early, so an existing report is neither
    # appended a second time nor pushed again.
    if ((df["sender"] == sender) & (df["message"] == message)).any():
        return "⚠️ This entry already exists in the dataset!"

    # Append new entry
    new_entry = pd.DataFrame([[sender, message]], columns=["sender", "message"])
    updated = pd.concat([df, new_entry], ignore_index=True)

    new_dataset = Dataset.from_pandas(updated)
    new_dataset.push_to_hub(DATASET_NAME)

    # Adopt the extended frame only after the push succeeded; otherwise a
    # failed push would leave the entry in memory and a retry would be
    # rejected as a duplicate without ever reaching the Hub.
    df = updated

    return "✅ Submission saved successfully!"
|
requirements.txt
CHANGED
|
@@ -2,4 +2,6 @@ fastapi
|
|
| 2 |
uvicorn[standard]
|
| 3 |
pydantic
|
| 4 |
transformers
|
| 5 |
-
torch
|
|
|
|
|
|
|
|
|
| 2 |
uvicorn[standard]
|
| 3 |
pydantic
|
| 4 |
transformers
|
| 5 |
+
torch
|
| 6 |
+
datasets
|
| 7 |
+
pandas
|