trackio-stats / app.py
abidlabs's picture
abidlabs HF Staff
Update app.py
04d3991 verified
#!/usr/bin/env python3
import datetime
import httpx
import polars as pl
import plotly.express as px
import gradio as gr
# PyPI package names whose download stats are fetched and plotted together.
# "trackio" is the one that gets highlighted in the chart.
PACKAGES = ["trackio", "wandb", "neptune", "comet-ml"]
def fetch_pypi_stats(package: str) -> pl.DataFrame:
    """Fetch daily PyPI download stats from pypistats.org for a given package.

    The ``overall`` endpoint reports two series per day (``with_mirrors`` and
    ``without_mirrors``). Only the ``without_mirrors`` series is kept so that
    every date appears exactly once per package; keeping both would make the
    plotted line zigzag and would mix the two series in any rolling average.

    Args:
        package: PyPI project name, e.g. ``"trackio"``.

    Returns:
        A frame with columns ``date`` (``pl.Date``), ``downloads`` and
        ``package``, sorted by date. An empty frame is returned when the API
        response contains no rows.

    Raises:
        httpx.HTTPStatusError: if the API answers with a non-2xx status.
        httpx.HTTPError: on transport-level failures (timeout, DNS, ...).
    """
    url = f"https://pypistats.org/api/packages/{package}/overall"
    # Ask the API for the mirror-free series only.
    r = httpx.get(url, params={"mirrors": "false"}, timeout=30.0)
    r.raise_for_status()
    data = r.json().get("data", [])
    df = pl.DataFrame(data)
    if df.is_empty():
        return df
    # Defensive: if both categories come back anyway, drop the mirror rows so
    # dates are not duplicated.
    if "category" in df.columns:
        df = df.filter(pl.col("category") == "without_mirrors")
    return (
        df.select(["date", "downloads"])
        .with_columns(pl.col("date").str.strptime(pl.Date, "%Y-%m-%d"))
        .sort("date")
        .with_columns(pl.lit(package).alias("package"))
    )
# Most recent combined frame, stored as (monotonic timestamp, frame).
_FETCH_ALL_CACHE = {}
# pypistats refreshes its numbers about once a day, so an hour is plenty fresh.
_FETCH_ALL_TTL_SECONDS = 3600.0


def fetch_all():
    """Fetch download stats for every package in ``PACKAGES`` as one frame.

    The combined result is cached in-process for ``_FETCH_ALL_TTL_SECONDS``.
    Without the cache every UI change would issue one HTTP request per
    package against a rate-limited API.

    Returns:
        A ``pl.DataFrame`` with columns ``date``, ``downloads`` and
        ``package`` (all packages stacked vertically). If no package returned
        any rows, an empty frame with those three columns is returned and
        nothing is cached.

    Raises:
        Whatever ``fetch_pypi_stats`` raises (HTTP / transport errors).
    """
    import time

    now = time.monotonic()
    cached = _FETCH_ALL_CACHE.get("all")
    if cached is not None and now - cached[0] < _FETCH_ALL_TTL_SECONDS:
        return cached[1]

    dfs = []
    for pkg in PACKAGES:
        df = fetch_pypi_stats(pkg)
        if df.is_empty():
            continue
        # Ensure at least one nonzero download for rangeslider
        if df["downloads"].sum() == 0:
            # Keep the column's existing dtype: a bare pl.lit(1) is Int32 and
            # would make pl.concat fail against the other packages' frames.
            df = df.with_columns(
                pl.lit(1, dtype=df.schema["downloads"]).alias("downloads")
            )
        dfs.append(df)

    if not dfs:
        return pl.DataFrame(schema=["date", "downloads", "package"])

    combined = pl.concat(dfs)
    _FETCH_ALL_CACHE["all"] = (now, combined)
    return combined
def get_fig(df: pl.DataFrame, start_date=None, end_date=None, smooth=False, n_days=7):
    """Build the log-scale line chart comparing package downloads.

    Args:
        df: Frame with ``date``, ``downloads`` and ``package`` columns.
        start_date: Left bound of the visible x-range (applied only when
            ``end_date`` is also given).
        end_date: Right bound of the visible x-range.
        smooth: When true, plot a per-package rolling mean instead of the raw
            daily counts.
        n_days: Rolling-window length in days; coerced to an integer >= 1.

    Returns:
        A Plotly figure. For an empty ``df`` a placeholder scatter titled
        "No data available" is returned.
    """
    if df.is_empty():
        return px.scatter(title="No data available")

    # Shift downloads by 1 to avoid log(0) issues
    df = df.with_columns((pl.col("downloads") + 1).alias("downloads"))

    if smooth:
        # UI sliders may hand over a float; rolling_mean needs an int window.
        window = max(1, int(n_days))
        y = f"{window}-day avg"
        # The window runs within each package, in the frame's row order.
        df = df.with_columns(
            pl.col("downloads")
            .rolling_mean(window_size=window)
            .over("package")
            .alias(y)
        )
    else:
        y = "downloads"

    color_map = {pkg: ("red" if pkg == "trackio" else None) for pkg in PACKAGES}
    fig = px.line(
        df.to_pandas(),
        x="date",
        y=y,
        color="package",
        title=f"PyPI Downloads — {', '.join(PACKAGES)}",
        markers=True,
        log_y=True,
        color_discrete_map=color_map,
    )

    # Trackio stands out
    for trace in fig.data:
        if trace.name == "trackio":
            trace.line.width = 4
            trace.marker.symbol = "star"
            trace.marker.size = 10

    # Disable the range-slider preview under the x-axis.
    fig.update_layout(xaxis_rangeslider_visible=False)

    if start_date and end_date:
        fig.update_layout(xaxis_range=[start_date, end_date])

    fig.update_yaxes(title="Downloads per day (log scale)")
    fig.update_xaxes(title="Date")
    fig.update_layout(legend_title="Package")
    return fig
def update_fig(start_date, end_date, smooth, n_days):
    """Callback for the UI controls: load the stats and rebuild the chart.

    The four arguments are forwarded unchanged to ``get_fig`` together with
    the frame returned by ``fetch_all``.
    """
    return get_fig(
        fetch_all(),
        start_date=start_date,
        end_date=end_date,
        smooth=smooth,
        n_days=n_days,
    )
def update_date_range(
    delta_days: int = 42,  # 6 weeks
) -> tuple[datetime.datetime, datetime.datetime]:
    """Return the default ``(start, end)`` window ending now.

    Args:
        delta_days: Length of the window in days.

    Returns:
        A pair of timezone-aware UTC datetimes ``(now - delta_days, now)``.
    """
    today = datetime.datetime.now(datetime.timezone.utc)
    start_date = today - datetime.timedelta(days=delta_days)
    return start_date, today
# --- Gradio App ---
# Layout: a heading, a row of date pickers, a row of smoothing controls, and
# the plot. On page load the date pickers are filled with the default window;
# any change to a control then re-renders the plot through update_fig.
with gr.Blocks() as demo:
    gr.Markdown("## 📈 PyPI Downloads Comparison (trackio, wandb, neptune, comet-ml)")
    with gr.Row():
        start_date = gr.DateTime(label="Start date", type="datetime", include_time=False)
        end_date = gr.DateTime(label="End date", type="datetime", include_time=False)
    with gr.Row():
        smooth = gr.Checkbox(label="Show moving average", value=True)
        n_days = gr.Slider(label="Days", minimum=1, maximum=28, step=1, value=7)
    fig = gr.Plot(label="PyPI Downloads Comparison")

    # Populate the date pickers when the page loads.
    demo.load(fn=update_date_range, outputs=[start_date, end_date])

    # Redraw whenever any of the four controls changes.
    gr.on(
        triggers=[start_date.change, end_date.change, smooth.change, n_days.change],
        fn=update_fig,
        inputs=[start_date, end_date, smooth, n_days],
        outputs=fig,
    )

demo.launch()