whyu committed on
Commit
2bf38f8
·
1 Parent(s): fe5a20d

support free gpt-4.1 with our key

Browse files
Files changed (3) hide show
  1. README.md +1 -1
  2. app.py +71 -49
  3. requirements.txt +1 -1
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 🐨
4
  colorFrom: gray
5
  colorTo: gray
6
  sdk: gradio
7
- sdk_version: 4.36.0
8
  app_file: app.py
9
  pinned: false
10
  license: apache-2.0
 
4
  colorFrom: gray
5
  colorTo: gray
6
  sdk: gradio
7
+ sdk_version: 5.46.1
8
  app_file: app.py
9
  pinned: false
10
  license: apache-2.0
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import gradio as gr
2
- import openai
3
  import json
4
  import os
5
  import uuid
@@ -52,9 +52,16 @@ def schedule_cleanup(paths, delay=600):
52
  threading.Thread(target=_clean, daemon=True).start()
53
 
54
 
55
- def grade(file_obj, key, model, progress=gr.Progress()):
56
- # set set api key
57
- openai.api_key = key
 
 
 
 
 
 
 
58
  gpt_model = model
59
 
60
  workdir = tempfile.mkdtemp(prefix="mmvet_grade_")
@@ -135,9 +142,9 @@ def grade(file_obj, key, model, progress=gr.Progress()):
135
  # model_results_file = os.path.join(result_path, f"{model}.json")
136
  model_results_file = file_obj.name
137
 
138
- grade_file = os.path.join(workdir, f'{model_name}_{gpt_model}-grade-{num_run}runs_{uid}.json')
139
- cap_score_file = os.path.join(workdir, f'{model_name}_{sub_set_name}{gpt_model}-cap-score-{num_run}runs_{uid}.csv')
140
- cap_int_score_file = os.path.join(workdir, f'{model_name}_{sub_set_name}{gpt_model}-cap-int-score-{num_run}runs_{uid}.csv')
141
  zip_file = os.path.join(workdir, f"results_{uid}.zip")
142
 
143
 
@@ -189,13 +196,13 @@ def grade(file_obj, key, model, progress=gr.Progress()):
189
  num_sleep = 0
190
  while not grade_sample_run_complete:
191
  try:
192
- response = openai.ChatCompletion.create(
193
  model=gpt_model,
194
  # engine=gpt_model, # For Azure OpenAI
195
  max_tokens=3,
196
  temperature=temperature,
197
  messages=messages)
198
- content = response['choices'][0]['message']['content']
199
  flag = True
200
  try_time = 1
201
  while flag:
@@ -210,13 +217,13 @@ def grade(file_obj, key, model, progress=gr.Progress()):
210
  messages = [
211
  {"role": "user", "content": question},
212
  ]
213
- response = openai.ChatCompletion.create(
214
  model=gpt_model,
215
  # engine=gpt_model, # For Azure OpenAI
216
  max_tokens=3,
217
  temperature=temperature,
218
  messages=messages)
219
- content = response['choices'][0]['message']['content']
220
  try_time += 1
221
  temperature += 0.5
222
  print(f"{id} try {try_time} times")
@@ -238,7 +245,7 @@ def grade(file_obj, key, model, progress=gr.Progress()):
238
  time.sleep(5)
239
 
240
 
241
- resp_model = str(response.get('model', gpt_model))
242
  content_str = str(content)
243
 
244
  if len(sample_grade['model']) >= j + 1:
@@ -292,14 +299,14 @@ def grade(file_obj, key, model, progress=gr.Progress()):
292
 
293
  cap_socres['std'] = std
294
  cap_socres['runs'] = runs
295
- df.loc[model] = cap_socres
296
 
297
 
298
  for k, v in cap_socres2.items():
299
  cap_socres2[k] = round(np.mean(np.array(v) / counter2[k] *100), decimal_places)
300
  cap_socres2['std'] = std
301
  cap_socres2['runs'] = runs
302
- df2.loc[model] = cap_socres2
303
 
304
  df.to_csv(cap_score_file)
305
  df2.to_csv(cap_int_score_file)
@@ -325,37 +332,25 @@ def grade(file_obj, key, model, progress=gr.Progress()):
325
 
326
  # --- Validate key and model before running grading ---
327
  def validate_key_and_model(key: str, model: str, api_base: str = None):
328
- openai.api_key = key.strip()
329
- if api_base and api_base.strip():
330
- openai.api_base = api_base.strip() # 用户自定义 api_base
331
- else:
332
- openai.api_base = "https://api.openai.com/v1" # 默认官方 OpenAI
333
-
334
  try:
335
- openai.Model.retrieve(model)
 
 
 
 
336
  return True, "OK"
337
- except openai.error.AuthenticationError:
338
- return False, "Invalid OpenAI API key. Please check and try again."
339
- except openai.error.InvalidRequestError as e:
340
- msg = str(e)
341
- if "does not exist" in msg or "You do not have access" in msg or "model_not_found" in msg:
342
- return False, f"API key is valid, but you do not have access to model `{model}`."
343
- return False, f"Invalid request: {msg}"
344
- except openai.error.RateLimitError:
345
- return False, "Rate limit or quota exceeded. Please try again later."
346
- except openai.error.APIConnectionError:
347
- return False, "Failed to connect to OpenAI service. Please check your network."
348
- except openai.error.OpenAIError as e:
349
- return False, f"OpenAI returned an error: {e}"
350
  except Exception as e:
351
- return False, f"Unexpected error: {e}"
352
 
353
  # --- Wrapper for the grading function ---
354
  def run_grade(file_obj, key, model, api_base, progress=gr.Progress(track_tqdm=True)):
355
- ok, msg = validate_key_and_model(key, model, api_base)
356
- if not ok:
357
- raise gr.Error(msg)
358
- return grade(file_obj, key, model, progress=progress)
 
 
 
359
 
360
 
361
  markdown = """
@@ -378,21 +373,48 @@ The grading results will be downloaded as a zip file.
378
 
379
  with gr.Blocks() as demo:
380
  gr.Markdown(markdown)
381
- key = gr.Textbox(label=f"Enter your OpenAI API Key (this space will not save your API Key). [Pay Attention]: this evaluaiton may cost several dollars, please notice your OpenAI API Key balance.", type="password")
382
- api_base = gr.Textbox(
383
- label="Enter your OpenAI API Base (leave empty to use official OpenAI)",
384
- value=""
385
- )
386
  model = gr.Dropdown(
387
- choices=["gpt-4-0613", "gpt-4.1", "gpt-4-turbo"],
388
  value="gpt-4.1",
389
- label="Select GPt-4 model version (gpt-4-0613 is the default in the paper). Price per 1M input tokens: gpt-4.1 $2.00, gpt-4-turbo $10.00, gpt-4-0613 $30.00"
390
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
391
  with gr.Row():
392
- inp = gr.File(file_types=[".json"])
393
- out = gr.File(file_types=[".zip"])
 
394
  btn = gr.Button("Start grading", variant="primary")
395
- btn.click(fn=run_grade, inputs=[inp, key, model, api_base], outputs=out)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
396
 
397
  if __name__ == "__main__":
398
  demo.queue(max_size=8).launch()
 
1
  import gradio as gr
2
+ from openai import OpenAI
3
  import json
4
  import os
5
  import uuid
 
52
  threading.Thread(target=_clean, daemon=True).start()
53
 
54
 
55
+ def grade(file_obj, key, model, api_base, progress=gr.Progress()):
56
+ if "mmvet" in model:
57
+ # use our api key for users
58
+ key = os.environ.get("AZURE_OPENAI_KEY")
59
+ api_base = os.environ.get("AZURE_OPENAI_ENDPOINT")
60
+
61
+ client = OpenAI(
62
+ base_url=api_base.strip() if api_base and api_base.strip() else "https://api.openai.com/v1",
63
+ api_key=key.strip()
64
+ )
65
  gpt_model = model
66
 
67
  workdir = tempfile.mkdtemp(prefix="mmvet_grade_")
 
142
  # model_results_file = os.path.join(result_path, f"{model}.json")
143
  model_results_file = file_obj.name
144
 
145
+ grade_file = os.path.join(workdir, f'{model_name}_{gpt_model.replace("-mmvet", "")}-grade-{num_run}runs_{uid}.json')
146
+ cap_score_file = os.path.join(workdir, f'{model_name}_{sub_set_name}{gpt_model.replace("-mmvet", "")}-cap-score-{num_run}runs_{uid}.csv')
147
+ cap_int_score_file = os.path.join(workdir, f'{model_name}_{sub_set_name}{gpt_model.replace("-mmvet", "")}-cap-int-score-{num_run}runs_{uid}.csv')
148
  zip_file = os.path.join(workdir, f"results_{uid}.zip")
149
 
150
 
 
196
  num_sleep = 0
197
  while not grade_sample_run_complete:
198
  try:
199
+ response = client.chat.completions.create(
200
  model=gpt_model,
201
  # engine=gpt_model, # For Azure OpenAI
202
  max_tokens=3,
203
  temperature=temperature,
204
  messages=messages)
205
+ content = response.choices[0].message.content
206
  flag = True
207
  try_time = 1
208
  while flag:
 
217
  messages = [
218
  {"role": "user", "content": question},
219
  ]
220
+ response = client.chat.completions.create(
221
  model=gpt_model,
222
  # engine=gpt_model, # For Azure OpenAI
223
  max_tokens=3,
224
  temperature=temperature,
225
  messages=messages)
226
+ content = response.choices[0].message.content
227
  try_time += 1
228
  temperature += 0.5
229
  print(f"{id} try {try_time} times")
 
245
  time.sleep(5)
246
 
247
 
248
+ resp_model = (getattr(response, "model", None) or gpt_model)
249
  content_str = str(content)
250
 
251
  if len(sample_grade['model']) >= j + 1:
 
299
 
300
  cap_socres['std'] = std
301
  cap_socres['runs'] = runs
302
+ df.loc[gpt_model.replace("-mmvet", "")] = cap_socres
303
 
304
 
305
  for k, v in cap_socres2.items():
306
  cap_socres2[k] = round(np.mean(np.array(v) / counter2[k] *100), decimal_places)
307
  cap_socres2['std'] = std
308
  cap_socres2['runs'] = runs
309
+ df2.loc[gpt_model.replace("-mmvet", "")] = cap_socres2
310
 
311
  df.to_csv(cap_score_file)
312
  df2.to_csv(cap_int_score_file)
 
332
 
333
  # --- Validate key and model before running grading ---
334
  def validate_key_and_model(key: str, model: str, api_base: str = None):
 
 
 
 
 
 
335
  try:
336
+ client = OpenAI(
337
+ base_url=api_base.strip() if api_base and api_base.strip() else "https://api.openai.com/v1",
338
+ api_key=key.strip()
339
+ )
340
+ client.models.retrieve(model)
341
  return True, "OK"
 
 
 
 
 
 
 
 
 
 
 
 
 
342
  except Exception as e:
343
+ return False, str(e)
344
 
345
  # --- Wrapper for the grading function ---
346
  def run_grade(file_obj, key, model, api_base, progress=gr.Progress(track_tqdm=True)):
347
+ if model is None:
348
+ model = "gpt-4.1-mmvet"
349
+ if "mmvet" not in model:
350
+ ok, msg = validate_key_and_model(key, model, api_base)
351
+ if not ok:
352
+ raise gr.Error(msg)
353
+ return grade(file_obj, key, model, api_base, progress=progress)
354
 
355
 
356
  markdown = """
 
373
 
374
  with gr.Blocks() as demo:
375
  gr.Markdown(markdown)
376
+ # Model selection
 
 
 
 
377
  model = gr.Dropdown(
378
+ choices=["gpt-4.1", "gpt-4-0613", "gpt-4-turbo"],
379
  value="gpt-4.1",
380
+ label="Select model (gpt-4.1 is free with our api key)"
381
  )
382
+
383
+ # User OpenAI fields (only for non-Azure models)
384
+ with gr.Row():
385
+ user_key = gr.Textbox(
386
+ label="Your OpenAI API Key (required for gpt-4-0613 (default in the paper) / gpt-4-turbo). The evaluation may cost several dollars, please notice your OpenAI API Key balance. 1M input tokens: gpt-4-turbo $10.00, gpt-4-0613 $30.00",
387
+ type="password",
388
+ visible=False
389
+ )
390
+ user_api_base = gr.Textbox(
391
+ label="Your OpenAI Base URL (optional, leave empty for official)",
392
+ value="",
393
+ visible=False
394
+ )
395
+
396
+ # File I/O
397
  with gr.Row():
398
+ inp = gr.File(file_types=[".json"], label="Upload your model result JSON")
399
+ out = gr.File(file_types=[".zip"], label="Download grading results")
400
+
401
  btn = gr.Button("Start grading", variant="primary")
402
+
403
+ # Toggle fields based on selection
404
+ def _toggle_fields(selected):
405
+ if selected == "gpt-4.1":
406
+ return gr.update(visible=False), gr.update(visible=False)
407
+ else:
408
+ return gr.update(visible=True), gr.update(visible=True)
409
+
410
+ model.change(_toggle_fields, inputs=[model], outputs=[user_key, user_api_base])
411
+
412
+ # Click handler
413
+ btn.click(
414
+ fn=run_grade,
415
+ inputs=[inp, model, user_key, user_api_base],
416
+ outputs=out
417
+ )
418
 
419
  if __name__ == "__main__":
420
  demo.queue(max_size=8).launch()
requirements.txt CHANGED
@@ -1 +1 @@
1
- openai==0.28.1
 
1
+ openai==1.108.1