Commit
·
8f4dd40
1
Parent(s):
19421d9
Update app.py
Browse files
app.py
CHANGED
|
@@ -522,8 +522,12 @@ def start_controller():
|
|
| 522 |
return subprocess.Popen(controller_command)
|
| 523 |
|
| 524 |
|
| 525 |
-
def start_worker(model_path: str):
|
| 526 |
logger.info(f"Starting the model worker for the model {model_path}")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 527 |
worker_command = [
|
| 528 |
"python",
|
| 529 |
"-m",
|
|
@@ -535,9 +539,10 @@ def start_worker(model_path: str):
|
|
| 535 |
"--model-path",
|
| 536 |
model_path,
|
| 537 |
"--model-name",
|
| 538 |
-
|
| 539 |
-
"--load-4bit",
|
| 540 |
]
|
|
|
|
|
|
|
| 541 |
return subprocess.Popen(worker_command)
|
| 542 |
|
| 543 |
|
|
@@ -586,8 +591,10 @@ if __name__ == "__main__":
|
|
| 586 |
logger.info(f"args: {args}")
|
| 587 |
|
| 588 |
model_path = "liuhaotian/llava-v1.5-13b"
|
|
|
|
|
|
|
| 589 |
|
| 590 |
-
preload_models(model_path)
|
| 591 |
|
| 592 |
controller_proc = start_controller()
|
| 593 |
worker_proc = start_worker(model_path)
|
|
|
|
| 522 |
return subprocess.Popen(controller_command)
|
| 523 |
|
| 524 |
|
| 525 |
+
def start_worker(model_path: str, bits=16):
|
| 526 |
logger.info(f"Starting the model worker for the model {model_path}")
|
| 527 |
+
model_name = model_path.strip('/').split('/')[-1]
|
| 528 |
+
assert bits in [4, 8, 16], "It can be only loaded with 16-bit, 8-bit, and 4-bit."
|
| 529 |
+
if bits != 16:
|
| 530 |
+
model_name += f'-{bits}bit'
|
| 531 |
worker_command = [
|
| 532 |
"python",
|
| 533 |
"-m",
|
|
|
|
| 539 |
"--model-path",
|
| 540 |
model_path,
|
| 541 |
"--model-name",
|
| 542 |
+
model_name,
|
|
|
|
| 543 |
]
|
| 544 |
+
if bits != 16:
|
| 545 |
+
worker_command += [f'--load-{bits}bit']
|
| 546 |
return subprocess.Popen(worker_command)
|
| 547 |
|
| 548 |
|
|
|
|
| 591 |
logger.info(f"args: {args}")
|
| 592 |
|
| 593 |
model_path = "liuhaotian/llava-v1.5-13b"
|
| 594 |
+
bits = 4
|
| 595 |
+
# set bits=4 for T4, bits=8 for A10G (24G), and bits=16 for A100 (40G)
|
| 596 |
|
| 597 |
+
preload_models(model_path, bits=bits)
|
| 598 |
|
| 599 |
controller_proc = start_controller()
|
| 600 |
# Forward the configured precision; start_worker defaults to bits=16, which
# would ignore the `bits` value chosen above and skip the --load-{bits}bit flag.
worker_proc = start_worker(model_path, bits=bits)
|