@@ -458,81 +458,6 @@ jobs:
458
458
python torchchat.py generate stories15M
459
459
python torchchat.py remove stories15m
460
460
461
- test-tinystories-eager :
462
- strategy :
463
- matrix :
464
- runner : [macos-12]
465
- runs-on : ${{matrix.runner}}
466
- steps :
467
- - name : Checkout repo
468
- uses : actions/checkout@v2
469
- - name : Setup Python
470
- uses : actions/setup-python@v2
471
- with :
472
- python-version : 3.11
473
- - name : Print machine info
474
- run : |
475
- uname -a
476
- if [ $(uname -s) == Darwin ]; then
477
- sysctl machdep.cpu.brand_string
478
- sysctl machdep.cpu.core_count
479
- fi
480
- - name : Install requirements
481
- run : |
482
- pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu
483
- pip install -r requirements.txt
484
- python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
485
- - name : Download checkpoints
486
- run : |
487
- mkdir -p checkpoints/stories15M
488
- pushd checkpoints/stories15M
489
- wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.pt
490
- wget https://github.com/karpathy/llama2.c/raw/master/tokenizer.model
491
- popd
492
- - name : Run inference
493
- run : |
494
- export MODEL_PATH=checkpoints/stories15M/stories15M.pt
495
- export MODEL_NAME=stories15M
496
- export MODEL_DIR=/tmp
497
- for DTYPE in bfloat16 float16 float32; do
498
- # if [ $(uname -s) == Darwin ]; then
499
- # export DTYPE=float16
500
- # fi
501
- python3 torchchat.py generate --dtype ${DTYPE} --checkpoint-path ${MODEL_PATH} --temperature 0
502
-
503
- echo "******************************************"
504
- echo "******* Emb: channel-wise quantized ******"
505
- echo "******************************************"
506
- python3 torchchat.py generate --dtype ${DTYPE} --quant '{"embedding" : {"bitwidth": 8, "groupsize": 0}}' --checkpoint-path ${MODEL_PATH} --temperature 0
507
-
508
- echo "******************************************"
509
- echo "******** Emb: group-wise quantized *******"
510
- echo "******************************************"
511
- python3 torchchat.py generate --dtype ${DTYPE} --quant '{"embedding" : {"bitwidth": 8, "groupsize": 8}}' --checkpoint-path ${MODEL_PATH} --temperature 0
512
-
513
- echo "******************************************"
514
- echo "******* INT8 channel-wise quantized ******"
515
- echo "******************************************"
516
- python3 torchchat.py generate --dtype ${DTYPE} --quant '{"linear:int8" : {"bitwidth": 8, "groupsize": 0}}' --checkpoint-path ${MODEL_PATH} --temperature 0
517
-
518
- echo "******************************************"
519
- echo "******** INT8 group-wise quantized *******"
520
- echo "******************************************"
521
- python3 torchchat.py generate --dtype ${DTYPE} --quant '{"linear:int8" : {"bitwidth": 8, "groupsize": 8}}' --checkpoint-path ${MODEL_PATH} --temperature 0
522
-
523
- echo "******************************************"
524
- echo "******** INT4 group-wise quantized *******"
525
- echo "******************************************"
526
-
527
- echo "INT4 should work on MacOS on x86, but cannot be tested"
528
- echo "because nightlies are too old!"
529
-
530
- # python3 torchchat.py generate --dtype ${DTYPE} --quant '{"linear:int4" : {"groupsize": 32}}' --checkpoint-path ${MODEL_PATH} --temperature 0
531
-
532
- echo "tests complete for ${DTYPE}"
533
- done
534
-
535
- echo "tests complete for all dtypes!"
536
461
test-mps :
537
462
uses : pytorch/test-infra/.github/workflows/macos_job.yml@main
538
463
with :
0 commit comments