Skip to content

Commit 1023c9e

Browse files
authored
Merge pull request pytorch#104 from pjh5/mac_cron
Edits for mac-crons plus edits to tests
2 parents 266d6b9 + c2b887a commit 1023c9e

File tree

16 files changed

+491
-339
lines changed

16 files changed

+491
-339
lines changed

conda/build_pytorch.sh

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -97,9 +97,7 @@ SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )"
9797
cd "$SOURCE_DIR"
9898

9999
# Determine which build folder to use, if not given it directly
100-
if [[ -n "$TORCH_CONDA_BUILD_FOLDER" ]]; then
101-
build_folder="$TORCH_CONDA_BUILD_FOLDER"
102-
elif [[ -n "$build_nightly" ]]; then
100+
if [[ -n "$build_nightly" ]]; then
103101
build_folder='pytorch-nightly'
104102
else
105103
if [[ "$OSTYPE" == 'darwin'* || "$desired_cuda" == '9.0' ]]; then
@@ -201,7 +199,7 @@ for py_ver in "${DESIRED_PYTHON[@]}"; do
201199
tests_to_skip=()
202200
if [[ "$ALLOW_DISTRIBUTED_TEST_ERRORS" ]]; then
203201
# Distributed tests don't work on the shared gpus of CI
204-
tests_to_skip+=("distributed" "c10d")
202+
tests_to_skip+=("distributed" "thd_distributed" "c10d")
205203
fi
206204
if [[ "$py_ver" == '2.7' ]]; then
207205
# test_wrong_return_type doesn't work on the latest conda python 2.7
@@ -212,12 +210,12 @@ for py_ver in "${DESIRED_PYTHON[@]}"; do
212210
if [[ -n "$RUN_TEST_PARAMS" ]]; then
213211
python test/run_test.py ${RUN_TEST_PARAMS[@]}
214212
elif [[ -n "$tests_to_skip" ]]; then
215-
python test/run_test.py -x ${tests_to_skip[@]}
213+
python test/run_test.py -v -x ${tests_to_skip[@]}
216214
set +e
217-
python test/run_test.py -i ${tests_to_skip[@]}
215+
python test/run_test.py -v -i ${tests_to_skip[@]}
218216
set -e
219217
else
220-
python test/run_test.py
218+
python test/run_test.py -v
221219
fi
222220
popd
223221

cron/README

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,18 @@
1-
# At its simplest you can do
2-
./prep_nightlies.sh
3-
./build_multiple.sh manywheel all all
4-
./build_multiple.sh conda all all
5-
./upload.sh
1+
2+
3+
# To run all linux builds on a single machine in serial you can run
4+
./build_multiple.sh conda,manywheel all all
5+
6+
# To run all wheel builds on a single machine in serial you can run
7+
./build_multiple.sh conda,wheel all cpu
8+
9+
# The cron jobs are split amongst three linux workers indexed 0-2. To run the
10+
# tasks for the nth worker pass n to build_cron
11+
./build_cron.sh 0
12+
./build_cron.sh 1
13+
./build_cron.sh 2
14+
./build_cron.sh mac
15+
16+
# To run these from a cron job, actually call cron_start.sh, which will clone
17+
# the latest version of this builder repo and call /that/ repo's build_cron.sh
18+
5 0 * * * /this/location/cron/cron_start.sh mac

cron/build_cron.sh

Lines changed: 61 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -1,126 +1,103 @@
11
#!/bin/bash
22

3+
set -ex
4+
SOURCE_DIR=$(cd $(dirname $0) && pwd)
5+
source "${SOURCE_DIR}/nightly_defaults.sh"
6+
37
# Script hardcoded to the number of worker machines we have.
48
# Divides work amongst the workers and runs the jobs in parallel on each worker
59
#
6-
# Needs NIGHTLIES_FOLDER to point to /scratch/<username>/nightlies
10+
# Command line arguments
11+
# DESIRED_PYTHONS
12+
# All Python versions to build for, separated by commas, in format '2.7mu'
13+
# for manywheels or in format '2.7' for conda/mac-wheels e.g.
14+
# '2.7m,2.7mu,3.5m,3.6m' or '2.7,3.7' . This can also just be the word
15+
# 'all', which will expand to all supported python versions.
16+
#
17+
# DESIRED_CUDAS
18+
# All CUDA versions to build for including 'cpu', separated by commas, in
19+
# format 'cpu' or 'cu80' or 'cu92' etc. e.g. 'cpu,cu80,cu90' or 'cu90,cu92'
20+
# . This can also just be the word 'all', which will expand to all
21+
# supported cpu/CUDA versions.
22+
23+
# On mac there is only one machine, so not specifying which machine is fine
24+
if [[ "$(uname)" == 'Darwin' ]]; then
25+
which_worker='mac'
26+
else
27+
if [ "$#" -ne 1 ]; then
28+
echo "Illegal number of parameters. Require which worker I am [0-2] or 'mac'"
29+
echo "e.g. ./build_cron.sh 0"
30+
exit 1
31+
fi
32+
33+
which_worker=$1
34+
35+
# This file is hardcoded to exactly 3 linux workers and 1 mac worker
36+
if [[ "$which_worker" != 0 && "$which_worker" != 1 && "$which_worker" != 2 ]]; then
37+
echo "Illegal parameter. This script is made for exactly 3 workers."
38+
echo "You must give me a worker number out of [0, 1, 2] or 'mac'"
39+
exit 1
40+
fi
41+
fi
42+
43+
mkdir -p "${today}/logs" || true
44+
touch "${today}/logs/failed"
45+
46+
# Divvy up the tasks
747
#
848
# There are currently 36 linux jobs and 3 linux machines (plus 4 mac wheel jobs on 1 mac machine)
949
# Each machine should run its 12 jobs in 4 parallel batches, about
1050
# conda jobs and gpu jobs take longer
1151
#
12-
# The list of jobs is
13-
#all_tasks=(
14-
# 'manywheel 2.7m cpu '
15-
# 'manywheel 2.7m cu80'
16-
# 'manywheel 2.7m cu90'
17-
# 'manywheel 2.7m cu92'
18-
#
19-
# 'manywheel 2.7mu cpu '
20-
# 'manywheel 2.7mu cu80'
21-
# 'manywheel 2.7mu cu90'
22-
# 'manywheel 2.7mu cu92'
23-
#
24-
# 'manywheel 3.5m cpu '
25-
# 'manywheel 3.5m cu80'
26-
# 'manywheel 3.5m cu90'
27-
# 'manywheel 3.5m cu92'
28-
#
29-
# 'manywheel 3.6m cpu '
30-
# 'manywheel 3.6m cu80'
31-
# 'manywheel 3.6m cu90'
32-
# 'manywheel 3.6m cu92'
33-
#
34-
# 'manywheel 3.7m cpu '
35-
# 'manywheel 3.7m cu80'
36-
# 'manywheel 3.7m cu90'
37-
# 'manywheel 3.7m cu92'
38-
#
39-
# 'conda 2.7 cpu '
40-
# 'conda 2.7 cu80'
41-
# 'conda 2.7 cu90'
42-
# 'conda 2.7 cu92'
43-
#
44-
# 'conda 3.5 cpu '
45-
# 'conda 3.5 cu80'
46-
# 'conda 3.5 cu90'
47-
# 'conda 3.5 cu92'
48-
#
49-
# 'conda 3.6 cpu '
50-
# 'conda 3.6 cu80'
51-
# 'conda 3.6 cu90'
52-
# 'conda 3.6 cu92'
53-
#
54-
# 'conda 3.7 cpu '
55-
# 'conda 3.7 cu80'
56-
# 'conda 3.7 cu90'
57-
# 'conda 3.7 cu92'
58-
#)
52+
# The jobs are the combinations of all:
53+
# manywheel X [2.7m 2.7mu 3.5m 3.6m 3.7m] X [cpu cu80 cu90 cu92]
54+
# conda X [2.7 3.5 3.6 3.7 ] X [cpu cu80 cu90 cu92]
55+
# wheel X [2.7 3.5 3.6 3.7 ] X [cpu ]
5956
#
6057
# cpu builds ~ 15 minutes. gpu builds > 1 hr
6158
# Try to divide the cpu jobs evenly among the tasks
62-
63-
set -ex
64-
65-
if [ "$#" -ne 1 ]; then
66-
echo "Illegal number of parameters. Require which worker I am [0-2]"
67-
echo "e.g. ./build_cron.sh 0"
68-
exit 1
69-
fi
70-
71-
which_worker=$1
72-
73-
# This file is hardcoded to exactly 3 workers
74-
if [[ "$which_worker" != 0 && "$which_worker" != 1 && "$which_worker" != 2 ]]; then
75-
echo "Illegal parameter. This script is made for exactly 3 workers."
76-
echo "You must give me a worker number out of [0, 1, 2]"
77-
exit 1
78-
fi
79-
80-
if [[ -z "$NIGHTLIES_FOLDER" ]]; then
81-
if [[ "$(uname)" == 'Darwin' ]]; then
82-
export NIGHTLIES_FOLDER='/Users/administrator/nightlies/'
83-
else
84-
export NIGHTLIES_FOLDER='/scratch/hellemn/nightlies'
85-
fi
86-
fi
87-
if [[ -z "$NIGHTLIES_DATE" ]]; then
88-
# cron can use a different time than is returned by `date`, so we save
89-
# the date that we're starting with so all builds use the same date
90-
export NIGHTLIES_DATE="$(date +%Y_%m_%d)"
91-
fi
92-
today="$NIGHTLIES_FOLDER/$NIGHTLIES_DATE"
93-
SOURCE_DIR=$(cd $(dirname $0) && pwd)
94-
95-
# Divy up the tasks
9659
if [[ "$which_worker" == 0 ]]; then
60+
# manywheel 2.7m,2.7mu,3.5m all
9761
tasks=(
9862
'manywheel 2.7m cpu,cu80,cu90'
9963
'manywheel 2.7mu cpu,cu80,cu90'
10064
'manywheel 3.5m cpu,cu80,cu90'
10165
'manywheel 2.7m,2.7mu,3.5m cu92'
10266
)
10367
elif [[ "$which_worker" == 1 ]]; then
68+
# manywheel 3.6m,3.7m all
69+
# conda 2.7 all
10470
tasks=(
10571
'manywheel 3.6m cpu,cu80,cu90'
10672
'manywheel 3.7m cpu,cu80,cu90'
10773
'conda 2.7 cpu,cu80,cu90'
10874
'manywheel 3.6m,3.7m cu92 -- conda 2.7 cu92'
10975
)
11076
elif [[ "$which_worker" == 2 ]]; then
77+
# conda 3.5,3.6,3.7 all
11178
tasks=(
11279
'conda 3.5 cpu,cu80,cu90'
11380
'conda 3.6 cpu,cu80,cu90'
11481
'conda 3.7 cpu,cu80,cu90'
11582
'conda 3.5,3.6,3.7 cu92'
11683
)
84+
elif [[ "$which_worker" == 'mac' ]]; then
85+
# wheel all
86+
# conda all cpu
87+
# 'conda 3.5,3.6,3.7 cpu'
88+
# 'wheel 3.7 cpu -- conda 2.7 cpu'
89+
tasks=(
90+
'wheel 2.7,3.5 cpu'
91+
'wheel 3.6,3.7 cpu'
92+
)
11793
fi
11894

11995
# Run the tasks
12096
log_root="$today/logs/master/worker_$which_worker"
12197
mkdir -p "$log_root"
122-
"$SOURCE_DIR/prep_nightlies.sh" 2>&1 | tee "$log_root/prep_nightlies.log"
12398
for task in "${tasks[@]}"; do
12499
log_file="$log_root/$(echo $task | tr ' ' '_' | tr -d ',-').log"
125-
"$SOURCE_DIR/build_multiple.sh" $task > "$log_file" 2>&1 &
100+
"${NIGHTLIES_BUILDER_ROOT}/cron/build_multiple.sh" $task > "$log_file" 2>&1 &
126101
done
102+
103+
# TODO capture PIDs of processes and wait for them to call upload.sh

0 commit comments

Comments
 (0)