From 07ebf4dba823d58654824f8d5d5cf5289a3b83d3 Mon Sep 17 00:00:00 2001 From: betterpig Date: Thu, 28 Jul 2022 08:18:18 +0000 Subject: [PATCH 1/2] set parallel_job according to CUDA memory --- tools/windows/run_unittests.sh | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/windows/run_unittests.sh b/tools/windows/run_unittests.sh index 3c1a95f3691d7..ed530c1077cbf 100644 --- a/tools/windows/run_unittests.sh +++ b/tools/windows/run_unittests.sh @@ -336,7 +336,13 @@ function run_unittest_gpu() { if [ "$2" == "" ]; then parallel_job=$parallel_level_base else - parallel_job=`expr $2 \* $parallel_level_base` + # set parallel_job according to CUDA memory and suggested parallel num, + # the latter is derived in linux server with 16G CUDA memory. + cuda_memory=$(nvidia-smi --query-gpu=memory.total --format=csv | tail -1 | awk -F ' ' '{print $1}') + parallel_job=$(($2 * $cuda_memory / 16000)) + if [$parallel_job -lt 1]; then + parallel_job=1 + fi fi echo "************************************************************************" echo "********These unittests run $parallel_job job each time with 1 GPU**********" From 057f97cf0c13fe86e8a4120bdd354f726aea1573 Mon Sep 17 00:00:00 2001 From: betterpig Date: Fri, 29 Jul 2022 02:27:31 +0000 Subject: [PATCH 2/2] fix bug: add whitespace between content and [] or condition won't work --- tools/windows/run_unittests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/windows/run_unittests.sh b/tools/windows/run_unittests.sh index ed530c1077cbf..1b8da6aa51e98 100644 --- a/tools/windows/run_unittests.sh +++ b/tools/windows/run_unittests.sh @@ -340,7 +340,7 @@ function run_unittest_gpu() { # the latter is derived in linux server with 16G CUDA memory. cuda_memory=$(nvidia-smi --query-gpu=memory.total --format=csv | tail -1 | awk -F ' ' '{print $1}') parallel_job=$(($2 * $cuda_memory / 16000)) - if [$parallel_job -lt 1]; then + if [ $parallel_job -lt 1 ]; then parallel_job=1 fi fi