...
 
Commits (14)
......@@ -49,6 +49,7 @@ else ()
set(MATIO_DEFAULT OFF)
endif ()
option(USE_MATIO "Enable Matlab output via libmatio?" ${MATIO_DEFAULT})
option(USE_TRACEANALYZER "Add instrumentation for Intel Traceanalyzer?" OFF)
set(CMAKE_INSTALL_PREFIX ${CMAKE_CURRENT_BINARY_DIR}/install)
......@@ -518,8 +519,19 @@ endif ()
add_subdirectory(tools/ssp)
add_executable(mpi-speed-test src/mpi-speed-test.cpp)
if (NOT WIN32)
# OpenMPI on trusty won't compile with -std=c++11 or later
# https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=796985
set_target_properties(mpi-speed-test PROPERTIES COMPILE_FLAGS -std=c++03)
endif ()
install(TARGETS mpi-speed-test DESTINATION bin)
# Optional Intel Trace Analyzer instrumentation (see the USE_TRACEANALYZER
# option): pass -trace on the link line of mpi-speed-test and of the target
# named by MPI_MASTER_NAME.
if (USE_TRACEANALYZER)
  # APPEND_STRING instead of set_target_properties(): the latter replaces the
  # whole LINK_FLAGS property and would silently drop any link flags that were
  # already attached to these targets elsewhere in the build.
  set_property(TARGET mpi-speed-test APPEND_STRING PROPERTY LINK_FLAGS " -trace")
  set_property(TARGET ${MPI_MASTER_NAME} APPEND_STRING PROPERTY LINK_FLAGS " -trace")
endif ()
# CPack setup
set(CPACK_PACKAGE_NAME "fmigo")
# set(CPACK_PACKAGE_VENDOR "")
......
......@@ -21,3 +21,6 @@ else
ulimit -c unlimited
ulimit -n 2048 #should be some value greater than 2*max(Nseq), probably a power of two too
fi
# Grab configuration, for figuring if we have GPL enabled or not
for e in $(fmigo-mpi -e); do export "$e"; done
ml purge
ml load gimpi/2017b itac/2017.3.030
export PREFIX=$(pwd)/../build/local
alias python=$PREFIX/bin/python3
export PATH=$PREFIX/bin:$PATH
export MANPATH=$PREFIX/man:$MANPATH:
export LD_LIBRARY_PATH=$PREFIX/lib
export PYTHONHOME=$PREFIX
#export PYTHONPATH=$PYTHONPATH:$PREFIX/lib/python3.7/site-packages
export PYTHONPATH=$PREFIX/lib/python3.7/site-packages
# To build for HPC2N (abisko.hpc2n.umu.se or kebnekaise.hpc2n.umu.se), cd into here and run this script.
# To be able to use the compiled fmigo-mpi you'll also want to source boilerplate.sh into your shell.
set -e
# Separated out so we can bring the required variables
# into the shell when fiddling with the build.
source boilerplate.sh
cd ..
mkdir -p build
cd build
wget --no-verbose --continue https://github.com/zeromq/libzmq/releases/download/v4.2.3/zeromq-4.2.3.tar.gz
wget --no-verbose --continue https://github.com/protocolbuffers/protobuf/releases/download/v3.6.1/protobuf-all-3.6.1.tar.gz
wget --no-verbose --continue https://bootstrap.pypa.io/get-pip.py
wget --no-verbose --continue https://www.python.org/ftp/python/3.7.0/Python-3.7.0.tar.xz
sha256sum -c <<EOF
8f1e2b2aade4dbfde98d82366d61baef2f62e812530160d2e6d0a5bb24e40bc0 zeromq-4.2.3.tar.gz
fd65488e618032ac924879a3a94fa68550b3b5bcb445b93b7ddf3c925b1a351f protobuf-all-3.6.1.tar.gz
b89554206d31aeadb8bea05afe53552ed1370ff416b7b49d1abccc0d60b3dca8 get-pip.py
0382996d1ee6aafe59763426cf0139ffebe36984474d0ec4126dd1c40a8b3549 Python-3.7.0.tar.xz
EOF
if true
then
rm -rf zeromq-4.2.3
tar xfvz zeromq-4.2.3.tar.gz
pushd zeromq-4.2.3
./configure --prefix=$PREFIX
make -j12
make install
popd
fi
if true
then
rm -rf protobuf-3.6.1
tar xfvz protobuf-all-3.6.1.tar.gz
pushd protobuf-3.6.1
./configure --prefix=$PREFIX
make -j12
make install
popd
fi
# Build CPython 3.7.0 from the verified tarball, install it into $PREFIX and
# make it reachable under the plain name "python" as well.
# (The "if true" wrapper presumably exists so the step can be switched off by
# hand while iterating on the build.)
if true
then
    rm -rf Python-3.7.0
    tar xfvJ Python-3.7.0.tar.xz
    pushd Python-3.7.0
    ./configure --prefix=$PREFIX
    make -j12
    make install
    popd
    # -f replaces a link left behind by an earlier run. A plain "ln -s" fails
    # with "File exists" the second time around, and because the script runs
    # under "set -e" that failure would abort the build before pip/cmake/make.
    # The link target stays relative (python -> python3 inside $PREFIX/bin).
    ln -sf python3 "$PREFIX/bin/python"
fi
if true
then
python3 get-pip.py --prefix $PREFIX
fi
if true
then
pip3 install -r ../Buildstuff/requirements.txt
cmake .. -DCMAKE_CXX_FLAGS="-I$PREFIX/include" -DCMAKE_C_FLAGS="-I$PREFIX/include" -DCMAKE_EXE_LINKER_FLAGS="-L$PREFIX/lib" -DCMAKE_INSTALL_PREFIX=$PREFIX -DUSE_TRACEANALYZER=ON
# call cmake a second time to tell it not to add any "impi" stuff to the link commands
cmake .. -DMPI_CXX_LIBRARIES="" -DMPI_C_LIBRARIES="" -DMPI_CXX_LINK_FLAGS="" -DMPI_C_LINK_FLAGS="" -DMPI_EXTRA_LIBRARY=""
make -j12
make install
fi
6,4.906
12,7.517
24,14.049
48,29.287
96,45.746
192,94.484
200,100.349
220,113.541
240,121.644
255,132.839
256,131.019
260,137.556
300,206.166
310,224.828
320,230.597
330,232.501
340,243.008
384,314.487
768,844.736
t-an01 [~/pfs/local/src/fmigo/tests/umit-fmus/tests/perftest]$ for d in 6 12 24 48 96 192 200 220 240 255 256 260 300 310 320 330 384 768; do echo $d $(grep real fmigo-kinematic-N$d/*.out); done
6 real 0m4.906s
12 real 0m7.517s
24 real 0m14.049s
48 real 0m29.287s
96 real 0m45.746s
192 real 1m34.484s
200 real 1m40.349s
220 real 1m53.541s
240 real 2m1.644s
255 real 2m12.839s
256 real 2m11.019s
260 real 2m17.556s
300 real 3m26.166s
310 real 3m44.828s
320 real 3m50.597s
330 real 3m52.501s
340 real 4m3.008s
384 real 5m14.487s
768 real 14m4.736s
6,2.999
12,4.355
24,8.386
48,17.200
96,31.659
192,62.680
384,153.075
768,462.266
\ No newline at end of file
Like kinematic-chain-walltimes but no traceanalyzer instrumentation -> fast!
t-an01 [~/pfs/local/src/fmigo/tests/umit-fmus/tests/perftest]$ for d in 6 12 24 48 96 192 384 768; do echo $d $(grep real noninstrumented-fmigo-kinematic-N$d/*.out); done
6 real 0m2.999s
12 real 0m4.355s
24 real 0m8.386s
48 real 0m17.200s
96 real 0m31.659s
192 real 1m2.680s
384 real 2m33.075s
768 real 7m42.266s
......@@ -2,9 +2,6 @@
set -e
source boilerplate.sh
# Grab configuration, for figuring if we have GPL enabled or not
for e in $(fmigo-mpi -e); do export "$e"; done
if [ $USE_GPL -eq 1 ]
then
(cd tests/umit-fmus/me && ( ./test_me.sh || ( echo "failed modelExchange" && exit 1 ) ) )
......
......@@ -780,6 +780,11 @@ void fmitcp_master::parseArguments( int argc,
printf("USE_GPL=1\n");
#else
printf("USE_GPL=0\n");
#endif
#ifdef USE_MATIO
printf("USE_MATIO=1\n");
#else
printf("USE_MATIO=0\n");
#endif
exit(0);
......
#include <mpi.h>
#include <memory.h>
#include <string>
#include <sys/time.h>
#include <stdlib.h>
#include <stdio.h>
//#define USE_ISEND //using MPI_Send() is actually faster on granular
......@@ -24,6 +27,25 @@ static void mpi_recv_string(int world_rank_in, int *world_rank_out, int *tag, st
if (tag) *tag = status.MPI_TAG;
}
/**
 * Busy-waits for at least `us` microseconds of wall-clock time.
 *
 * The wait is a spin on gettimeofday(); the calling thread never sleeps.
 * A zero or negative `us` returns immediately.
 *
 * @param us  delay in microseconds; values of one second or more are allowed
 */
static void delay(int us) {
    if (us <= 0) {
        return;
    }

    timeval deadline, now;
    gettimeofday(&deadline, NULL);

    // Split the delay into whole seconds and a sub-second remainder so that a
    // single carry is always enough to bring tv_usec back into [0, 1000000).
    deadline.tv_sec  += us / 1000000;
    deadline.tv_usec += us % 1000000;
    if (deadline.tv_usec >= 1000000) {
        deadline.tv_usec -= 1000000;
        deadline.tv_sec++;
    }

    for (;;) {
        gettimeofday(&now, NULL);
        // Compare (sec, usec) lexicographically. Checking the two fields
        // independently misses the deadline whenever the clock has rolled
        // over into a later second with a smaller tv_usec.
        if (now.tv_sec > deadline.tv_sec ||
            (now.tv_sec == deadline.tv_sec && now.tv_usec >= deadline.tv_usec)) {
            break;
        }
    }
}
int main(int argc, char *argv[]) {
//These were gathered by running perftest2.sh in MPI mode
//
......@@ -62,6 +84,13 @@ int main(int argc, char *argv[]) {
fprintf(stderr, "world_size %i != 8, results may be inaccurate\n", world_size);
}
//fake delays in microseconds
int fake_master = 0, fake_server = 0;
if (argc >= 3) {
fake_master = atoi(argv[1]);
fake_server = atoi(argv[2]);
}
//max of all packet sizes
#define MAXSZ 13756
char *data = (char*)malloc(MAXSZ*N);
......@@ -116,6 +145,8 @@ int main(int argc, char *argv[]) {
}
#endif
delay(fake_master);
pingpongs++;
if (--sizes_out[out_ofs][1] == 0) {
out_ofs++;
......@@ -131,6 +162,10 @@ int main(int argc, char *argv[]) {
//fprintf(stderr, "server recv %i\n", world_rank);
mpi_recv_string(0, &rank, &tag, recv_str);
//fprintf(stderr, "server recv %i, %zu B\n", world_rank, recv_str.length());
//fake calculations
delay(fake_server);
memset(data, in_ofs, sizes_in[in_ofs][0]);
MPI_Send(data, sizes_in[in_ofs][0], MPI_CHAR, 0, 0, MPI_COMM_WORLD);
......
......@@ -12,11 +12,17 @@ mpiexec -np 2 fmigo-mpi -t 0.4 -d 0.1 -H $FMU > /dev/null
mpiexec -np 2 fmigo-mpi -t 0.4 -d 0.1 -f tikz $FMU > /dev/null
# Two copies of the same FMU, CSV with header
mpiexec -np 3 fmigo-mpi -t 0.4 -d 0.1 -H $FMU $FMU > /dev/null
# mat5 output with two FMUs
mpiexec -np 3 fmigo-mpi -t 0.4 -d 0.1 -f mat5 -o temp.mat $FMU $FMU
# mat5_zlib output with two FMUs
mpiexec -np 3 fmigo-mpi -t 0.4 -d 0.1 -f mat5_zlib -o temp.mat $FMU $FMU
rm temp.mat
# The mat5 output formats are only exercised when fmigo-mpi reports
# USE_MATIO=1 (exported from the output of "fmigo-mpi -e").
# The expansion is quoted and defaults to 0 so that an unset variable simply
# skips the test instead of making "[" print a syntax error.
if [ "${USE_MATIO:-0}" = 1 ]
then
    # mat5 output with two FMUs
    mpiexec -np 3 fmigo-mpi -t 0.4 -d 0.1 -f mat5 -o temp.mat $FMU $FMU
    # mat5_zlib output with two FMUs
    mpiexec -np 3 fmigo-mpi -t 0.4 -d 0.1 -f mat5_zlib -o temp.mat $FMU $FMU
    rm temp.mat
else
    echo "USE_MATIO=OFF, skipping mat5 test"
fi
echo alltypestest seems to work fine
#!/bin/bash
# https://www.hpc2n.umu.se/documentation/batchsystem/slurm-submit-file-design
# Project id
#SBATCH -A SNIC2018-5-35
#SBATCH -n 6
#SBATCH --time=00:10:00
# Purge modules before loading new ones in a script
#ml purge
#ml load intel/2017a itac/2018.1.017
#export VT_ROOT=/pfs/nobackup/home/t/thardin/vt
set -e
pushd ../../../../../
source boilerplate.sh
popd
./perftest3.sh
#mpiicc -trace mpi-speed-test.c -o mpi-speed-test-icc
#time mpirun -trace -n 8 ./mpi-speed-test-icc 1
# Submit one batch job per chain length N.
# For every N a directory fmigo-kinematic-N<N> is created, the two job scripts
# are copied into it and patched for that N, and the job is queued with sbatch.
set -e
# Chain lengths used in earlier sweeps; swap the active "for" line to re-run one.
#for N in 6 12 24 48 96
#for N in 192 384 768
#for N in 200 220 240 255 256 260
#for N in 300 340 380
#for N in 310 320 330
#for N in 340
for N in 768
#for N in 1536
do
D=fmigo-kinematic-N$N
mkdir -p $D
cp abisko-perftest3.sh perftest3.sh $D
# Subshell so the "cd" does not leak into the next iteration.
(
cd $D
# Patch the copied submit script: "-n 6" becomes "-n $N", and the five-level
# relative path gains one more "../" because the copy lives one directory deeper.
sed -i -e "s/-n 6/-n $N/;s|../../../../../|../../../../../../|" abisko-perftest3.sh
# Patch the copied test script: the line starting with "n=6" becomes "n=$N",
# and both the pushd path and the buildi/fmigo-mpi path gain one more "../".
sed -i -e "s/^n=6/n=$N/;s|pushd ../../../..|pushd ../../../../..|;s|../../../../buildi/fmigo-mpi|../../../../../buildi/fmigo-mpi|" perftest3.sh
sbatch abisko-perftest3.sh
)
done
set -e
pushd ../../../..
source boilerplate.sh
popd
# Builds the FMU list and the shaft-connection arguments for a chain of
# N = n - 1 clutch FMUs, where FMU i is connected to FMU i+1.
#
# n is the total number of MPI ranks. The default of 6 is
# designed not to oversubscribe on 8-core machine (ThinkPad W540, granular).
# NOTE: keep "n=6" at the very start of its line; the job-submission script
# rewrites it with sed ("s/^n=6/n=$N/").
n=6
# Plain shell arithmetic instead of forking bc/python for every integer
# operation (the loop used to start one Python interpreter per chain link).
N=$((n - 1))
FMU=${FMUS_DIR}/gsl/clutch/clutch.fmu
FMUS="$FMU"
CONNS=
# i runs over 0 .. N-2, i.e. one iteration per connection between neighbours.
for ((i = 0; i <= N - 2; i++))
do
    ip1=$((i + 1))
    CONNS="$CONNS -C shaft,$i,$ip1,x_s,v_s,a_s,force_in_s,x_e,v_e,a_e,force_in_e"
    FMUS="$FMUS $FMU"
done
echo $((N - 1)) kinematic connections
# Walltime test
# 100k steps
if true
then
#(
#for x in `seq 1 11`
#do
# 100k steps
time mpirun -np $(python <<< "print($N + 1)") ../../../../buildi/fmigo-mpi -t 1 -d 0.0001 -f none -l 2 -a - $FMUS <<< "$CONNS"
#done
#) 2>&1 | grep real | sort
fi
# Valgrind slow awesomeness
# 10k steps
if false
then
URIS=
for j in $(seq 1 $N)
do
PORT=$(python <<< "print(1023 + $j)")
if [ $j -eq 1 ]
then
valgrind --tool=callgrind --callgrind-out-file=fmigo-server-2.callgrind fmigo-server -p $PORT $EXTRA -l 4 $FMU &
else
fmigo-server -p $PORT $EXTRA $FMU &
fi
URIS="$URIS tcp://localhost:$PORT"
done
# 10k steps
time valgrind --tool=callgrind --callgrind-out-file=fmigo-master-2.callgrind fmigo-master -l 4 -t 5 -d 0.0005 -f none $CONNS $URIS
#time fmigo-master -l 4 -t 50 -d 0.0005 -f none -a - $URIS <<< "$CONNS"
# real 0m17,574s
#time perf record -a -F 999 -g -- fmigo-master -l 4 -t 5 -d 0.0005 -f none -a - $URIS <<< "$CONNS"
#perf script | ~/FlameGraph/stackcollapse-perf.pl > out.perf-folded
#~/FlameGraph/flamegraph.pl out.perf-folded > perf-fmigo-master.svg
fi
# Flamegraph test
if false
then
# real 0m41,211s
for x in `seq 1 5`
do
time perf record -a -F 5000 -g -- mpiexec -np $(python <<< "print($N + 1)") fmigo-mpi -t 10 -d 0.0005 -f none -a - $FMUS <<< "$CONNS"
done
perf script | ~/FlameGraph/stackcollapse-perf.pl > out.perf-folded-2
#~/FlameGraph/flamegraph.pl out.perf-folded-2 > perf-fmigo-mpi.svg
~/FlameGraph/difffolded.pl out.perf-folded out.perf-folded-2 | ~/FlameGraph/flamegraph.pl > perf-fmigo-mpi.svg
fi
#echo mpi-speed-test comparison:
#time mpiexec -np $(python <<< "print($N + 1)") mpi-speed-test
This diff is collapsed.