集計済みプロファイルを標準出力および CSV へ書き出す。
| Type | Intent | Optional | Attributes | Name | ||
|---|---|---|---|---|---|---|
| type(mpi_context), | intent(in) | :: | mpi |
subroutine perf_write_outputs(mpi) type(mpi_context), intent(in) :: mpi integer(i32) :: rank, world_size, region_idx integer(i32) :: call_sum(perf_region_count) real(dp) :: total_sum(perf_region_count), total_min(perf_region_count), total_max(perf_region_count) real(dp) :: total_mean, imbalance integer :: u, ios character(len=1024) :: path if (.not. perf_state%enabled) return call mpi_get_rank_size(rank, world_size, mpi) do region_idx = 1, perf_region_count total_sum(region_idx) = perf_state%regions(region_idx)%total_s total_min(region_idx) = perf_state%regions(region_idx)%total_s total_max(region_idx) = perf_state%regions(region_idx)%total_s call_sum(region_idx) = perf_state%regions(region_idx)%call_count end do call mpi_allreduce_sum_real_dp_array(mpi, total_sum) call mpi_allreduce_min_real_dp_array(mpi, total_min) call mpi_allreduce_max_real_dp_array(mpi, total_max) call mpi_allreduce_sum_i32_array(mpi, call_sum) if (.not. mpi_is_root(mpi)) return write (output_unit, '(a,es12.4,a,es12.4,a,i0,a,i0)') & 'performance: program_total(rank_max)=', total_max(perf_region_program_total), & ' simulation_total(rank_max)=', total_max(perf_region_simulation_total), & ' mpi=', world_size, ' omp=', perf_state%omp_max_threads if (.not. perf_state%write_files .or. len_trim(perf_state%output_dir) == 0) then flush (output_unit) return end if path = trim(perf_state%output_dir)//'/performance_profile.csv' open (newunit=u, file=trim(path), status='replace', action='write', iostat=ios) if (ios /= 0) error stop 'Failed to open performance_profile.csv.' write (u, '(a)') '# BEACH performance profile' write (u, '(a,i0)') '# mpi_world_size=', world_size write (u, '(a,i0)') '# omp_max_threads=', perf_state%omp_max_threads write (u, '(a)') '# use rank_max_s of simulation_total for scaling comparisons' write (u, '(a)') 'region,calls_sum,calls_mean,rank_min_s,rank_mean_s,rank_max_s,imbalance_ratio' do region_idx = 1, perf_region_count total_mean = total_sum(region_idx)/real(max(1_i32, world_size), dp) if (total_mean > tiny(1.0d0)) then imbalance = total_max(region_idx)/total_mean else imbalance = 0.0d0 end if write (u, '(a,a,i0,a,es24.16,a,es24.16,a,es24.16,a,es24.16,a,es24.16)') & trim(perf_region_names(region_idx)), ',', call_sum(region_idx), ',', & real(call_sum(region_idx), dp)/real(max(1_i32, world_size), dp), ',', & total_min(region_idx), ',', total_mean, ',', total_max(region_idx), ',', imbalance end do close (u) write (output_unit, '(a,a)') 'performance profile written to ', trim(path) flush (output_unit) end subroutine perf_write_outputs