perf_write_outputs Subroutine

public subroutine perf_write_outputs(mpi)

集計済みプロファイルを標準出力および CSV へ書き出す。

Arguments

Type IntentOptional Attributes Name
type(mpi_context), intent(in) :: mpi

Calls

proc~~perf_write_outputs~~CallsGraph proc~perf_write_outputs perf_write_outputs proc~mpi_allreduce_max_real_dp_array mpi_allreduce_max_real_dp_array proc~perf_write_outputs->proc~mpi_allreduce_max_real_dp_array proc~mpi_allreduce_min_real_dp_array mpi_allreduce_min_real_dp_array proc~perf_write_outputs->proc~mpi_allreduce_min_real_dp_array proc~mpi_allreduce_sum_i32_array mpi_allreduce_sum_i32_array proc~perf_write_outputs->proc~mpi_allreduce_sum_i32_array proc~mpi_allreduce_sum_real_dp_array mpi_allreduce_sum_real_dp_array proc~perf_write_outputs->proc~mpi_allreduce_sum_real_dp_array proc~mpi_get_rank_size mpi_get_rank_size proc~perf_write_outputs->proc~mpi_get_rank_size proc~mpi_is_root mpi_is_root proc~perf_write_outputs->proc~mpi_is_root

Called by

proc~~perf_write_outputs~~CalledByGraph proc~perf_write_outputs perf_write_outputs program~main main program~main->proc~perf_write_outputs

Source Code

  subroutine perf_write_outputs(mpi)
    type(mpi_context), intent(in) :: mpi

    integer(i32) :: rank, world_size, region_idx
    integer(i32) :: call_sum(perf_region_count)
    real(dp) :: total_sum(perf_region_count), total_min(perf_region_count), total_max(perf_region_count)
    real(dp) :: total_mean, imbalance
    integer :: u, ios
    character(len=1024) :: path

    if (.not. perf_state%enabled) return

    call mpi_get_rank_size(rank, world_size, mpi)

    do region_idx = 1, perf_region_count
      total_sum(region_idx) = perf_state%regions(region_idx)%total_s
      total_min(region_idx) = perf_state%regions(region_idx)%total_s
      total_max(region_idx) = perf_state%regions(region_idx)%total_s
      call_sum(region_idx) = perf_state%regions(region_idx)%call_count
    end do

    call mpi_allreduce_sum_real_dp_array(mpi, total_sum)
    call mpi_allreduce_min_real_dp_array(mpi, total_min)
    call mpi_allreduce_max_real_dp_array(mpi, total_max)
    call mpi_allreduce_sum_i32_array(mpi, call_sum)

    if (.not. mpi_is_root(mpi)) return

    write (output_unit, '(a,es12.4,a,es12.4,a,i0,a,i0)') &
      'performance: program_total(rank_max)=', total_max(perf_region_program_total), &
      ' simulation_total(rank_max)=', total_max(perf_region_simulation_total), &
      ' mpi=', world_size, ' omp=', perf_state%omp_max_threads

    if (.not. perf_state%write_files .or. len_trim(perf_state%output_dir) == 0) then
      flush (output_unit)
      return
    end if

    path = trim(perf_state%output_dir)//'/performance_profile.csv'
    open (newunit=u, file=trim(path), status='replace', action='write', iostat=ios)
    if (ios /= 0) error stop 'Failed to open performance_profile.csv.'

    write (u, '(a)') '# BEACH performance profile'
    write (u, '(a,i0)') '# mpi_world_size=', world_size
    write (u, '(a,i0)') '# omp_max_threads=', perf_state%omp_max_threads
    write (u, '(a)') '# use rank_max_s of simulation_total for scaling comparisons'
    write (u, '(a)') 'region,calls_sum,calls_mean,rank_min_s,rank_mean_s,rank_max_s,imbalance_ratio'

    do region_idx = 1, perf_region_count
      total_mean = total_sum(region_idx)/real(max(1_i32, world_size), dp)
      if (total_mean > tiny(1.0d0)) then
        imbalance = total_max(region_idx)/total_mean
      else
        imbalance = 0.0d0
      end if
      write (u, '(a,a,i0,a,es24.16,a,es24.16,a,es24.16,a,es24.16,a,es24.16)') &
        trim(perf_region_names(region_idx)), ',', call_sum(region_idx), ',', &
        real(call_sum(region_idx), dp)/real(max(1_i32, world_size), dp), ',', &
        total_min(region_idx), ',', total_mean, ',', total_max(region_idx), ',', imbalance
    end do

    close (u)
    write (output_unit, '(a,a)') 'performance profile written to ', trim(path)
    flush (output_unit)
  end subroutine perf_write_outputs