!> Coulomb FMM state 更新と upward/downward pass。 module bem_coulomb_fmm_state_ops use bem_kinds, only: dp, i32 use bem_coulomb_fmm_types, only: fmm_plan_type, fmm_state_type, initialize_fmm_state, reset_fmm_state use bem_coulomb_fmm_tree_utils, only: active_tree_nnode, active_tree_max_depth, active_tree_level_bounds, & active_tree_level_node implicit none private public :: core_update_state_impl public :: core_destroy_state_impl contains !> ソース電荷から FMM state を更新する。 !! @param[in] plan 構築済みの FMM 計画。 !! @param[inout] state 更新対象の state。 !! @param[in] src_q ソース電荷量。 subroutine core_update_state_impl(plan, state, src_q) type(fmm_plan_type), intent(in) :: plan type(fmm_state_type), intent(inout) :: state real(dp), intent(in) :: src_q(:) integer(i32) :: n_target_nodes if (.not. plan%built) error stop 'FMM plan is not built.' if (size(src_q) /= plan%nsrc) error stop 'src_q size does not match plan.' state%ready = .false. call ensure_state_capacity(plan, state) n_target_nodes = merge(plan%target_nnode, plan%nnode, plan%target_tree_ready) !$omp parallel default(none) shared(plan, state, src_q, n_target_nodes) if (associated(state%multipole_active)) then !$omp workshare state%multipole_active = 0_i32 !$omp end workshare end if if (associated(state%local_active)) then !$omp workshare state%local_active = 0_i32 !$omp end workshare end if if (plan%nsrc > 0_i32) then !$omp workshare state%src_q = src_q !$omp end workshare end if if (plan%nsource_leaf <= 0_i32 .and. associated(state%multipole)) then !$omp workshare state%multipole = 0.0d0 !$omp end workshare end if if (plan%m2l_pair_count <= 0_i32 .and. associated(state%local)) then !$omp workshare state%local = 0.0d0 !$omp end workshare end if call p2m_leaf_moments(plan, state) call m2m_upward_pass(plan, state) if (n_target_nodes > 0_i32) then call m2l_accumulate(plan, state) call apply_periodic_target_correction(plan, state) call l2l_downward_pass(plan, state) end if !$omp end parallel state%ready = .true. state%update_count = state%update_count + 1_i32 end subroutine core_update_state_impl !> FMM state に確保した資源を解放する。 !! @param[inout] state 解放対象の state。 subroutine core_destroy_state_impl(state) type(fmm_state_type), intent(inout) :: state call reset_fmm_state(state) end subroutine core_destroy_state_impl subroutine ensure_state_capacity(plan, state) type(fmm_plan_type), intent(in) :: plan type(fmm_state_type), intent(inout) :: state integer(i32) :: n_target_nodes n_target_nodes = plan%nnode if (plan%target_tree_ready) n_target_nodes = plan%target_nnode if (state%update_count <= 0_i32) then call initialize_fmm_state(state) if (associated(state%src_q)) deallocate (state%src_q) if (associated(state%multipole)) deallocate (state%multipole) if (associated(state%multipole_active)) deallocate (state%multipole_active) if (associated(state%local)) deallocate (state%local) if (associated(state%local_active)) deallocate (state%local_active) allocate (state%src_q(plan%nsrc)) allocate (state%multipole(plan%ncoef, max(1_i32, plan%nnode))) allocate (state%multipole_active(max(1_i32, plan%nnode))) allocate (state%local(plan%ncoef, max(1_i32, n_target_nodes))) allocate (state%local_active(max(1_i32, n_target_nodes))) return end if if (associated(state%src_q)) then if (size(state%src_q) /= plan%nsrc) deallocate (state%src_q) end if if (.not. associated(state%src_q)) allocate (state%src_q(plan%nsrc)) if (associated(state%multipole)) then if (size(state%multipole, 1) /= plan%ncoef .or. size(state%multipole, 2) /= max(1_i32, plan%nnode)) then deallocate (state%multipole) end if end if if (.not. associated(state%multipole)) allocate (state%multipole(plan%ncoef, max(1_i32, plan%nnode))) if (associated(state%multipole_active)) then if (size(state%multipole_active) /= max(1_i32, plan%nnode)) deallocate (state%multipole_active) end if if (.not. associated(state%multipole_active)) allocate (state%multipole_active(max(1_i32, plan%nnode))) if (associated(state%local)) then if (size(state%local, 1) /= plan%ncoef .or. size(state%local, 2) /= max(1_i32, n_target_nodes)) then deallocate (state%local) end if end if if (.not. associated(state%local)) allocate (state%local(plan%ncoef, max(1_i32, n_target_nodes))) if (associated(state%local_active)) then if (size(state%local_active) /= max(1_i32, n_target_nodes)) deallocate (state%local_active) end if if (.not. associated(state%local_active)) allocate (state%local_active(max(1_i32, n_target_nodes))) end subroutine ensure_state_capacity subroutine p2m_leaf_moments(plan, state) type(fmm_plan_type), intent(in) :: plan type(fmm_state_type), intent(inout) :: state integer(i32) :: leaf_idx, node_idx, p, idx, p_end, alpha_idx real(dp) :: leaf_multipole(plan%ncoef) logical :: leaf_active if (plan%nsource_leaf <= 0_i32) return !$omp do schedule(static) do leaf_idx = 1_i32, plan%nsource_leaf node_idx = plan%source_leaf_nodes(leaf_idx) leaf_multipole = 0.0d0 leaf_active = .false. p_end = plan%node_start(node_idx) + plan%node_count(node_idx) - 1_i32 do p = plan%node_start(node_idx), p_end idx = plan%elem_order(p) if (abs(state%src_q(idx)) <= tiny(1.0d0)) cycle do alpha_idx = 1_i32, plan%ncoef leaf_multipole(alpha_idx) = leaf_multipole(alpha_idx) + state%src_q(idx)*plan%source_p2m_basis(alpha_idx, p) end do end do leaf_active = coefficient_vector_is_active(leaf_multipole) state%multipole(:, node_idx) = leaf_multipole state%multipole_active(node_idx) = merge(1_i32, 0_i32, leaf_active) end do !$omp end do end subroutine p2m_leaf_moments subroutine m2m_upward_pass(plan, state) type(fmm_plan_type), intent(in) :: plan type(fmm_state_type), intent(inout) :: state integer(i32) :: depth, level_pos, level_start_pos, level_end_pos integer(i32) :: node_idx, child_k, child_node, beta_idx, term_idx, alpha_idx, delta_idx real(dp) :: node_multipole(plan%ncoef) logical :: node_active if (plan%node_max_depth <= 0_i32) return do depth = plan%node_max_depth - 1_i32, 0_i32, -1_i32 level_start_pos = plan%node_level_start(depth + 1_i32) level_end_pos = plan%node_level_start(depth + 2_i32) - 1_i32 !$omp do schedule(static) do level_pos = level_start_pos, level_end_pos node_idx = plan%node_level_nodes(level_pos) if (plan%child_count(node_idx) <= 0_i32) cycle node_multipole = 0.0d0 do child_k = 1_i32, plan%child_count(node_idx) child_node = plan%child_idx(child_k, node_idx) if (state%multipole_active(child_node) == 0_i32) cycle do beta_idx = 1_i32, plan%ncoef do term_idx = 1_i32, plan%m2m_term_count(beta_idx) alpha_idx = plan%m2m_alpha_list(term_idx, beta_idx) delta_idx = plan%m2m_delta_list(term_idx, beta_idx) node_multipole(beta_idx) = node_multipole(beta_idx) & + state%multipole(alpha_idx, child_node) & *plan%source_shift_monomial(delta_idx, child_node) end do end do end do node_active = coefficient_vector_is_active(node_multipole) state%multipole(:, node_idx) = node_multipole state%multipole_active(node_idx) = merge(1_i32, 0_i32, node_active) end do !$omp end do end do end subroutine m2m_upward_pass subroutine m2l_accumulate(plan, state) type(fmm_plan_type), intent(in) :: plan type(fmm_state_type), intent(inout) :: state integer(i32) :: t_node, pair_pos, pair_idx, s_node, alpha_idx, beta_idx, deriv_idx, n_target_nodes real(dp) :: local_acc(plan%ncoef), source_coeff logical :: local_active if (plan%m2l_pair_count <= 0_i32) return n_target_nodes = active_tree_nnode(plan, plan%target_tree_ready) !$omp do schedule(static) do t_node = 1_i32, n_target_nodes local_acc = 0.0d0 local_active = .false. do pair_pos = plan%m2l_target_start(t_node), plan%m2l_target_start(t_node + 1_i32) - 1_i32 pair_idx = plan%m2l_pair_order(pair_pos) s_node = plan%m2l_source_nodes(pair_idx) if (state%multipole_active(s_node) == 0_i32) cycle do beta_idx = 1_i32, plan%ncoef source_coeff = plan%alpha_sign(beta_idx)*state%multipole(beta_idx, s_node) if (abs(source_coeff) <= tiny(1.0d0)) cycle do alpha_idx = 1_i32, plan%ncoef deriv_idx = plan%alpha_beta_deriv_idx(alpha_idx, beta_idx) local_acc(alpha_idx) = local_acc(alpha_idx) + source_coeff*plan%m2l_deriv(deriv_idx, pair_idx) end do end do end do local_active = coefficient_vector_is_active(local_acc) state%local(:, t_node) = local_acc state%local_active(t_node) = merge(1_i32, 0_i32, local_active) end do !$omp end do end subroutine m2l_accumulate subroutine apply_periodic_target_correction(plan, state) type(fmm_plan_type), intent(in) :: plan type(fmm_state_type), intent(inout) :: state integer(i32) :: target_idx, node_idx real(dp) :: node_local(plan%ncoef) logical :: node_active if (.not. plan%periodic_root_operator_ready) return if (size(state%local, 2) <= 0) return if (plan%periodic_root_target_count <= 0_i32) return if (plan%nnode <= 0_i32 .or. state%multipole_active(1_i32) == 0_i32) return !$omp do schedule(static) do target_idx = 1_i32, plan%periodic_root_target_count node_idx = plan%periodic_root_target_nodes(target_idx) node_local = state%local(:, node_idx) + matmul(plan%periodic_root_operator(:, :, target_idx), state%multipole(:, 1_i32)) node_active = coefficient_vector_is_active(node_local) state%local(:, node_idx) = node_local state%local_active(node_idx) = merge(1_i32, 0_i32, node_active) end do !$omp end do end subroutine apply_periodic_target_correction subroutine l2l_downward_pass(plan, state) type(fmm_plan_type), intent(in) :: plan type(fmm_state_type), intent(inout) :: state integer(i32) :: depth, max_depth, level_pos, level_start_pos, level_end_pos integer(i32) :: node_idx, parent_node, alpha_idx, term_idx, gamma_idx, delta_idx real(dp) :: parent_local(plan%ncoef), local_acc(plan%ncoef) logical :: use_target_tree logical :: parent_active, node_active use_target_tree = plan%target_tree_ready max_depth = active_tree_max_depth(plan, use_target_tree) if (max_depth <= 0_i32) return do depth = 1_i32, max_depth call active_tree_level_bounds(plan, use_target_tree, depth, level_start_pos, level_end_pos) !$omp do schedule(static) do level_pos = level_start_pos, level_end_pos node_idx = active_tree_level_node(plan, use_target_tree, level_pos) parent_node = plan%parent_of(node_idx) if (parent_node <= 0_i32) cycle parent_active = state%local_active(parent_node) /= 0_i32 node_active = state%local_active(node_idx) /= 0_i32 if (.not. parent_active .and. .not. node_active) cycle parent_local = state%local(:, parent_node) local_acc = state%local(:, node_idx) if (parent_active) then do alpha_idx = 1_i32, plan%ncoef do term_idx = 1_i32, plan%l2l_term_count(alpha_idx) gamma_idx = plan%l2l_gamma_list(term_idx, alpha_idx) delta_idx = plan%l2l_delta_list(term_idx, alpha_idx) local_acc(alpha_idx) = local_acc(alpha_idx) + parent_local(gamma_idx) & *plan%target_shift_monomial(delta_idx, node_idx) end do end do end if node_active = coefficient_vector_is_active(local_acc) state%local(:, node_idx) = local_acc state%local_active(node_idx) = merge(1_i32, 0_i32, node_active) end do !$omp end do end do end subroutine l2l_downward_pass pure logical function coefficient_vector_is_active(coeff) real(dp), intent(in) :: coeff(:) coefficient_vector_is_active = any(coeff /= 0.0d0) end function coefficient_vector_is_active end module bem_coulomb_fmm_state_ops