deal.II version 9.7.0
\(\newcommand{\dealvcentcolon}{\mathrel{\mathop{:}}}\) \(\newcommand{\dealcoloneq}{\dealvcentcolon\mathrel{\mkern-1.2mu}=}\) \(\newcommand{\jump}[1]{\left[\!\left[ #1 \right]\!\right]}\) \(\newcommand{\average}[1]{\left\{\!\left\{ #1 \right\}\!\right\}}\)
Loading...
Searching...
No Matches
dof_info.cc
Go to the documentation of this file.
1// ------------------------------------------------------------------------
2//
3// SPDX-License-Identifier: LGPL-2.1-or-later
4// Copyright (C) 2020 - 2025 by the deal.II authors
5//
6// This file is part of the deal.II library.
7//
8// Part of the source code is dual licensed under Apache-2.0 WITH
9// LLVM-exception OR LGPL-2.1-or-later. Detailed license information
10// governing the source code and code contributions can be found in
11// LICENSE.md and CONTRIBUTING.md at the top level directory of deal.II.
12//
13// ------------------------------------------------------------------------
14
15
19
21
24
25#include <deal.II/matrix_free/dof_info.templates.h>
27
28#include <iostream>
29
31
32namespace internal
33{
34 namespace MatrixFreeFunctions
35 {
// NOTE(review): compile-time guard for the byte-sized (std::uint8_t)
// encoding of compressed hanging-node constraint indicators, which is
// declared independently in dof_info.h and hanging_nodes_internal.h and
// must stay in sync.
 36 // ensure that the type defined in both dof_info.h and
 37 // hanging_nodes_internal.h is consistent
 38 static_assert(std::is_same_v<compressed_constraint_kind, std::uint8_t>,
 39 "Unexpected type for compressed hanging node indicators!");
40
41
42
// Constructor body: all member initialization is delegated to clear().
// NOTE(review): this is a doxygen-rendered listing; the leading integer on
// each line is the original source line number. The signature line
// (original line 43, presumably "DoFInfo::DoFInfo()") was dropped by the
// extraction -- confirm against upstream dof_info.cc.
 44 {
 45 clear();
 46 }
47
48
49
// Reset all index, partitioning, and FE bookkeeping data of this DoFInfo
// to an empty state, so the object can be re-initialized from scratch.
// NOTE(review): leading integers are original source line numbers; gaps in
// that numbering (e.g. 54 -> 56, 59 -> 63, 71 -> 74) mark lines lost in the
// doxygen extraction -- the function-name line (51, presumably
// "DoFInfo::clear()") is among them, as are additional container .clear()
// calls. Treat this listing as incomplete.
 50 void
 52 {
 53 row_starts.clear();
 54 dof_indices.clear();
 56 vector_partitioner.reset();
 57 ghost_dofs.clear();
 58 dofs_per_cell.clear();
 59 dofs_per_face.clear();
 63 n_components.clear();
 64 start_components.clear();
 66 plain_dof_indices.clear();
 // Clear the per-access-kind storage (the three DoFAccessIndex slots:
 // cell, face interior, face exterior -- presumably; confirm upstream).
 68 for (unsigned int i = 0; i < 3; ++i)
 69 {
 70 index_storage_variants[i].clear();
 71 dof_indices_contiguous[i].clear();
 74 }
 75 store_plain_indices = false;
 77 max_fe_index = 0;
 78 fe_index_conversion.clear();
 79 }
80
81
82
// Collect into 'my_rows' the local DoF indices touched by all filled lanes
// of one vectorized cell batch. For lanes without constraints (or when
// 'apply_constraints' is true) the constraint-resolved indices from
// dof_indices/row_starts are copied; otherwise a constrained lane falls
// back to its unconstrained "plain" index list.
// NOTE(review): leading integers are original source line numbers; gaps in
// the numbering (94 -> 96, 117 -> 119/121, 142 -> 145) mark lines dropped
// by the doxygen extraction (an AssertIndexRange, parts of the
// has_constraints expression, and the second Assert argument).
 83 void
 84 DoFInfo::get_dof_indices_on_cell_batch(std::vector<unsigned int> &my_rows,
 85 const unsigned int cell,
 86 const bool apply_constraints) const
 87 {
 88 const unsigned int n_fe_components = start_components.back();
 89 const unsigned int fe_index =
 90 dofs_per_cell.size() == 1 ? 0 : cell_active_fe_index[cell];
 91 const unsigned int dofs_this_cell = dofs_per_cell[fe_index];
 92
 93 const unsigned int n_vectorization = vectorization_length;
 94 constexpr auto dof_access_index = dof_access_cell;
 96 n_vectorization_lanes_filled[dof_access_index].size());
 97 const unsigned int n_vectorization_actual =
 98 n_vectorization_lanes_filled[dof_access_index][cell];
 99
 100 // we might have constraints, so the final number
 101 // of indices is not known a priori.
 102 // conservatively reserve the maximum without constraints
 103 my_rows.reserve(n_vectorization * dofs_this_cell);
 104 my_rows.resize(0);
 105 unsigned int total_size = 0;
 // Loop only over the lanes actually filled with cells in this batch.
 106 for (unsigned int v = 0; v < n_vectorization_actual; ++v)
 107 {
 // [ib, ie): row_starts range covering all components of lane v.
 108 const unsigned int ib =
 109 (cell * n_vectorization + v) * n_fe_components;
 110 const unsigned int ie =
 111 (cell * n_vectorization + v + 1) * n_fe_components;
 112
 113 // figure out constraints by comparing constraint_indicator row
 114 // shift for this cell within the block as compared to the next
 115 // one
 116 const bool has_constraints =
 117 (hanging_node_constraint_masks.size() != 0 &&
 119 hanging_node_constraint_masks[cell * n_vectorization + v] !=
 121 hanging_node_constraint_masks_comp[fe_index][0 /*TODO*/]) ||
 122 (row_starts[ib].second != row_starts[ib + n_fe_components].second);
 123
 // Helper: append [begin, end) to my_rows, growing it in place.
 124 auto do_copy = [&](const unsigned int *begin,
 125 const unsigned int *end) {
 126 const unsigned int shift = total_size;
 127 total_size += (end - begin);
 128 my_rows.resize(total_size);
 129 std::copy(begin, end, my_rows.begin() + shift);
 130 };
 131
 132 if (!has_constraints || apply_constraints)
 133 {
 134 const unsigned int *begin =
 135 dof_indices.data() + row_starts[ib].first;
 136 const unsigned int *end =
 137 dof_indices.data() + row_starts[ie].first;
 138 do_copy(begin, end);
 139 }
 140 else
 141 {
 // Constrained lane without constraint application: use the
 // unconstrained ("plain") indices stored per lane instead.
 142 Assert(row_starts_plain_indices[cell * n_vectorization + v] !=
 145 const unsigned int *begin =
 146 plain_dof_indices.data() +
 147 row_starts_plain_indices[cell * n_vectorization + v];
 148 const unsigned int *end = begin + dofs_this_cell;
 149 do_copy(begin, end);
 150 }
 151 }
 152 }
153
154
155
// Renumber ghost DoFs into a compressed local numbering: sort and
// deduplicate 'ghost_dofs', build the ghost IndexSet, rewrite the indices
// stored for boundary cells (both the constraint-resolved and the "plain"
// lists) from the temporary first-appearance numbering to the compressed
// one, and finally install the ghost set in the vector partitioner and
// create the matching vector-data exchanger.
// NOTE(review): leading integers are original source line numbers; gaps in
// the numbering mark lines lost in the doxygen extraction (e.g. 217 -- an
// Assert argument; 251/253/256 -- parts of the hanging-node check; 284-285
// -- presumably the declaration of 'vec_part' used below; 290-293 -- the
// PartitionerWrapper branch). Confirm against upstream dof_info.cc.
 156 void
 157 DoFInfo::assign_ghosts(const std::vector<unsigned int> &boundary_cells,
 158 const MPI_Comm communicator_sm,
 159 const bool use_vector_data_exchanger_full)
 160 {
 161 Assert(boundary_cells.size() < row_starts.size(), ExcInternalError());
 162
 163 // sort ghost dofs and compress out duplicates
 164 const unsigned int n_owned = (vector_partitioner->local_range().second -
 165 vector_partitioner->local_range().first);
 166 const std::size_t n_ghosts = ghost_dofs.size();
 167 if constexpr (running_in_debug_mode())
 168 {
 169 for (const auto dof_index : dof_indices)
 170 AssertIndexRange(dof_index, n_owned + n_ghosts);
 171 }
 172
 173 const unsigned int n_components = start_components.back();
 174 std::vector<unsigned int> ghost_numbering(n_ghosts);
 175 IndexSet ghost_indices(vector_partitioner->size());
 176 if (n_ghosts > 0)
 177 {
 178 unsigned int n_unique_ghosts = 0;
 179 // since we need to go back to the local_to_global indices and
 180 // replace the temporary numbering of ghosts by the real number in
 181 // the index set, we need to store these values
 182 std::vector<std::pair<types::global_dof_index, unsigned int>>
 183 ghost_origin(n_ghosts);
 184 for (std::size_t i = 0; i < n_ghosts; ++i)
 185 {
 186 ghost_origin[i].first = ghost_dofs[i];
 187 ghost_origin[i].second = i;
 188 }
 189 std::sort(ghost_origin.begin(), ghost_origin.end());
 190
 // Walk the sorted ghosts once, emitting maximal contiguous ranges
 // into the IndexSet and assigning each original slot its compressed
 // (duplicate-free) ghost number.
 191 types::global_dof_index last_contiguous_start = ghost_origin[0].first;
 192 ghost_numbering[ghost_origin[0].second] = 0;
 193 for (std::size_t i = 1; i < n_ghosts; ++i)
 194 {
 195 if (ghost_origin[i].first > ghost_origin[i - 1].first + 1)
 196 {
 197 ghost_indices.add_range(last_contiguous_start,
 198 ghost_origin[i - 1].first + 1);
 199 last_contiguous_start = ghost_origin[i].first;
 200 }
 201 if (ghost_origin[i].first > ghost_origin[i - 1].first)
 202 ++n_unique_ghosts;
 203 ghost_numbering[ghost_origin[i].second] = n_unique_ghosts;
 204 }
 205 ++n_unique_ghosts;
 206 ghost_indices.add_range(last_contiguous_start,
 207 ghost_origin.back().first + 1);
 208 ghost_indices.compress();
 209
 210 // make sure that we got the correct local numbering of the ghost
 211 // dofs. the ghost index set should store the same number
 212 {
 213 AssertDimension(n_unique_ghosts, ghost_indices.n_elements());
 214 for (std::size_t i = 0; i < n_ghosts; ++i)
 215 Assert(ghost_numbering[i] ==
 216 ghost_indices.index_within_set(ghost_dofs[i]),
 218 }
 219
 220 // apply correct numbering for ghost indices: We previously just
 221 // enumerated them according to their appearance in the
 222 // local_to_global structure. Above, we derived a relation between
 223 // this enumeration and the actual number
 224 const unsigned int n_boundary_cells = boundary_cells.size();
 225 for (unsigned int i = 0; i < n_boundary_cells; ++i)
 226 {
 227 unsigned int *data_ptr =
 228 dof_indices.data() +
 229 row_starts[boundary_cells[i] * n_components].first;
 230 const unsigned int *row_end =
 231 dof_indices.data() +
 232 row_starts[(boundary_cells[i] + 1) * n_components].first;
 // Owned indices (< n_owned) are kept; ghost indices are shifted
 // from the temporary numbering to the compressed one.
 233 for (; data_ptr != row_end; ++data_ptr)
 234 *data_ptr = ((*data_ptr < n_owned) ?
 235 *data_ptr :
 236 n_owned + ghost_numbering[*data_ptr - n_owned]);
 237
 238 // now the same procedure for plain indices
 239 if (store_plain_indices == true)
 240 {
 241 bool has_hanging_nodes = false;
 242
 243 const unsigned int fe_index =
 244 (cell_active_fe_index.empty() ||
 245 dofs_per_cell.size() == 1) ?
 246 0 :
 247 cell_active_fe_index[boundary_cells[i]];
 248 AssertIndexRange(fe_index, dofs_per_cell.size());
 249
 250 if (hanging_node_constraint_masks.size() > 0 &&
 252 hanging_node_constraint_masks[boundary_cells[i]] !=
 254 for (unsigned int comp = 0; comp < n_components; ++comp)
 255 has_hanging_nodes |=
 257
 // Plain indices only exist for cells that actually carry
 // constraints (hanging nodes or a nonzero indicator range).
 258 if (has_hanging_nodes ||
 259 row_starts[boundary_cells[i] * n_components].second !=
 260 row_starts[(boundary_cells[i] + 1) * n_components]
 261 .second)
 262 {
 263 unsigned int *data_ptr =
 264 plain_dof_indices.data() +
 265 row_starts_plain_indices[boundary_cells[i]];
 266 const unsigned int *row_end =
 267 data_ptr + dofs_per_cell[fe_index];
 268 for (; data_ptr != row_end; ++data_ptr)
 269 *data_ptr =
 270 ((*data_ptr < n_owned) ?
 271 *data_ptr :
 272 n_owned + ghost_numbering[*data_ptr - n_owned]);
 273 }
 274 }
 275 }
 276 }
 277
 // Release ghost_dofs storage for good (swap-with-empty idiom).
 278 std::vector<types::global_dof_index> empty;
 279 ghost_dofs.swap(empty);
 280
 281 // set the ghost indices now. need to cast away constness here, but that
 282 // is uncritical since we reset the Partitioner in the same initialize
 283 // call as this call here.
 286 vec_part->set_ghost_indices(ghost_indices);
 287
 288 if (use_vector_data_exchanger_full == false)
 289 vector_exchanger = std::make_shared<
 292 else
 294 std::make_shared<MatrixFreeFunctions::VectorDataExchange::Full>(
 295 vector_partitioner, communicator_sm);
 296 }
297
298
299
// Reorder all per-cell index arrays (row starts, DoF indices, constraint
// indicators, plain indices, hanging-node masks, active FE indices)
// according to the cell 'renumbering' produced by the task scheduler,
// padding lanes of incompletely filled cell batches, running debug-mode
// sanity checks, and finally delegating to compute_cell_index_compression().
// NOTE(review): leading integers are original source line numbers; every
// gap in that numbering marks lines lost in the doxygen extraction,
// including the function-name line (301, presumably
// "DoFInfo::reorder_cells(...)"), several Assert*/condition lines, and
// the right-hand sides of some expressions. Confirm against upstream.
 300 void
 302 const TaskInfo &task_info,
 303 const std::vector<unsigned int> &renumbering,
 304 const std::vector<unsigned int> &constraint_pool_row_index,
 305 const std::vector<unsigned char> &irregular_cells)
 306 {
 307 // first reorder the active FE index.
 308 const bool have_hp = dofs_per_cell.size() > 1;
 309 if (cell_active_fe_index.size() > 0)
 310 {
 311 std::vector<unsigned int> new_active_fe_index;
 312 new_active_fe_index.reserve(task_info.cell_partition_data.back());
 313 unsigned int position_cell = 0;
 314 for (unsigned int cell = 0;
 315 cell < task_info.cell_partition_data.back();
 316 ++cell)
 317 {
 318 const unsigned int n_comp =
 319 (irregular_cells[cell] > 0 ? irregular_cells[cell] :
 321
 322 // take maximum FE index among the ones present (we might have
 323 // lumped some lower indices into higher ones)
 324 unsigned int fe_index =
 325 cell_active_fe_index[renumbering[position_cell]];
 326 for (unsigned int j = 1; j < n_comp; ++j)
 327 fe_index = std::max(
 328 fe_index,
 329 cell_active_fe_index[renumbering[position_cell + j]]);
 330
 331 new_active_fe_index.push_back(fe_index);
 332 position_cell += n_comp;
 333 }
 334 std::swap(new_active_fe_index, cell_active_fe_index);
 335 }
 336 if (have_hp)
 338 task_info.cell_partition_data.back());
 339
 340 const unsigned int n_components = start_components.back();
 341
 // Freshly built replacements for the per-cell arrays; swapped into the
 // member fields at the end.
 342 std::vector<std::pair<unsigned int, unsigned int>> new_row_starts(
 344 task_info.cell_partition_data.back() +
 345 1);
 346 std::vector<unsigned int> new_dof_indices;
 347 std::vector<std::pair<unsigned short, unsigned short>>
 348 new_constraint_indicator;
 349 std::vector<unsigned int> new_plain_indices, new_rowstart_plain;
 350 unsigned int position_cell = 0;
 351 new_dof_indices.reserve(dof_indices.size());
 352 new_constraint_indicator.reserve(constraint_indicator.size());
 353
 354 std::vector<compressed_constraint_kind> new_hanging_node_constraint_masks;
 355 new_hanging_node_constraint_masks.reserve(
 357
 358 if (store_plain_indices == true)
 359 {
 360 new_rowstart_plain.resize(vectorization_length *
 361 task_info.cell_partition_data.back() +
 362 1,
 364 new_plain_indices.reserve(plain_dof_indices.size());
 365 }
 366
 367 // copy the indices and the constraint indicators to the new data field,
 368 // where we will go through the cells in the renumbered way. in case the
 369 // vectorization length does not exactly match up, we fill invalid
 370 // numbers to the rowstart data. for contiguous cell indices, we skip
 371 // the rowstarts field completely and directly go into the
 372 // new_dof_indices field (this layout is used in FEEvaluation).
 373 for (unsigned int i = 0; i < task_info.cell_partition_data.back(); ++i)
 374 {
 375 const unsigned int n_vect =
 376 (irregular_cells[i] > 0 ? irregular_cells[i] :
 378 const unsigned int dofs_per_cell =
 379 have_hp ? this->dofs_per_cell[cell_active_fe_index[i]] :
 380 this->dofs_per_cell[0];
 381
 // Filled lanes: copy data of the renumbered source cell.
 382 for (unsigned int j = 0; j < n_vect; ++j)
 383 {
 384 const unsigned int cell_no =
 385 renumbering[position_cell + j] * n_components;
 386
 387 bool has_hanging_nodes = false;
 388
 389 if (hanging_node_constraint_masks.size() > 0 &&
 391 {
 392 const auto mask =
 393 hanging_node_constraint_masks[renumbering[position_cell +
 394 j]];
 395 new_hanging_node_constraint_masks.push_back(mask);
 396
 398 for (unsigned int comp = 0; comp < n_components; ++comp)
 399 has_hanging_nodes |= hanging_node_constraint_masks_comp
 400 [have_hp ? cell_active_fe_index[i] : 0][comp];
 401 }
 402
 403 for (unsigned int comp = 0; comp < n_components; ++comp)
 404 {
 405 new_row_starts[(i * vectorization_length + j) * n_components +
 406 comp]
 407 .first = new_dof_indices.size();
 408 new_row_starts[(i * vectorization_length + j) * n_components +
 409 comp]
 410 .second = new_constraint_indicator.size();
 411
 412 new_dof_indices.insert(
 413 new_dof_indices.end(),
 414 dof_indices.data() + row_starts[cell_no + comp].first,
 415 dof_indices.data() + row_starts[cell_no + comp + 1].first);
 416 for (unsigned int index = row_starts[cell_no + comp].second;
 417 index != row_starts[cell_no + comp + 1].second;
 418 ++index)
 419 new_constraint_indicator.push_back(
 421 }
 // NOTE(review): the condition head (original line 422, presumably
 // "if (store_plain_indices && ...") was dropped by the extraction.
 423 ((row_starts[cell_no].second !=
 424 row_starts[cell_no + n_components].second) ||
 425 has_hanging_nodes))
 426 {
 427 new_rowstart_plain[i * vectorization_length + j] =
 428 new_plain_indices.size();
 429 new_plain_indices.insert(
 430 new_plain_indices.end(),
 431 plain_dof_indices.data() +
 433 plain_dof_indices.data() +
 436 }
 437 }
 // Unfilled lanes: write empty (zero-length) rows so batch layout
 // stays rectangular.
 438 for (unsigned int j = n_vect; j < vectorization_length; ++j)
 439 for (unsigned int comp = 0; comp < n_components; ++comp)
 440 {
 441 new_row_starts[(i * vectorization_length + j) * n_components +
 442 comp]
 443 .first = new_dof_indices.size();
 444 new_row_starts[(i * vectorization_length + j) * n_components +
 445 comp]
 446 .second = new_constraint_indicator.size();
 447 }
 448
 449 for (unsigned int j = n_vect; j < vectorization_length; ++j)
 450 if (hanging_node_constraint_masks.size() > 0)
 451 new_hanging_node_constraint_masks.push_back(
 453
 454 position_cell += n_vect;
 455 }
 456 AssertDimension(position_cell * n_components + 1, row_starts.size());
 457
 458 AssertDimension(dof_indices.size(), new_dof_indices.size());
 // Terminating sentinel entry of the row-start array.
 459 new_row_starts[task_info.cell_partition_data.back() *
 461 .first = new_dof_indices.size();
 462 new_row_starts[task_info.cell_partition_data.back() *
 464 .second = new_constraint_indicator.size();
 465
 467 new_constraint_indicator.size());
 468
 469 new_row_starts.swap(row_starts);
 470 new_dof_indices.swap(dof_indices);
 471 new_constraint_indicator.swap(constraint_indicator);
 472 new_plain_indices.swap(plain_dof_indices);
 473 new_rowstart_plain.swap(row_starts_plain_indices);
 474 new_hanging_node_constraint_masks.swap(hanging_node_constraint_masks);
 475
 476 if constexpr (running_in_debug_mode())
 477 {
 478 // sanity check 1: all indices should be smaller than the number of
 479 // dofs locally owned plus the number of ghosts
 480 const unsigned int index_range =
 481 (vector_partitioner->local_range().second -
 482 vector_partitioner->local_range().first) +
 483 vector_partitioner->ghost_indices().n_elements();
 484 for (const auto dof_index : dof_indices)
 485 AssertIndexRange(dof_index, index_range);
 486
 487 // sanity check 2: for the constraint indicators, the first index
 488 // should be smaller than the number of indices in the row, and the
 489 // second index should be smaller than the number of constraints in
 490 // the constraint pool.
 491 for (unsigned int row = 0; row < task_info.cell_partition_data.back();
 492 ++row)
 493 {
 494 const unsigned int row_length_ind =
 496 .first -
 500 .second,
 501 constraint_indicator.size() + 1);
 502 const std::pair<unsigned short, unsigned short>
 503 *con_it =
 504 constraint_indicator.data() +
 506 *end_con =
 507 constraint_indicator.data() +
 509 .second;
 510 for (; con_it != end_con; ++con_it)
 511 {
 512 AssertIndexRange(con_it->first, row_length_ind + 1);
 513 AssertIndexRange(con_it->second,
 514 constraint_pool_row_index.size() - 1);
 515 }
 516 }
 517
 518 // sanity check 3: check the number of cells once again
 519 unsigned int n_active_cells = 0;
 520 for (unsigned int c = 0;
 521 c < *(task_info.cell_partition_data.end() - 2);
 522 ++c)
 523 if (irregular_cells[c] > 0)
 524 n_active_cells += irregular_cells[c];
 525 else
 526 n_active_cells += vectorization_length;
 527 AssertDimension(n_active_cells, task_info.n_active_cells);
 528 }
 529
 530 compute_cell_index_compression(irregular_cells);
 531 }
532
533
534
// Classify, for each vectorized cell batch, how its DoF indices can be
// stored most compactly (full / contiguous / interleaved / interleaved
// with mixed strides), clean up rare single-cell outliers so that bulk
// storage kinds dominate, and copy interleaved indices into their own
// dedicated array (dof_indices_interleaved).
// NOTE(review): this doxygen-rendered listing lost many lines -- every
// gap in the embedded line numbering marks a loss, including the
// function-name line (536, presumably
// "DoFInfo::compute_cell_index_compression(...)"), most assignments to
// index_storage_variants, and several Assert*/condition lines. Treat the
// listing as incomplete and consult upstream dof_info.cc for full code.
 535 void
 537 const std::vector<unsigned char> &irregular_cells)
 538 {
 539 const bool have_hp = dofs_per_cell.size() > 1;
 540 const unsigned int n_components = start_components.back();
 541
 543 row_starts.size() % vectorization_length == 1,
 545 if (vectorization_length > 1)
 547 irregular_cells.size());
 549 irregular_cells.size(), IndexStorageVariants::full);
 551 irregular_cells.size());
 // Record how many lanes of each batch are actually filled.
 552 for (unsigned int i = 0; i < irregular_cells.size(); ++i)
 553 if (irregular_cells[i] > 0)
 554 n_vectorization_lanes_filled[dof_access_cell][i] = irregular_cells[i];
 555 else
 558
 560 irregular_cells.size() * vectorization_length,
 565 irregular_cells.size() * vectorization_length,
 567
 // Histogram of how many batches fall into each storage variant; used
 // by the cleanup steps below.
 568 std::vector<unsigned int> index_kinds(
 569 static_cast<unsigned int>(
 571 1);
 572 std::vector<unsigned int> offsets(vectorization_length);
 573 for (unsigned int i = 0; i < irregular_cells.size(); ++i)
 574 {
 575 const unsigned int ndofs =
 576 dofs_per_cell[have_hp ? cell_active_fe_index[i] : 0];
 577 const unsigned int n_comp =
 579
 580 // check 1: Check if there are constraints -> no compression possible
 581 bool has_constraints = false;
 582 for (unsigned int j = 0; j < n_comp; ++j)
 583 {
 584 const unsigned int cell_no = i * vectorization_length + j;
 585 if (row_starts[cell_no * n_components].second !=
 586 row_starts[(cell_no + 1) * n_components].second)
 587 {
 588 has_constraints = true;
 589 break;
 590 }
 591 }
 592 if (has_constraints)
 595 else
 596 {
 // check 2: each lane's indices form a contiguous ascending run?
 597 bool indices_are_contiguous = (ndofs > 0);
 598 for (unsigned int j = 0; j < n_comp; ++j)
 599 {
 600 const unsigned int cell_no = i * vectorization_length + j;
 601 const unsigned int *dof_indices =
 602 this->dof_indices.data() +
 603 row_starts[cell_no * n_components].first;
 605 ndofs,
 606 row_starts[(cell_no + 1) * n_components].first -
 607 row_starts[cell_no * n_components].first);
 608 for (unsigned int i = 1; i < ndofs; ++i)
 609 if (dof_indices[i] != dof_indices[0] + i)
 610 {
 611 indices_are_contiguous = false;
 612 break;
 613 }
 614 }
 615
 // check 3: lanes interleaved with unit stride across the batch?
 616 bool indices_are_interleaved_and_contiguous =
 617 (ndofs > 1 && n_comp == vectorization_length);
 618
 619 {
 620 const unsigned int *dof_indices =
 621 this->dof_indices.data() +
 623 for (unsigned int k = 0;
 624 k < ndofs && indices_are_interleaved_and_contiguous;
 625 ++k)
 626 for (unsigned int j = 0; j < n_comp; ++j)
 627 if (dof_indices[j * ndofs + k] !=
 628 dof_indices[0] + k * n_comp + j)
 629 {
 630 indices_are_interleaved_and_contiguous = false;
 631 break;
 632 }
 633 }
 634
 // For (interleaved-)contiguous batches only the start index per
 // lane needs to be kept.
 635 if (indices_are_contiguous ||
 636 indices_are_interleaved_and_contiguous)
 637 {
 638 for (unsigned int j = 0; j < n_comp; ++j)
 639 {
 640 const unsigned int start_index =
 643 .first;
 644 AssertIndexRange(start_index, dof_indices.size());
 646 [i * vectorization_length + j] =
 647 this->dof_indices.empty() ?
 648 0 :
 649 this->dof_indices[start_index];
 650 }
 651 }
 652
 653 if (indices_are_interleaved_and_contiguous)
 654 {
 658 for (unsigned int j = 0; j < n_comp; ++j)
 660 j] = n_comp;
 661 }
 662 else if (indices_are_contiguous)
 663 {
 666 for (unsigned int j = 0; j < n_comp; ++j)
 668 j] = 1;
 669 }
 670 else if (ndofs > 0)
 671 {
 // check 4: interleaved with a per-lane constant stride?
 // (2 = still possible, 1 = yes but mixed strides, 0 = no)
 672 int indices_are_interleaved_and_mixed = 2;
 673 const unsigned int *dof_indices =
 674 &this->dof_indices[row_starts[i * vectorization_length *
 676 .first];
 677 for (unsigned int j = 0; j < n_comp; ++j)
 678 offsets[j] =
 679 dof_indices[j * ndofs + 1] - dof_indices[j * ndofs];
 680 for (unsigned int k = 0;
 681 k < ndofs && indices_are_interleaved_and_mixed != 0;
 682 ++k)
 683 for (unsigned int j = 0; j < n_comp; ++j)
 684 // the first if case is to avoid negative offsets
 685 // (invalid)
 686 if (dof_indices[j * ndofs + 1] < dof_indices[j * ndofs] ||
 687 dof_indices[j * ndofs + k] !=
 688 dof_indices[j * ndofs] + k * offsets[j])
 689 {
 690 indices_are_interleaved_and_mixed = 0;
 691 break;
 692 }
 693 if (indices_are_interleaved_and_mixed == 2)
 694 {
 695 for (unsigned int j = 0; j < n_comp; ++j)
 698 offsets[j];
 699 for (unsigned int j = 0; j < n_comp; ++j)
 701 [i * vectorization_length + j] =
 702 dof_indices[j * ndofs];
 703 for (unsigned int j = 0; j < n_comp; ++j)
 704 if (offsets[j] != vectorization_length)
 705 {
 706 indices_are_interleaved_and_mixed = 1;
 707 break;
 708 }
 709 if (indices_are_interleaved_and_mixed == 1 ||
 710 n_comp != vectorization_length)
 712 IndexStorageVariants::
 713 interleaved_contiguous_mixed_strides;
 714 else
 717 }
 718 else
 719 {
 // Fallback: no compressible pattern detected for this batch.
 720 const unsigned int *dof_indices =
 721 this->dof_indices.data() +
 723 .first;
 724 if (n_comp == vectorization_length)
 727 else
 730
 731 // do not use interleaved storage if two entries within
 732 // vectorized array point to the same index (scatter not
 733 // possible due to race condition)
 734 for (const unsigned int *indices = dof_indices;
 735 indices != dof_indices + ndofs;
 736 ++indices)
 737 {
 738 bool is_sorted = true;
 739 unsigned int previous = indices[0];
 740 for (unsigned int l = 1; l < n_comp; ++l)
 741 {
 742 const unsigned int current = indices[l * ndofs];
 743 if (current <= previous)
 744 is_sorted = false;
 745
 746 // the simple check failed, must compare all
 747 // indices manually - due to short sizes this
 748 // O(n^2) algorithm is better than sorting
 749 if (!is_sorted)
 750 for (unsigned int j = 0; j < l; ++j)
 751 if (indices[j * ndofs] == current)
 752 {
 754 [dof_access_cell][i] =
 756 break;
 757 }
 758 previous = current;
 759 }
 760 }
 761 }
 762 }
 763 else // ndofs == 0
 766 }
 767 index_kinds[static_cast<unsigned int>(
 769 }
 770
 771 // Cleanup phase: we want to avoid single cells with different properties
 772 // than the bulk of the domain in order to avoid extra checks in the face
 773 // identification.
 774
 775 // Step 1: check whether the interleaved indices were only assigned to
 776 // the single cell within a vectorized array.
 777 auto fix_single_interleaved_indices =
 778 [&](const IndexStorageVariants variant) {
 779 if (index_kinds[static_cast<unsigned int>(
 781 0 &&
 782 index_kinds[static_cast<unsigned int>(variant)] > 0)
 783 for (unsigned int i = 0; i < irregular_cells.size(); ++i)
 784 {
 791 [i * vectorization_length] ==
 792 1))
 793 {
 795 index_kinds[static_cast<unsigned int>(
 796 IndexStorageVariants::
 797 interleaved_contiguous_mixed_strides)]--;
 798 index_kinds[static_cast<unsigned int>(variant)]++;
 799 }
 800 }
 801 };
 802
 803 fix_single_interleaved_indices(IndexStorageVariants::full);
 804 fix_single_interleaved_indices(IndexStorageVariants::contiguous);
 805 fix_single_interleaved_indices(IndexStorageVariants::interleaved);
 806
 807 unsigned int n_interleaved =
 808 index_kinds[static_cast<unsigned int>(
 810 index_kinds[static_cast<unsigned int>(
 812 index_kinds[static_cast<unsigned int>(
 814
 815 // Step 2: fix single contiguous cell among others with interleaved
 816 // storage
 817 if (n_interleaved > 0 && index_kinds[static_cast<unsigned int>(
 819 for (unsigned int i = 0; i < irregular_cells.size(); ++i)
 822 {
 825 index_kinds[static_cast<unsigned int>(
 827 index_kinds[static_cast<unsigned int>(
 829 }
 830
 831 // Step 3: Interleaved cells are left but also some non-contiguous ones
 832 // -> revert all to full storage
 833 if (n_interleaved > 0 &&
 834 index_kinds[static_cast<unsigned int>(IndexStorageVariants::full)] +
 835 index_kinds[static_cast<unsigned int>(
 837 0)
 838 for (unsigned int i = 0; i < irregular_cells.size(); ++i)
 841 {
 842 index_kinds[static_cast<unsigned int>(
 843 index_storage_variants[2][i])]--;
 848 else
 851 index_kinds[static_cast<unsigned int>(
 853 }
 854
 855 // Step 4: Copy the interleaved indices into their own data structure
 856 for (unsigned int i = 0; i < irregular_cells.size(); ++i)
 859 {
 862 {
 865 continue;
 866 }
 867 const unsigned int ndofs =
 868 dofs_per_cell[have_hp ? cell_active_fe_index[i] : 0];
 869 const unsigned int *dof_indices =
 870 &this->dof_indices
 872 unsigned int *interleaved_dof_indices =
 875 AssertDimension(this->dof_indices.size(),
 876 this->dof_indices_interleaved.size());
 881 this->dof_indices_interleaved.size() + 1);
 884 ndofs * vectorization_length,
 885 this->dof_indices_interleaved.size() + 1);
 // Transpose the lane-major layout into interleaved order: for each
 // DoF k, write the k-th index of every lane consecutively.
 886 for (unsigned int k = 0; k < ndofs; ++k)
 887 {
 888 const unsigned int *my_dof_indices = dof_indices + k;
 889 const unsigned int *end =
 890 interleaved_dof_indices + vectorization_length;
 891 for (; interleaved_dof_indices != end;
 892 ++interleaved_dof_indices, my_dof_indices += ndofs)
 893 *interleaved_dof_indices = *my_dof_indices;
 894 }
 895 }
 896 }
897
898
899
900 void
902 const Table<2, ShapeInfo<double>> &shape_info,
903 const unsigned int n_owned_cells,
904 const unsigned int n_lanes,
905 const std::vector<FaceToCellTopology<1>> &inner_faces,
906 const std::vector<FaceToCellTopology<1>> &ghosted_faces,
907 const bool fill_cell_centric,
908 const MPI_Comm communicator_sm,
909 const bool use_vector_data_exchanger_full)
910 {
912
913 // partitioner 0: no face integrals, simply use the indices present
914 // on the cells
915 std::vector<types::global_dof_index> ghost_indices;
916 {
917 const unsigned int n_components = start_components.back();
918 for (unsigned int cell = 0; cell < n_owned_cells; ++cell)
919 {
920 for (unsigned int i = row_starts[cell * n_components].first;
921 i < row_starts[(cell + 1) * n_components].first;
922 ++i)
923 if (dof_indices[i] >= part.locally_owned_size())
924 ghost_indices.push_back(part.local_to_global(dof_indices[i]));
925
926 const unsigned int fe_index =
927 dofs_per_cell.size() == 1 ? 0 :
928 cell_active_fe_index[cell / n_lanes];
929 const unsigned int dofs_this_cell = dofs_per_cell[fe_index];
930
931 for (unsigned int i = row_starts_plain_indices[cell];
932 i < row_starts_plain_indices[cell] + dofs_this_cell;
933 ++i)
934 if (plain_dof_indices[i] >= part.locally_owned_size())
935 ghost_indices.push_back(
937 }
938 std::sort(ghost_indices.begin(), ghost_indices.end());
939 IndexSet compressed_set(part.size());
940 compressed_set.add_indices(ghost_indices.begin(), ghost_indices.end());
941 compressed_set.subtract_set(part.locally_owned_range());
942 const bool all_ghosts_equal =
943 Utilities::MPI::min<int>(static_cast<int>(
944 compressed_set.n_elements() ==
945 part.ghost_indices().n_elements()),
946 part.get_mpi_communicator()) != 0;
947
948 std::shared_ptr<const Utilities::MPI::Partitioner> temp_0;
949
950 if (all_ghosts_equal)
951 temp_0 = vector_partitioner;
952 else
953 {
954 temp_0 = std::make_shared<Utilities::MPI::Partitioner>(
956 const_cast<Utilities::MPI::Partitioner *>(temp_0.get())
957 ->set_ghost_indices(compressed_set, part.ghost_indices());
958 }
959
960 if (use_vector_data_exchanger_full == false)
961 vector_exchanger_face_variants[0] = std::make_shared<
963 temp_0);
964 else
966 std::make_shared<MatrixFreeFunctions::VectorDataExchange::Full>(
967 temp_0, communicator_sm);
968 }
969
970 // construct a numbering of faces
971 std::vector<FaceToCellTopology<1>> all_faces(inner_faces);
972 all_faces.insert(all_faces.end(),
973 ghosted_faces.begin(),
974 ghosted_faces.end());
975 Table<2, unsigned int> cell_and_face_to_faces(
976 (row_starts.size() - 1) / start_components.back(),
977 2 * shape_info(0, 0).n_dimensions);
978 cell_and_face_to_faces.fill(numbers::invalid_unsigned_int);
979 for (unsigned int f = 0; f < all_faces.size(); ++f)
980 {
981 cell_and_face_to_faces(all_faces[f].cells_interior[0],
982 all_faces[f].interior_face_no) = f;
983 Assert(all_faces[f].cells_exterior[0] !=
986 cell_and_face_to_faces(all_faces[f].cells_exterior[0],
987 all_faces[f].exterior_face_no) = f;
988 }
989
990 // lambda function to detect objects on face pairs
991 const auto loop_over_faces =
992 [&](const std::function<
993 void(const unsigned int, const unsigned int, const bool)> &fu) {
994 for (const auto &face : inner_faces)
995 {
996 AssertIndexRange(face.cells_interior[0], n_owned_cells);
997 fu(face.cells_exterior[0], face.exterior_face_no, false /*flag*/);
998 }
999 };
1000
1001 const auto loop_over_all_faces =
1002 [&](const std::function<
1003 void(const unsigned int, const unsigned int, const bool)> &fu) {
1004 for (unsigned int c = 0; c < cell_and_face_to_faces.size(0); ++c)
1005 for (unsigned int d = 0; d < cell_and_face_to_faces.size(1); ++d)
1006 {
1007 const unsigned int f = cell_and_face_to_faces(c, d);
1009 continue;
1010
1011 const unsigned int cell_m = all_faces[f].cells_interior[0];
1012 const unsigned int cell_p = all_faces[f].cells_exterior[0];
1013
1014 const bool ext = c == cell_m;
1015
1016 if (ext && cell_p == numbers::invalid_unsigned_int)
1017 continue;
1018
1019 const unsigned int p = ext ? cell_p : cell_m;
1020 const unsigned int face_no = ext ?
1021 all_faces[f].exterior_face_no :
1022 all_faces[f].interior_face_no;
1023
1024 fu(p, face_no, true);
1025 }
1026 };
1027
1028 const auto process_values =
1029 [&](
1030 std::shared_ptr<const Utilities::MPI::Partitioner>
1031 &vector_partitioner_values,
1032 const std::function<void(
1033 const std::function<void(
1034 const unsigned int, const unsigned int, const bool)> &)> &loop) {
1035 bool all_nodal_and_tensorial = shape_info.size(1) == 1;
1036
1037 if (all_nodal_and_tensorial)
1038 for (unsigned int c = 0; c < n_base_elements; ++c)
1039 {
1040 const auto &si =
1041 shape_info(global_base_element_offset + c, 0).data.front();
1042 if (!si.nodal_at_cell_boundaries ||
1043 (si.element_type ==
1045 all_nodal_and_tensorial = false;
1046 }
1047
1048 if (all_nodal_and_tensorial == false)
1049 vector_partitioner_values = vector_partitioner;
1050 else
1051 {
1052 bool has_noncontiguous_cell = false;
1053
1054 loop([&](const unsigned int cell_no,
1055 const unsigned int face_no,
1056 const bool flag) {
1057 const unsigned int index =
1059 if (flag || (index != numbers::invalid_unsigned_int &&
1060 index >= part.locally_owned_size()))
1061 {
1062 const unsigned int stride =
1064 unsigned int i = 0;
1065 for (unsigned int e = 0; e < n_base_elements; ++e)
1066 for (unsigned int c = 0; c < n_components[e]; ++c)
1067 {
1068 const ShapeInfo<double> &shape =
1069 shape_info(global_base_element_offset + e, 0);
1070 for (unsigned int j = 0;
1072 ++j)
1073 ghost_indices.push_back(part.local_to_global(
1074 index + i +
1075 shape.face_to_cell_index_nodal(face_no, j) *
1076 stride));
1077 i += shape.dofs_per_component_on_cell * stride;
1078 }
1079 AssertDimension(i, dofs_per_cell[0] * stride);
1080 }
1082 has_noncontiguous_cell = true;
1083 });
1084 has_noncontiguous_cell =
1085 Utilities::MPI::min<int>(static_cast<int>(
1086 has_noncontiguous_cell),
1087 part.get_mpi_communicator()) != 0;
1088
1089 std::sort(ghost_indices.begin(), ghost_indices.end());
1090 IndexSet compressed_set(part.size());
1091 compressed_set.add_indices(ghost_indices.begin(),
1092 ghost_indices.end());
1093 compressed_set.subtract_set(part.locally_owned_range());
1094 const bool all_ghosts_equal =
1095 Utilities::MPI::min<int>(static_cast<int>(
1096 compressed_set.n_elements() ==
1097 part.ghost_indices().n_elements()),
1098 part.get_mpi_communicator()) != 0;
1099 if (all_ghosts_equal || has_noncontiguous_cell)
1100 vector_partitioner_values = vector_partitioner;
1101 else
1102 {
1103 vector_partitioner_values =
1104 std::make_shared<Utilities::MPI::Partitioner>(
1106 const_cast<Utilities::MPI::Partitioner *>(
1107 vector_partitioner_values.get())
1108 ->set_ghost_indices(compressed_set, part.ghost_indices());
1109 }
1110 }
1111 };
1112
1113
1114 const auto process_gradients =
1115 [&](
1116 const std::shared_ptr<const Utilities::MPI::Partitioner>
1117 &vector_partitoner_values,
1118 std::shared_ptr<const Utilities::MPI::Partitioner>
1119 &vector_partitioner_gradients,
1120 const std::function<void(
1121 const std::function<void(
1122 const unsigned int, const unsigned int, const bool)> &)> &loop) {
1123 bool all_hermite = shape_info.size(1) == 1;
1124
1125 if (all_hermite)
1126 for (unsigned int c = 0; c < n_base_elements; ++c)
 1127 if (shape_info(global_base_element_offset + c, 0).element_type !=
 1128 ElementType::tensor_symmetric_hermite)
 1129 all_hermite = false;
1130 if (all_hermite == false ||
1131 vector_partitoner_values.get() == vector_partitioner.get())
1132 vector_partitioner_gradients = vector_partitioner;
1133 else
1134 {
1135 loop([&](const unsigned int cell_no,
1136 const unsigned int face_no,
1137 const bool flag) {
 1138 const unsigned int index =
 1139 dof_indices_contiguous[dof_access_cell][cell_no];
 1140 if (flag || (index != numbers::invalid_unsigned_int &&
1141 index >= part.locally_owned_size()))
1142 {
 1143 const unsigned int stride =
 1144 dof_indices_interleave_strides[dof_access_cell][cell_no];
 1145 unsigned int i = 0;
1146 for (unsigned int e = 0; e < n_base_elements; ++e)
1147 for (unsigned int c = 0; c < n_components[e]; ++c)
1148 {
1149 const ShapeInfo<double> &shape =
1150 shape_info(global_base_element_offset + e, 0);
1151 for (unsigned int j = 0;
1152 j < 2 * shape.dofs_per_component_on_face;
1153 ++j)
1154 ghost_indices.push_back(part.local_to_global(
1155 index + i +
1156 shape.face_to_cell_index_hermite(face_no, j) *
1157 stride));
1158 i += shape.dofs_per_component_on_cell * stride;
1159 }
1160 AssertDimension(i, dofs_per_cell[0] * stride);
1161 }
1162 });
1163 std::sort(ghost_indices.begin(), ghost_indices.end());
1164 IndexSet compressed_set(part.size());
1165 compressed_set.add_indices(ghost_indices.begin(),
1166 ghost_indices.end());
1167 compressed_set.subtract_set(part.locally_owned_range());
1168 const bool all_ghosts_equal =
1169 Utilities::MPI::min<int>(static_cast<int>(
1170 compressed_set.n_elements() ==
1171 part.ghost_indices().n_elements()),
1172 part.get_mpi_communicator()) != 0;
1173 if (all_ghosts_equal)
1174 vector_partitioner_gradients = vector_partitioner;
1175 else
1176 {
 1177 vector_partitioner_gradients =
 1178 std::make_shared<Utilities::MPI::Partitioner>(
 1179 part.locally_owned_range(), part.get_mpi_communicator());
 1180 const_cast<Utilities::MPI::Partitioner *>(
1181 vector_partitioner_gradients.get())
1182 ->set_ghost_indices(compressed_set, part.ghost_indices());
1183 }
1184 }
1185 };
1186
1187 std::shared_ptr<const Utilities::MPI::Partitioner> temp_1, temp_2, temp_3,
1188 temp_4;
1189
1190 // partitioner 1: values on faces
1191 process_values(temp_1, loop_over_faces);
1192
1193 // partitioner 2: values and gradients on faces
1194 process_gradients(temp_1, temp_2, loop_over_faces);
1195
1196 if (fill_cell_centric)
1197 {
1198 ghost_indices.clear();
1199 // partitioner 3: values on all faces
1200 process_values(temp_3, loop_over_all_faces);
1201 // partitioner 4: values and gradients on faces
1202 process_gradients(temp_3, temp_4, loop_over_all_faces);
1203 }
1204 else
1205 {
 1206 temp_3 = std::make_shared<Utilities::MPI::Partitioner>(
 1207 part.locally_owned_range(), part.get_mpi_communicator());
 1208 temp_4 = std::make_shared<Utilities::MPI::Partitioner>(
 1209 part.locally_owned_range(), part.get_mpi_communicator());
1210 }
1211
1212 if (use_vector_data_exchanger_full == false)
1213 {
 1214 vector_exchanger_face_variants[1] = std::make_shared<
 1215 MatrixFreeFunctions::VectorDataExchange::PartitionerWrapper>(
 1216 temp_1);
 1217 vector_exchanger_face_variants[2] = std::make_shared<
 1218 MatrixFreeFunctions::VectorDataExchange::PartitionerWrapper>(
 1219 temp_2);
 1220 vector_exchanger_face_variants[3] = std::make_shared<
 1221 MatrixFreeFunctions::VectorDataExchange::PartitionerWrapper>(
 1222 temp_3);
 1223 vector_exchanger_face_variants[4] = std::make_shared<
 1224 MatrixFreeFunctions::VectorDataExchange::PartitionerWrapper>(
 1225 temp_4);
1226 }
1227 else
1228 {
 1229 vector_exchanger_face_variants[1] =
 1230 std::make_shared<MatrixFreeFunctions::VectorDataExchange::Full>(
 1231 temp_1, communicator_sm);
 1232 vector_exchanger_face_variants[2] =
 1233 std::make_shared<MatrixFreeFunctions::VectorDataExchange::Full>(
 1234 temp_2, communicator_sm);
 1235 vector_exchanger_face_variants[3] =
 1236 std::make_shared<MatrixFreeFunctions::VectorDataExchange::Full>(
 1237 temp_3, communicator_sm);
 1238 vector_exchanger_face_variants[4] =
 1239 std::make_shared<MatrixFreeFunctions::VectorDataExchange::Full>(
 1240 temp_4, communicator_sm);
1241 }
1242 }
1243
1244
1245
 1246 void
 1247 DoFInfo::compute_shared_memory_contiguous_indices(
 1248 std::array<std::vector<std::pair<unsigned int, unsigned int>>, 3>
1249 &cell_indices_contiguous_sm)
1250 {
1251 AssertDimension(dofs_per_cell.size(), 1);
1252
1253 for (unsigned int i = 0; i < 3; ++i)
1254 {
1255 dof_indices_contiguous_sm[i].resize(
1256 cell_indices_contiguous_sm[i].size());
1257
1258 for (unsigned int j = 0; j < cell_indices_contiguous_sm[i].size();
1259 ++j)
 1260 if (cell_indices_contiguous_sm[i][j].first !=
 1261 numbers::invalid_unsigned_int)
 1262 dof_indices_contiguous_sm[i][j] = {
 1263 cell_indices_contiguous_sm[i][j].first,
 1264 cell_indices_contiguous_sm[i][j].second * dofs_per_cell[0]};
 1265 else
 1266 dof_indices_contiguous_sm[i][j] = {numbers::invalid_unsigned_int,
 1267 numbers::invalid_unsigned_int};
 1268 }
1269 }
1270
1271
1272
1273 namespace internal
1274 {
1275 // We construct the connectivity graph in parallel. we use one lock for
1276 // 256 degrees of freedom to keep the number of locks down to a
1277 // reasonable level and reduce the cost of locking to some extent.
1278 static constexpr unsigned int bucket_size_threading = 256;
1279
1280
1281
1282 void
1283 compute_row_lengths(const unsigned int begin,
1284 const unsigned int end,
1285 const DoFInfo &dof_info,
1286 std::vector<std::mutex> &mutexes,
1287 std::vector<unsigned int> &row_lengths)
1288 {
1289 std::vector<unsigned int> scratch;
1290 const unsigned int n_components = dof_info.start_components.back();
1291 for (unsigned int block = begin; block < end; ++block)
1292 {
1293 scratch.assign(
1294 dof_info.dof_indices.data() +
1295 dof_info.row_starts[block * n_components].first,
1296 dof_info.dof_indices.data() +
1297 dof_info.row_starts[(block + 1) * n_components].first);
1298 std::sort(scratch.begin(), scratch.end());
1299
1300 const std::vector<unsigned int>::const_iterator end_unique =
1301 std::unique(scratch.begin(), scratch.end());
1302 for (std::vector<unsigned int>::const_iterator it = scratch.begin();
1303 it != end_unique;
1304 /* update in loop body */)
1305 {
1306 // In this code, the procedure is that we insert all elements
1307 // that are within the range of one lock at once
 1308 const unsigned int next_bucket =
 1309 (*it / bucket_size_threading + 1) * bucket_size_threading;
 1310
1311 std::lock_guard<std::mutex> lock(
1312 mutexes[*it / bucket_size_threading]);
1313 for (; it != end_unique && *it < next_bucket; ++it)
1314 {
1315 AssertIndexRange(*it, row_lengths.size());
1316 ++row_lengths[*it];
1317 }
1318 }
1319 }
1320 }
1321
1322 void
1323 fill_connectivity_dofs(const unsigned int begin,
1324 const unsigned int end,
1325 const DoFInfo &dof_info,
1326 const std::vector<unsigned int> &row_lengths,
1327 std::vector<std::mutex> &mutexes,
1328 ::SparsityPattern &connectivity_dof)
1329 {
1330 std::vector<unsigned int> scratch;
1331 const unsigned int n_components = dof_info.start_components.back();
1332 for (unsigned int block = begin; block < end; ++block)
1333 {
1334 scratch.assign(
1335 dof_info.dof_indices.data() +
1336 dof_info.row_starts[block * n_components].first,
1337 dof_info.dof_indices.data() +
1338 dof_info.row_starts[(block + 1) * n_components].first);
1339 std::sort(scratch.begin(), scratch.end());
1340
1341 const std::vector<unsigned int>::const_iterator end_unique =
1342 std::unique(scratch.begin(), scratch.end());
1343 for (std::vector<unsigned int>::const_iterator it = scratch.begin();
1344 it != end_unique;
1345 /* update in loop body */)
1346 {
 1347 const unsigned int next_bucket =
 1348 (*it / bucket_size_threading + 1) * bucket_size_threading;
 1349
1350 std::lock_guard<std::mutex> lock(
1351 mutexes[*it / bucket_size_threading]);
1352 for (; it != end_unique && *it < next_bucket; ++it)
1353 if (row_lengths[*it] > 0)
1354 connectivity_dof.add(*it, block);
1355 }
1356 }
1357 }
1358
1359
1360
1361 void
1362 fill_connectivity(const unsigned int begin,
1363 const unsigned int end,
1364 const DoFInfo &dof_info,
1365 const std::vector<unsigned int> &renumbering,
1366 const ::SparsityPattern &connectivity_dof,
1367 DynamicSparsityPattern &connectivity)
1368 {
1369 ordered_vector row_entries;
1370 const unsigned int n_components = dof_info.start_components.back();
1371 for (unsigned int block = begin; block < end; ++block)
1372 {
1373 row_entries.clear();
1374
1375 const unsigned int
1376 *it = dof_info.dof_indices.data() +
1377 dof_info.row_starts[block * n_components].first,
1378 *end_cell = dof_info.dof_indices.data() +
1379 dof_info.row_starts[(block + 1) * n_components].first;
1380 for (; it != end_cell; ++it)
1381 {
1382 SparsityPattern::iterator sp = connectivity_dof.begin(*it);
1383 std::vector<types::global_dof_index>::iterator insert_pos =
1384 row_entries.begin();
1385 for (; sp != connectivity_dof.end(*it); ++sp)
1386 if (sp->column() != block)
1387 row_entries.insert(renumbering[sp->column()], insert_pos);
1388 }
1389 connectivity.add_entries(renumbering[block],
1390 row_entries.begin(),
1391 row_entries.end());
1392 }
1393 }
1394
1395 } // namespace internal
1396
 1397 void
 1398 DoFInfo::make_connectivity_graph(
 1399 const TaskInfo &task_info,
1400 const std::vector<unsigned int> &renumbering,
1401 DynamicSparsityPattern &connectivity) const
1402 {
1403 unsigned int n_rows = (vector_partitioner->local_range().second -
1404 vector_partitioner->local_range().first) +
1405 vector_partitioner->ghost_indices().n_elements();
1406
1407 // Avoid square sparsity patterns that allocate the diagonal entry
1408 if (n_rows == task_info.n_active_cells)
1409 ++n_rows;
1410
1411 // first determine row lengths
1412 std::vector<unsigned int> row_lengths(n_rows);
1413 std::vector<std::mutex> mutexes(n_rows / internal::bucket_size_threading +
 1414 1);
 1415 parallel::apply_to_subranges(
 1416 0,
1417 task_info.n_active_cells,
1418 [this, &mutexes, &row_lengths](const unsigned int begin,
1419 const unsigned int end) {
1420 internal::compute_row_lengths(
1421 begin, end, *this, mutexes, row_lengths);
1422 },
1423 20);
1424
1425 // disregard dofs that only sit on a single cell because they cannot
1426 // couple
1427 for (unsigned int row = 0; row < n_rows; ++row)
1428 if (row_lengths[row] <= 1)
1429 row_lengths[row] = 0;
1430
1431 // Create a temporary sparsity pattern that holds to each degree of
1432 // freedom on which cells it appears, i.e., store the connectivity
1433 // between cells and dofs
1434 SparsityPattern connectivity_dof(n_rows,
1435 task_info.n_active_cells,
 1436 row_lengths);
 1437 parallel::apply_to_subranges(
 1438 0,
1439 task_info.n_active_cells,
1440 [this, &row_lengths, &mutexes, &connectivity_dof](
1441 const unsigned int begin, const unsigned int end) {
1442 internal::fill_connectivity_dofs(
1443 begin, end, *this, row_lengths, mutexes, connectivity_dof);
1444 },
1445 20);
1446 connectivity_dof.compress();
1447
1448
1449 // Invert renumbering for use in fill_connectivity.
1450 std::vector<unsigned int> reverse_numbering(task_info.n_active_cells);
1451 reverse_numbering = Utilities::invert_permutation(renumbering);
1452
1453 // From the above connectivity between dofs and cells, we can finally
1454 // create a connectivity list between cells. The connectivity graph
1455 // should apply the renumbering, i.e., the entry for cell j is the entry
1456 // for cell renumbering[j] in the original ordering.
 1457 parallel::apply_to_subranges(
 1458 0,
1459 task_info.n_active_cells,
1460 [this, &reverse_numbering, &connectivity_dof, &connectivity](
1461 const unsigned int begin, const unsigned int end) {
1462 internal::fill_connectivity(begin,
1463 end,
1464 *this,
1465 reverse_numbering,
1466 connectivity_dof,
1467 connectivity);
1468 },
1469 20);
1470 }
1471
1472
1473
 1474 void
 1475 DoFInfo::compute_dof_renumbering(
 1476 std::vector<types::global_dof_index> &renumbering)
1477 {
1478 const unsigned int locally_owned_size =
1479 vector_partitioner->locally_owned_size();
1480 renumbering.resize(0);
1481 renumbering.resize(locally_owned_size, numbers::invalid_dof_index);
1482
1483 types::global_dof_index counter = 0;
1484 const unsigned int n_components = start_components.back();
 1485 const unsigned int n_cell_batches =
 1486 n_vectorization_lanes_filled[dof_access_cell].size();
 1487 Assert(n_cell_batches <=
 1488 (row_starts.size() - 1) / vectorization_length / n_components,
 1489 ExcInternalError());
 1490 for (unsigned int cell_no = 0; cell_no < n_cell_batches; ++cell_no)
1491 {
 1492 // do not renumber in case we have constraints
 1493 if (row_starts[cell_no * n_components * vectorization_length]
 1494 .second ==
 1495 row_starts[(cell_no + 1) * n_components * vectorization_length]
 1496 .second)
 1497 {
 1498 const unsigned int ndofs =
 1499 dofs_per_cell.size() == 1 ?
 1500 dofs_per_cell[0] :
 1501 (dofs_per_cell[cell_active_fe_index.size() > 0 ?
 1502 cell_active_fe_index[cell_no] :
 1503 0]);
1504 const unsigned int *dof_ind =
1505 dof_indices.data() +
1506 row_starts[cell_no * n_components * vectorization_length].first;
1507 for (unsigned int i = 0; i < ndofs; ++i)
 1508 for (unsigned int j = 0;
 1509 j < n_vectorization_lanes_filled[dof_access_cell][cell_no];
 1510 ++j)
1511 if (dof_ind[j * ndofs + i] < locally_owned_size)
 1512 if (renumbering[dof_ind[j * ndofs + i]] ==
 1513 numbers::invalid_dof_index)
 1514 renumbering[dof_ind[j * ndofs + i]] = counter++;
1515 }
1516 }
1517
1518 AssertIndexRange(counter, locally_owned_size + 1);
1519 for (types::global_dof_index &dof_index : renumbering)
1520 if (dof_index == numbers::invalid_dof_index)
1521 dof_index = counter++;
1522
1523 // transform indices to global index space
1524 for (types::global_dof_index &dof_index : renumbering)
1525 dof_index = vector_partitioner->local_to_global(dof_index);
1526
1527 AssertDimension(counter, renumbering.size());
1528 }
1529
1530
1531
 1532 std::size_t
 1533 DoFInfo::memory_consumption() const
 1534 {
1535 std::size_t memory = sizeof(*this);
1536 for (const auto &storage : index_storage_variants)
1537 memory += storage.capacity() * sizeof(storage[0]);
1538 memory +=
1539 (row_starts.capacity() * sizeof(std::pair<unsigned int, unsigned int>));
1543 memory +=
1545 memory +=
1547 memory +=
1551 memory +=
1561 memory +=
1567 memory +=
1572 memory +=
1575 return memory;
1576 }
1577 } // namespace MatrixFreeFunctions
1578} // namespace internal
1579
1580namespace internal
1581{
1582 namespace MatrixFreeFunctions
1583 {
1584 template void
1586 const std::vector<types::global_dof_index> &,
1587 const std::vector<types::global_dof_index> &,
1588 const bool,
1589 const ::AffineConstraints<double> &,
1590 const unsigned int,
1591 ConstraintValues<double> &,
1592 bool &);
1593
1594 template void
1596 const std::vector<types::global_dof_index> &,
1597 const std::vector<types::global_dof_index> &,
1598 const bool,
1599 const ::AffineConstraints<float> &,
1600 const unsigned int,
1601 ConstraintValues<double> &,
1602 bool &);
1603
1604 template bool
1606 const HangingNodes<1> &,
1607 const std::vector<std::vector<unsigned int>> &,
1608 const unsigned int,
1610 std::vector<types::global_dof_index> &);
1611 template bool
1613 const HangingNodes<2> &,
1614 const std::vector<std::vector<unsigned int>> &,
1615 const unsigned int,
1617 std::vector<types::global_dof_index> &);
1618 template bool
1620 const HangingNodes<3> &,
1621 const std::vector<std::vector<unsigned int>> &,
1622 const unsigned int,
1624 std::vector<types::global_dof_index> &);
1625
1626 template void
1628 const std::vector<FaceToCellTopology<1>> &,
1629 bool);
1630 template void
1632 const std::vector<FaceToCellTopology<2>> &,
1633 bool);
1634 template void
1636 const std::vector<FaceToCellTopology<4>> &,
1637 bool);
1638 template void
1640 const std::vector<FaceToCellTopology<8>> &,
1641 bool);
1642 template void
1644 const std::vector<FaceToCellTopology<16>> &,
1645 bool);
1646
1647 template void
1649 const TaskInfo &,
1650 const std::vector<FaceToCellTopology<1>> &);
1651 template void
1653 const TaskInfo &,
1654 const std::vector<FaceToCellTopology<2>> &);
1655 template void
1657 const TaskInfo &,
1658 const std::vector<FaceToCellTopology<4>> &);
1659 template void
1661 const TaskInfo &,
1662 const std::vector<FaceToCellTopology<8>> &);
1663 template void
1665 const TaskInfo &,
1666 const std::vector<FaceToCellTopology<16>> &);
1667
1668 template void
1670 const TaskInfo &) const;
1671 template void
1674 const TaskInfo &) const;
1675
1676 template void
1677 DoFInfo::print<double>(const std::vector<double> &,
1678 const std::vector<unsigned int> &,
1679 std::ostream &) const;
1680
1681 template void
1682 DoFInfo::print<float>(const std::vector<float> &,
1683 const std::vector<unsigned int> &,
1684 std::ostream &) const;
1685 } // namespace MatrixFreeFunctions
1686} // namespace internal
1687
void add_entries(const size_type row, ForwardIterator begin, ForwardIterator end, const bool indices_are_unique_and_sorted=false)
size_type index_within_set(const size_type global_index) const
Definition index_set.h:2017
size_type n_elements() const
Definition index_set.h:1949
void subtract_set(const IndexSet &other)
Definition index_set.cc:480
void add_range(const size_type begin, const size_type end)
Definition index_set.h:1818
void compress() const
Definition index_set.h:1799
void add_indices(const ForwardIterator &begin, const ForwardIterator &end)
Definition index_set.h:1846
SparsityPatternIterators::Iterator iterator
void add(const size_type i, const size_type j)
const IndexSet & locally_owned_range() const
const IndexSet & ghost_indices() const
unsigned int locally_owned_size() const
types::global_dof_index local_to_global(const unsigned int local_index) const
virtual MPI_Comm get_mpi_communicator() const override
void set_ghost_indices(const IndexSet &ghost_indices, const IndexSet &larger_ghost_index_set=IndexSet())
types::global_dof_index size() const
#define DEAL_II_NAMESPACE_OPEN
Definition config.h:40
constexpr bool running_in_debug_mode()
Definition config.h:78
#define DEAL_II_NAMESPACE_CLOSE
Definition config.h:41
#define Assert(cond, exc)
#define AssertDimension(dim1, dim2)
#define AssertIndexRange(index, range)
static ::ExceptionBase & ExcInternalError()
static ::ExceptionBase & ExcNotInitialized()
std::enable_if_t< std::is_fundamental_v< T >, std::size_t > memory_consumption(const T &t)
T min(const T &t, const MPI_Comm mpi_communicator)
std::vector< Integer > invert_permutation(const std::vector< Integer > &permutation)
Definition utilities.h:1700
void fill_connectivity_dofs(const unsigned int begin, const unsigned int end, const DoFInfo &dof_info, const std::vector< unsigned int > &row_lengths, std::vector< std::mutex > &mutexes, ::SparsityPattern &connectivity_dof)
Definition dof_info.cc:1323
void fill_connectivity(const unsigned int begin, const unsigned int end, const DoFInfo &dof_info, const std::vector< unsigned int > &renumbering, const ::SparsityPattern &connectivity_dof, DynamicSparsityPattern &connectivity)
Definition dof_info.cc:1362
void compute_row_lengths(const unsigned int begin, const unsigned int end, const DoFInfo &dof_info, std::vector< std::mutex > &mutexes, std::vector< unsigned int > &row_lengths)
Definition dof_info.cc:1283
static constexpr unsigned int bucket_size_threading
Definition dof_info.cc:1278
constexpr compressed_constraint_kind unconstrained_compressed_constraint_kind
constexpr types::global_dof_index invalid_dof_index
Definition types.h:269
constexpr unsigned int invalid_unsigned_int
Definition types.h:238
void apply_to_subranges(const Iterator &begin, const std_cxx20::type_identity_t< Iterator > &end, const Function &f, const unsigned int grainsize)
Definition parallel.h:540
::VectorizedArray< Number, width > max(const ::VectorizedArray< Number, width > &, const ::VectorizedArray< Number, width > &)
unsigned int global_dof_index
Definition types.h:94
void print(const std::vector< Number > &constraint_pool_data, const std::vector< unsigned int > &constraint_pool_row_index, std::ostream &out) const
void reorder_cells(const TaskInfo &task_info, const std::vector< unsigned int > &renumbering, const std::vector< unsigned int > &constraint_pool_row_index, const std::vector< unsigned char > &irregular_cells)
Definition dof_info.cc:301
std::vector< std::pair< unsigned short, unsigned short > > constraint_indicator
Definition dof_info.h:538
std::vector< unsigned int > cell_loop_pre_list_index
Definition dof_info.h:749
std::vector< std::pair< unsigned int, unsigned int > > row_starts
Definition dof_info.h:497
std::vector< std::vector< unsigned int > > component_dof_indices_offset
Definition dof_info.h:690
void compute_tight_partitioners(const Table< 2, ShapeInfo< double > > &shape_info, const unsigned int n_owned_cells, const unsigned int n_lanes, const std::vector< FaceToCellTopology< 1 > > &inner_faces, const std::vector< FaceToCellTopology< 1 > > &ghosted_faces, const bool fill_cell_centric, const MPI_Comm communicator_sm, const bool use_vector_data_exchanger_full)
Definition dof_info.cc:901
std::vector< std::vector< bool > > hanging_node_constraint_masks_comp
Definition dof_info.h:520
void get_dof_indices_on_cell_batch(std::vector< unsigned int > &local_indices, const unsigned int cell_batch, const bool with_constraints=true) const
Definition dof_info.cc:84
void compute_cell_index_compression(const std::vector< unsigned char > &irregular_cells)
Definition dof_info.cc:536
std::vector< unsigned int > cell_loop_post_list_index
Definition dof_info.h:762
std::vector< unsigned int > dofs_per_cell
Definition dof_info.h:695
void assign_ghosts(const std::vector< unsigned int > &boundary_cells, const MPI_Comm communicator_sm, const bool use_vector_data_exchanger_full)
Definition dof_info.cc:157
void compute_dof_renumbering(std::vector< types::global_dof_index > &renumbering)
Definition dof_info.cc:1475
std::shared_ptr< const internal::MatrixFreeFunctions::VectorDataExchange::Base > vector_exchanger
Definition dof_info.h:600
std::vector< std::vector< unsigned int > > fe_index_conversion
Definition dof_info.h:723
std::vector< unsigned int > dof_indices
Definition dof_info.h:514
std::vector< std::pair< unsigned int, unsigned int > > vector_zero_range_list
Definition dof_info.h:742
std::shared_ptr< const Utilities::MPI::Partitioner > vector_partitioner
Definition dof_info.h:593
std::vector< compressed_constraint_kind > hanging_node_constraint_masks
Definition dof_info.h:526
std::array< std::vector< unsigned int >, 3 > dof_indices_interleave_strides
Definition dof_info.h:574
std::array< std::vector< std::pair< unsigned int, unsigned int > >, 3 > dof_indices_contiguous_sm
Definition dof_info.h:564
std::vector< unsigned int > row_starts_plain_indices
Definition dof_info.h:637
std::vector< unsigned int > dofs_per_face
Definition dof_info.h:700
std::vector< unsigned int > component_to_base_index
Definition dof_info.h:677
void compute_vector_zero_access_pattern(const TaskInfo &task_info, const std::vector< FaceToCellTopology< length > > &faces)
std::vector< unsigned int > n_components
Definition dof_info.h:665
void print_memory_consumption(StreamType &out, const TaskInfo &size_info) const
void compute_face_index_compression(const std::vector< FaceToCellTopology< length > > &faces, bool hold_all_faces_to_owned_cells)
std::vector< std::pair< unsigned int, unsigned int > > cell_loop_pre_list
Definition dof_info.h:755
std::vector< types::global_dof_index > ghost_dofs
Definition dof_info.h:730
void read_dof_indices(const std::vector< types::global_dof_index > &local_indices_resolved, const std::vector< types::global_dof_index > &local_indices, const bool cell_has_hanging_nodes, const ::AffineConstraints< number > &constraints, const unsigned int cell_number, ConstraintValues< double > &constraint_values, bool &cell_at_boundary)
std::array< std::vector< unsigned int >, 3 > dof_indices_contiguous
Definition dof_info.h:553
bool process_hanging_node_constraints(const HangingNodes< dim > &hanging_nodes, const std::vector< std::vector< unsigned int > > &lexicographic_mapping, const unsigned int cell_number, const TriaIterator< DoFCellAccessor< dim, dim, false > > &cell, std::vector< types::global_dof_index > &dof_indices)
std::vector< unsigned int > cell_active_fe_index
Definition dof_info.h:710
std::array< std::shared_ptr< const internal::MatrixFreeFunctions::VectorDataExchange::Base >, 5 > vector_exchanger_face_variants
Definition dof_info.h:625
std::vector< unsigned int > plain_dof_indices
Definition dof_info.h:647
std::array< std::vector< unsigned char >, 3 > n_vectorization_lanes_filled
Definition dof_info.h:585
std::vector< unsigned int > start_components
Definition dof_info.h:671
std::vector< unsigned int > dof_indices_interleaved
Definition dof_info.h:543
std::vector< unsigned int > constrained_dofs
Definition dof_info.h:631
std::array< std::vector< IndexStorageVariants >, 3 > index_storage_variants
Definition dof_info.h:489
void make_connectivity_graph(const TaskInfo &task_info, const std::vector< unsigned int > &renumbering, DynamicSparsityPattern &connectivity) const
Definition dof_info.cc:1398
std::vector< unsigned int > vector_zero_range_list_index
Definition dof_info.h:737
void compute_shared_memory_contiguous_indices(std::array< std::vector< std::pair< unsigned int, unsigned int > >, 3 > &cell_indices_contiguous_sm)
Definition dof_info.cc:1247
std::vector< std::pair< unsigned int, unsigned int > > cell_loop_post_list
Definition dof_info.h:768
::Table< 2, unsigned int > face_to_cell_index_nodal
Definition shape_info.h:568
::Table< 2, unsigned int > face_to_cell_index_hermite
Definition shape_info.h:608
std::vector< unsigned int > cell_partition_data
Definition task_info.h:474