deal.II version 9.7.0
\(\newcommand{\dealvcentcolon}{\mathrel{\mathop{:}}}\) \(\newcommand{\dealcoloneq}{\dealvcentcolon\mathrel{\mkern-1.2mu}=}\) \(\newcommand{\jump}[1]{\left[\!\left[ #1 \right]\!\right]}\) \(\newcommand{\average}[1]{\left\{\!\left\{ #1 \right\}\!\right\}}\)
Loading...
Searching...
No Matches
step-22.h
Go to the documentation of this file.
1) const
1342 *   {
1343 *   return Tensor<1, dim>();
1344 *   }
1345 *  
1346 *  
1347 *   template <int dim>
1348 *   void RightHandSide<dim>::value_list(const std::vector<Point<dim>> &vp,
1349 *   std::vector<Tensor<1, dim>> &values) const
1350 *   {
1351 *   for (unsigned int c = 0; c < vp.size(); ++c)
1352 *   {
1353 *   values[c] = RightHandSide<dim>::value(vp[c]);
1354 *   }
1355 *   }
1356 *  
1357 *  
1358 * @endcode
1359 *
1360 *
1361 * <a name="step_22-Linearsolversandpreconditioners"></a>
1362 * <h3>Linear solvers and preconditioners</h3>
1363 *
1364
1365 *
1366 * The linear solvers and preconditioners are discussed extensively in the
1367 * introduction. Here, we create the respective objects that will be used.
1368 *
1369
1370 *
1371 *
1372 * <a name="step_22-ThecodeInverseMatrixcodeclasstemplate"></a>
1373 * <h4>The <code>InverseMatrix</code> class template</h4>
1374 * The <code>InverseMatrix</code> class represents the data structure for an
1375 * inverse matrix. Unlike @ref step_20 "step-20", we implement this with a class instead of
1376 * the helper function inverse_linear_operator() because we will apply this class to
1377 * different kinds of matrices that will require different preconditioners
1378 * (in @ref step_20 "step-20" we only used a non-identity preconditioner for the mass
1379 * matrix). The types of matrix and preconditioner are passed to this class
1380 * via template parameters, and matrix and preconditioner objects of these
1381 * types will then be passed to the constructor when an
1382 * <code>InverseMatrix</code> object is created. The member function
1383 * <code>vmult</code> is obtained by solving a linear system:
1384 *
1385 * @code
1386 *   template <class MatrixType, class PreconditionerType>
1387 *   class InverseMatrix : public EnableObserverPointer
1388 *   {
1389 *   public:
1390 *   InverseMatrix(const MatrixType &m,
1391 *   const PreconditionerType &preconditioner);
1392 *  
1393 *   void vmult(Vector<double> &dst, const Vector<double> &src) const;
1394 *  
1395 *   private:
1396 *   const ObserverPointer<const MatrixType> matrix;
1397 *   const ObserverPointer<const PreconditionerType> preconditioner;
1398 *   };
1399 *  
1400 *  
1401 *   template <class MatrixType, class PreconditionerType>
1402 *   InverseMatrix<MatrixType, PreconditionerType>::InverseMatrix(
1403 *   const MatrixType &m,
1404 *   const PreconditionerType &preconditioner)
1405 *   : matrix(&m)
1406 *   , preconditioner(&preconditioner)
1407 *   {}
1408 *  
1409 *  
1410 * @endcode
1411 *
1412 * This is the implementation of the <code>vmult</code> function.
1413 *
1414
1415 *
1416 * In this class we use a rather large tolerance for the solver control. The
1417 * reason for this is that the function is used very frequently, and hence,
1418 * any additional effort to make the residual in the CG solve smaller makes
1419 * the solution more expensive. Note that we do not only use this class as a
1420 * preconditioner for the Schur complement, but also when forming the
1421 * inverse of the Laplace matrix &ndash; which is hence directly responsible
1422 * for the accuracy of the solution itself, so we can't choose a too large
1423 * tolerance, either.
1424 *
1425 * @code
1426 *   template <class MatrixType, class PreconditionerType>
1427 *   void InverseMatrix<MatrixType, PreconditionerType>::vmult(
1428 *   Vector<double> &dst,
1429 *   const Vector<double> &src) const
1430 *   {
1431 *   SolverControl solver_control(src.size(), 1e-6 * src.l2_norm());
1432 *   SolverCG<Vector<double>> cg(solver_control);
1433 *  
1434 *   dst = 0;
1435 *  
1436 *   cg.solve(*matrix, dst, src, *preconditioner);
1437 *   }
1438 *  
1439 *  
1440 * @endcode
1441 *
1442 *
1443 * <a name="step_22-ThecodeSchurComplementcodeclasstemplate"></a>
1444 * <h4>The <code>SchurComplement</code> class template</h4>
1445 *
1446
1447 *
1448 * This class implements the Schur complement discussed in the introduction.
1449 * It is in analogy to @ref step_20 "step-20". Though, we now call it with a template
1450 * parameter <code>PreconditionerType</code> in order to access that when
1451 * specifying the respective type of the inverse matrix class. As a
1452 * consequence of the definition above, the declaration
1453 * <code>InverseMatrix</code> now contains the second template parameter for
1454 * a preconditioner class as above, which affects the
1455 * <code>ObserverPointer</code> object <code>m_inverse</code> as well.
1456 *
1457 * @code
1458 *   template <class PreconditionerType>
1459 *   class SchurComplement : public EnableObserverPointer
1460 *   {
1461 *   public:
1462 *   SchurComplement(
1463 *   const BlockSparseMatrix<double> &system_matrix,
1464 *   const InverseMatrix<SparseMatrix<double>, PreconditionerType> &A_inverse);
1465 *  
1466 *   void vmult(Vector<double> &dst, const Vector<double> &src) const;
1467 *  
1468 *   private:
1469 *   const ObserverPointer<const BlockSparseMatrix<double>> system_matrix;
1470 *   const ObserverPointer<
1471 *   const InverseMatrix<SparseMatrix<double>, PreconditionerType>>
1472 *   A_inverse;
1473 *  
1474 *   mutable Vector<double> tmp1, tmp2;
1475 *   };
1476 *  
1477 *  
1478 *  
1479 *   template <class PreconditionerType>
1480 *   SchurComplement<PreconditionerType>::SchurComplement(
1481 *   const BlockSparseMatrix<double> &system_matrix,
1482 *   const InverseMatrix<SparseMatrix<double>, PreconditionerType> &A_inverse)
1483 *   : system_matrix(&system_matrix)
1484 *   , A_inverse(&A_inverse)
1485 *   , tmp1(system_matrix.block(0, 0).m())
1486 *   , tmp2(system_matrix.block(0, 0).m())
1487 *   {}
1488 *  
1489 *  
1490 *   template <class PreconditionerType>
1491 *   void
1492 *   SchurComplement<PreconditionerType>::vmult(Vector<double> &dst,
1493 *   const Vector<double> &src) const
1494 *   {
1495 *   system_matrix->block(0, 1).vmult(tmp1, src);
1496 *   A_inverse->vmult(tmp2, tmp1);
1497 *   system_matrix->block(1, 0).vmult(dst, tmp2);
1498 *   }
1499 *  
1500 *  
1501 * @endcode
1502 *
1503 *
1504 * <a name="step_22-StokesProblemclassimplementation"></a>
1505 * <h3>StokesProblem class implementation</h3>
1506 *
1507
1508 *
1509 *
1510 * <a name="step_22-StokesProblemStokesProblem"></a>
1511 * <h4>StokesProblem::StokesProblem</h4>
1512 *
1513
1514 *
1515 * The constructor of this class looks very similar to the one of
1516 * @ref step_20 "step-20". The constructor initializes the variables for the polynomial
1517 * degree, triangulation, finite element system and the dof handler. The
1518 * underlying polynomial functions are of order <code>degree+1</code> for
1519 * the vector-valued velocity components and of order <code>degree</code>
1520 * for the pressure. This gives the LBB-stable element pair
1521 * @f$Q_{degree+1}^d\times Q_{degree}@f$, often referred to as the Taylor-Hood
1522 * element for degree@f$\geq 1@f$.
1523 *
1524
1525 *
1526 * Note that we initialize the triangulation with a MeshSmoothing argument,
1527 * which ensures that the refinement of cells is done in a way that the
1528 * approximation of the PDE solution remains well-behaved (problems arise if
1529 * grids are too unstructured), see the documentation of
1530 * <code>Triangulation::MeshSmoothing</code> for details.
1531 *
1532 * @code
1533 *   template <int dim>
1534 *   StokesProblem<dim>::StokesProblem(const unsigned int degree)
1535 *   : degree(degree)
1536 *   , triangulation(Triangulation<dim>::maximum_smoothing)
1537 *   , fe(FE_Q<dim>(degree + 1) ^ dim, FE_Q<dim>(degree))
1538 *   , dof_handler(triangulation)
1539 *   {}
1540 *  
1541 *  
1542 * @endcode
1543 *
1544 *
1545 * <a name="step_22-StokesProblemsetup_dofs"></a>
1546 * <h4>StokesProblem::setup_dofs</h4>
1547 *
1548
1549 *
1550 * Given a mesh, this function associates the degrees of freedom with it and
1551 * creates the corresponding matrices and vectors. At the beginning it also
1552 * releases the pointer to the preconditioner object (if the shared pointer
1553 * pointed at anything at all at this point) since it will definitely not be
1554 * needed any more after this point and will have to be re-computed after
1555 * assembling the matrix, and unties the sparse matrices from their sparsity
1556 * pattern objects.
1557 *
1558
1559 *
1560 * We then proceed with distributing degrees of freedom and renumbering
1561 * them: In order to make the ILU preconditioner (in 3d) work efficiently,
1562 * it is important to enumerate the degrees of freedom in such a way that it
1563 * reduces the bandwidth of the matrix, or maybe more importantly: in such a
1564 * way that the ILU is as close as possible to a real LU decomposition. On
1565 * the other hand, we need to preserve the block structure of velocity and
1566 * pressure already seen in @ref step_20 "step-20" and @ref step_21 "step-21". This is done in two
1567 * steps: First, all dofs are renumbered to improve the ILU and then we
1568 * renumber once again by components. Since
1569 * <code>DoFRenumbering::component_wise</code> does not touch the
1570 * renumbering within the individual blocks, the basic renumbering from the
1571 * first step remains. As for how to renumber degrees of freedom to improve
1572 * the ILU: deal.II has a number of algorithms that attempt to find
1573 * orderings to improve ILUs, or reduce the bandwidth of matrices, or
1574 * optimize some other aspect. The DoFRenumbering namespace shows a
1575 * comparison of the results we obtain with several of these algorithms
1576 * based on the testcase discussed here in this tutorial program. Here, we
1577 * will use the traditional Cuthill-McKee algorithm already used in some of
1578 * the previous tutorial programs. In the
1579 * @ref step_22-ImprovedILU "section on improved ILU" we're going to discuss
1580 * this issue in more detail.
1581 *
1582
1583 *
1584 * There is one more change compared to previous tutorial programs: There is
1585 * no reason in sorting the <code>dim</code> velocity components
1586 * individually. In fact, rather than first enumerating all @f$x@f$-velocities,
1587 * then all @f$y@f$-velocities, etc, we would like to keep all velocities at the
1588 * same location together and only separate between velocities (all
1589 * components) and pressures. By default, this is not what the
1590 * DoFRenumbering::component_wise function does: it treats each vector
1591 * component separately; what we have to do is group several components into
1592 * "blocks" and pass this block structure to that function. Consequently, we
1593 * allocate a vector <code>block_component</code> with as many elements as
1594 * there are components and describe all velocity components to correspond
1595 * to block 0, while the pressure component will form block 1:
1596 *
1597 * @code
1598 *   template <int dim>
1599 *   void StokesProblem<dim>::setup_dofs()
1600 *   {
1601 *   A_preconditioner.reset();
1602 *   system_matrix.clear();
1603 *   preconditioner_matrix.clear();
1604 *  
1605 *   dof_handler.distribute_dofs(fe);
1606 *   DoFRenumbering::Cuthill_McKee(dof_handler);
1607 *  
1608 *   std::vector<unsigned int> block_component(dim + 1, 0);
1609 *   block_component[dim] = 1;
1610 *   DoFRenumbering::component_wise(dof_handler, block_component);
1611 *  
1612 * @endcode
1613 *
1614 * Now comes the implementation of Dirichlet boundary conditions, which
1615 * should be evident after the discussion in the introduction. All that
1616 * changed is that the function already appears in the setup functions,
1617 * whereas we were used to see it in some assembly routine. Further down
1618 * below where we set up the mesh, we will associate the top boundary
1619 * where we impose Dirichlet boundary conditions with boundary indicator
1620 * 1. We will have to pass this boundary indicator as second argument to
1621 * the function below interpolating boundary values. There is one more
1622 * thing, though. The function describing the Dirichlet conditions was
1623 * defined for all components, both velocity and pressure. However, the
1624 * Dirichlet conditions are to be set for the velocity only. To this end,
1625 * we use a ComponentMask that only selects the velocity components. The
1626 * component mask is obtained from the finite element by specifying the
1627 * particular components we want. Since we use adaptively refined grids,
1628 * the affine constraints object needs to be first filled with hanging node
1629 * constraints generated from the DoF handler. Note the order of the two
1630 * functions &mdash; we first compute the hanging node constraints, and
1631 * then insert the boundary values into the constraints object. This makes
1632 * sure that we respect H<sup>1</sup> conformity on boundaries with
1633 * hanging nodes (in three space dimensions), where the hanging node needs
1634 * to dominate the Dirichlet boundary values.
1635 *
1636 * @code
1637 *   {
1638 *   constraints.clear();
1639 *  
1640 *   const FEValuesExtractors::Vector velocities(0);
1641 *   DoFTools::make_hanging_node_constraints(dof_handler, constraints);
1642 *   VectorTools::interpolate_boundary_values(dof_handler,
1643 *   1,
1644 *   BoundaryValues<dim>(),
1645 *   constraints,
1646 *   fe.component_mask(velocities));
1647 *   }
1648 *  
1649 *   constraints.close();
1650 *  
1651 * @endcode
1652 *
1653 * In analogy to @ref step_20 "step-20", we count the dofs in the individual components.
1654 * We could do this in the same way as there, but we want to operate on
1655 * the block structure we used already for the renumbering: The function
1656 * <code>DoFTools::count_dofs_per_fe_block</code> does the same as
1657 * <code>DoFTools::count_dofs_per_fe_component</code>, but now grouped as
1658 * velocity and pressure block via <code>block_component</code>.
1659 *
1660 * @code
1661 *   const std::vector<types::global_dof_index> dofs_per_block =
1662 *   DoFTools::count_dofs_per_fe_block(dof_handler, block_component);
1663 *   const types::global_dof_index n_u = dofs_per_block[0];
1664 *   const types::global_dof_index n_p = dofs_per_block[1];
1665 *  
1666 *   std::cout << " Number of active cells: " << triangulation.n_active_cells()
1667 *   << std::endl
1668 *   << " Number of degrees of freedom: " << dof_handler.n_dofs()
1669 *   << " (" << n_u << '+' << n_p << ')' << std::endl;
1670 *  
1671 * @endcode
1672 *
1673 * The next task is to allocate a sparsity pattern for the system matrix we
1674 * will create and one for the preconditioner matrix. We could do this in
1675 * the same way as in @ref step_20 "step-20", i.e. directly build an object of type
1676 * SparsityPattern through DoFTools::make_sparsity_pattern. However, there
1677 * is a major reason not to do so:
1678 * In 3d, the function DoFTools::max_couplings_between_dofs yields a
1679 * conservative but rather large number for the coupling between the
1680 * individual dofs, so that the memory initially provided for the creation
1681 * of the sparsity pattern of the matrix is far too much -- so much actually
1682 * that the initial sparsity pattern won't even fit into the physical memory
1683 * of most systems already for moderately-sized 3d problems, see also the
1684 * discussion in @ref step_18 "step-18". Instead, we first build temporary objects that use
1685 * a different data structure that doesn't require allocating more memory
1686 * than necessary but isn't suitable for use as a basis of SparseMatrix or
1687 * BlockSparseMatrix objects; in a second step we then copy these objects
1688 * into objects of type BlockSparsityPattern. This is entirely analogous to
1689 * what we already did in @ref step_11 "step-11" and @ref step_18 "step-18". In particular, we make use of
1690 * the fact that we will never write into the @f$(1,1)@f$ block of the system
1691 * matrix and that this is the only block to be filled for the
1692 * preconditioner matrix.
1693 *
1694
1695 *
1696 * All this is done inside new scopes, which means that the memory of
1697 * <code>dsp</code> will be released once the information has been copied to
1698 * <code>sparsity_pattern</code>.
1699 *
1700 * @code
1701 *   {
1702 *   BlockDynamicSparsityPattern dsp(dofs_per_block, dofs_per_block);
1703 *  
1704 *   Table<2, DoFTools::Coupling> coupling(dim + 1, dim + 1);
1705 *   for (unsigned int c = 0; c < dim + 1; ++c)
1706 *   for (unsigned int d = 0; d < dim + 1; ++d)
1707 *   if (!((c == dim) && (d == dim)))
1708 *   coupling[c][d] = DoFTools::always;
1709 *   else
1710 *   coupling[c][d] = DoFTools::none;
1711 *  
1712 *   DoFTools::make_sparsity_pattern(
1713 *   dof_handler, coupling, dsp, constraints, false);
1714 *  
1715 *   sparsity_pattern.copy_from(dsp);
1716 *   }
1717 *  
1718 *   {
1719 *   BlockDynamicSparsityPattern preconditioner_dsp(dofs_per_block,
1720 *   dofs_per_block);
1721 *  
1722 *   Table<2, DoFTools::Coupling> preconditioner_coupling(dim + 1, dim + 1);
1723 *   for (unsigned int c = 0; c < dim + 1; ++c)
1724 *   for (unsigned int d = 0; d < dim + 1; ++d)
1725 *   if (((c == dim) && (d == dim)))
1726 *   preconditioner_coupling[c][d] = DoFTools::always;
1727 *   else
1728 *   preconditioner_coupling[c][d] = DoFTools::none;
1729 *  
1730 *   DoFTools::make_sparsity_pattern(dof_handler,
1731 *   preconditioner_coupling,
1732 *   preconditioner_dsp,
1733 *   constraints,
1734 *   false);
1735 *  
1736 *   preconditioner_sparsity_pattern.copy_from(preconditioner_dsp);
1737 *   }
1738 *  
1739 * @endcode
1740 *
1741 * Finally, the system matrix, the preconditioner matrix, the solution and
1742 * the right hand side vector are created from the block structure similar
1743 * to the approach in @ref step_20 "step-20":
1744 *
1745 * @code
1746 *   system_matrix.reinit(sparsity_pattern);
1747 *   preconditioner_matrix.reinit(preconditioner_sparsity_pattern);
1748 *  
1749 *   solution.reinit(dofs_per_block);
1750 *   system_rhs.reinit(dofs_per_block);
1751 *   }
1752 *  
1753 *  
1754 * @endcode
1755 *
1756 *
1757 * <a name="step_22-StokesProblemassemble_system"></a>
1758 * <h4>StokesProblem::assemble_system</h4>
1759 *
1760
1761 *
1762 * The assembly process follows the discussion in @ref step_20 "step-20" and in the
1763 * introduction. We use the well-known abbreviations for the data structures
1764 * that hold the local matrices, right hand side, and global numbering of the
1765 * degrees of freedom for the present cell.
1766 *
1767 * @code
1768 *   template <int dim>
1769 *   void StokesProblem<dim>::assemble_system()
1770 *   {
1771 *   system_matrix = 0;
1772 *   system_rhs = 0;
1773 *   preconditioner_matrix = 0;
1774 *  
1775 *   const QGauss<dim> quadrature_formula(degree + 2);
1776 *  
1777 *   FEValues<dim> fe_values(fe,
1778 *   quadrature_formula,
1779 *   update_values | update_quadrature_points |
1780 *   update_JxW_values | update_gradients);
1781 *  
1782 *   const unsigned int dofs_per_cell = fe.n_dofs_per_cell();
1783 *  
1784 *   const unsigned int n_q_points = quadrature_formula.size();
1785 *  
1786 *   FullMatrix<double> local_matrix(dofs_per_cell, dofs_per_cell);
1787 *   FullMatrix<double> local_preconditioner_matrix(dofs_per_cell,
1788 *   dofs_per_cell);
1789 *   Vector<double> local_rhs(dofs_per_cell);
1790 *  
1791 *   std::vector<types::global_dof_index> local_dof_indices(dofs_per_cell);
1792 *  
1793 *   const RightHandSide<dim> right_hand_side;
1794 *   std::vector<Tensor<1, dim>> rhs_values(n_q_points, Tensor<1, dim>());
1795 *  
1796 * @endcode
1797 *
1798 * Next, we need two objects that work as extractors for the FEValues
1799 * object. Their use is explained in detail in the report on @ref
1800 * vector_valued :
1801 *
1802 * @code
1803 *   const FEValuesExtractors::Vector velocities(0);
1804 *   const FEValuesExtractors::Scalar pressure(dim);
1805 *  
1806 * @endcode
1807 *
1808 * As an extension over @ref step_20 "step-20" and @ref step_21 "step-21", we include a few optimizations
1809 * that make assembly much faster for this particular problem. The
1810 * improvements are based on the observation that we do a few calculations
1811 * too many times when we do as in @ref step_20 "step-20": The symmetric gradient actually
1812 * has <code>dofs_per_cell</code> different values per quadrature point, but
1813 * we extract it <code>dofs_per_cell*dofs_per_cell</code> times from the
1814 * FEValues object - for both the loop over <code>i</code> and the inner
1815 * loop over <code>j</code>. In 3d, that means evaluating it @f$89^2=7921@f$
1816 * instead of @f$89@f$ times, a not insignificant difference.
1817 *
1818
1819 *
1820 * So what we're going to do here is to avoid such repeated calculations
1821 * by getting a vector of rank-2 tensors (and similarly for the divergence
1822 * and the basis function value on pressure) at the quadrature point prior
1823 * to starting the loop over the dofs on the cell. First, we create the
1824 * respective objects that will hold these values. Then, we start the loop
1825 * over all cells and the loop over the quadrature points, where we first
1826 * extract these values. There is one more optimization we implement here:
1827 * the local matrix (as well as the global one) is going to be symmetric,
1828 * since all the operations involved are symmetric with respect to @f$i@f$ and
1829 * @f$j@f$. This is implemented by simply running the inner loop not to
1830 * <code>dofs_per_cell</code>, but only up to <code>i</code>, the index of
1831 * the outer loop.
1832 *
1833 * @code
1834 *   std::vector<SymmetricTensor<2, dim>> symgrad_phi_u(dofs_per_cell);
1835 *   std::vector<double> div_phi_u(dofs_per_cell);
1836 *   std::vector<Tensor<1, dim>> phi_u(dofs_per_cell);
1837 *   std::vector<double> phi_p(dofs_per_cell);
1838 *  
1839 *   for (const auto &cell : dof_handler.active_cell_iterators())
1840 *   {
1841 *   fe_values.reinit(cell);
1842 *  
1843 *   local_matrix = 0;
1844 *   local_preconditioner_matrix = 0;
1845 *   local_rhs = 0;
1846 *  
1847 *   right_hand_side.value_list(fe_values.get_quadrature_points(),
1848 *   rhs_values);
1849 *  
1850 *   for (unsigned int q = 0; q < n_q_points; ++q)
1851 *   {
1852 *   for (unsigned int k = 0; k < dofs_per_cell; ++k)
1853 *   {
1854 *   symgrad_phi_u[k] =
1855 *   fe_values[velocities].symmetric_gradient(k, q);
1856 *   div_phi_u[k] = fe_values[velocities].divergence(k, q);
1857 *   phi_u[k] = fe_values[velocities].value(k, q);
1858 *   phi_p[k] = fe_values[pressure].value(k, q);
1859 *   }
1860 *  
1861 * @endcode
1862 *
1863 * Now finally for the bilinear forms of both the system matrix and
1864 * the matrix we use for the preconditioner. Recall that the
1865 * formulas for these two are
1866 * @f{align*}{
1867 * A_{ij} &= a(\varphi_i,\varphi_j)
1868 * \\ &= \underbrace{2(\varepsilon(\varphi_{i,\textbf{u}}),
1869 * \varepsilon(\varphi_{j,\textbf{u}}))_{\Omega}}
1870 * _{(1)}
1871 * \;
1872 * \underbrace{- (\textrm{div}\; \varphi_{i,\textbf{u}},
1873 * \varphi_{j,p})_{\Omega}}
1874 * _{(2)}
1875 * \;
1876 * \underbrace{- (\varphi_{i,p},
1877 * \textrm{div}\;
1878 * \varphi_{j,\textbf{u}})_{\Omega}}
1879 * _{(3)}
1880 * @f}
1881 * and
1882 * @f{align*}{
1883 * M_{ij} &= \underbrace{(\varphi_{i,p},
1884 * \varphi_{j,p})_{\Omega}}
1885 * _{(4)},
1886 * @f}
1887 * respectively, where @f$\varphi_{i,\textbf{u}}@f$ and @f$\varphi_{i,p}@f$
1888 * are the velocity and pressure components of the @f$i@f$th shape
1889 * function. The various terms above are then easily recognized in
1890 * the following implementation:
1891 *
1892 * @code
1893 *   for (unsigned int i = 0; i < dofs_per_cell; ++i)
1894 *   {
1895 *   for (unsigned int j = 0; j <= i; ++j)
1896 *   {
1897 *   local_matrix(i, j) +=
1898 *   (2 * (symgrad_phi_u[i] * symgrad_phi_u[j]) // (1)
1899 *   - div_phi_u[i] * phi_p[j] // (2)
1900 *   - phi_p[i] * div_phi_u[j]) // (3)
1901 *   * fe_values.JxW(q); // * dx
1902 *  
1903 *   local_preconditioner_matrix(i, j) +=
1904 *   (phi_p[i] * phi_p[j]) // (4)
1905 *   * fe_values.JxW(q); // * dx
1906 *   }
1907 * @endcode
1908 *
1909 * Note that in the implementation of (1) above, `operator*`
1910 * is overloaded for symmetric tensors, yielding the scalar
1911 * product between the two tensors.
1912 *
1913
1914 *
1915 * For the right-hand side, we need to multiply the (vector of)
1916 * velocity shape functions with the vector of body force
1917 * right-hand side components, both evaluated at the current
1918 * quadrature point. We have implemented the body forces as a
1919 * `TensorFunction<1,dim>`, so its values at quadrature points
1920 * are already tensors for which the application of `operator*`
1921 * against the velocity components of the shape function results
1922 * in the dot product, as intended.
1923 *
1924 * @code
1925 *   local_rhs(i) += phi_u[i] // phi_u_i(x_q)
1926 *   * rhs_values[q] // * f(x_q)
1927 *   * fe_values.JxW(q); // * dx
1928 *   }
1929 *   }
1930 *  
1931 * @endcode
1932 *
1933 * Before we can write the local data into the global matrix (and
1934 * simultaneously use the AffineConstraints object to apply
1935 * Dirichlet boundary conditions and eliminate hanging node constraints,
1936 * as we discussed in the introduction), we have to be careful about one
1937 * thing, though. We have only built half of the local matrices
1938 * because of symmetry, but we're going to save the full matrices
1939 * in order to use the standard functions for solving. This is done
1940 * by flipping the indices in case we are pointing into the empty part
1941 * of the local matrices.
1942 *
1943 * @code
1944 *   for (unsigned int i = 0; i < dofs_per_cell; ++i)
1945 *   for (unsigned int j = i + 1; j < dofs_per_cell; ++j)
1946 *   {
1947 *   local_matrix(i, j) = local_matrix(j, i);
1948 *   local_preconditioner_matrix(i, j) =
1949 *   local_preconditioner_matrix(j, i);
1950 *   }
1951 *  
1952 *   cell->get_dof_indices(local_dof_indices);
1953 *   constraints.distribute_local_to_global(local_matrix,
1954 *   local_rhs,
1955 *   local_dof_indices,
1956 *   system_matrix,
1957 *   system_rhs);
1958 *   constraints.distribute_local_to_global(local_preconditioner_matrix,
1959 *   local_dof_indices,
1960 *   preconditioner_matrix);
1961 *   }
1962 *  
1963 * @endcode
1964 *
1965 * Before we're going to solve this linear system, we generate a
1966 * preconditioner for the velocity-velocity matrix, i.e.,
1967 * <code>block(0,0)</code> in the system matrix. As mentioned above, this
1968 * depends on the spatial dimension. Since the two classes described by
1969 * the <code>InnerPreconditioner::type</code> alias have the same
1970 * interface, we do not have to do anything different whether we want to
1971 * use a sparse direct solver or an ILU:
1972 *
1973 * @code
1974 *   std::cout << " Computing preconditioner..." << std::endl << std::flush;
1975 *  
1976 *   A_preconditioner =
1977 *   std::make_shared<typename InnerPreconditioner<dim>::type>();
1978 *   A_preconditioner->initialize(
1979 *   system_matrix.block(0, 0),
1980 *   typename InnerPreconditioner<dim>::type::AdditionalData());
1981 *   }
1982 *  
1983 *  
1984 *  
1985 * @endcode
1986 *
1987 *
1988 * <a name="step_22-StokesProblemsolve"></a>
1989 * <h4>StokesProblem::solve</h4>
1990 *
1991
1992 *
1993 * After the discussion in the introduction and the definition of the
1994 * respective classes above, the implementation of the <code>solve</code>
1995 * function is rather straight-forward and done in a similar way as in
1996 * @ref step_20 "step-20". To start with, we need an object of the
1997 * <code>InverseMatrix</code> class that represents the inverse of the
1998 * matrix A. As described in the introduction, the inverse is generated with
1999 * the help of an inner preconditioner of type
2000 * <code>InnerPreconditioner::type</code>.
2001 *
2002 * @code
2003 *   template <int dim>
2004 *   void StokesProblem<dim>::solve()
2005 *   {
2006 *   const InverseMatrix<SparseMatrix<double>,
2007 *   typename InnerPreconditioner<dim>::type>
2008 *   A_inverse(system_matrix.block(0, 0), *A_preconditioner);
2009 *   Vector<double> tmp(solution.block(0).size());
2010 *  
2011 * @endcode
2012 *
2013 * This is as in @ref step_20 "step-20". We generate the right hand side @f$B A^{-1} F - G@f$
2014 * for the Schur complement and an object that represents the respective
2015 * linear operation @f$B A^{-1} B^T@f$, now with a template parameter
2016 * indicating the preconditioner - in accordance with the definition of
2017 * the class.
2018 *
2019 * @code
2020 *   {
2021 *   Vector<double> schur_rhs(solution.block(1).size());
2022 *   A_inverse.vmult(tmp, system_rhs.block(0));
2023 *   system_matrix.block(1, 0).vmult(schur_rhs, tmp);
2024 *   schur_rhs -= system_rhs.block(1);
2025 *  
2026 *   SchurComplement<typename InnerPreconditioner<dim>::type> schur_complement(
2027 *   system_matrix, A_inverse);
2028 *  
2029 * @endcode
2030 *
2031 * The usual control structures for the solver call are created...
2032 *
2033 * @code
2034 *   SolverControl solver_control(solution.block(1).size(),
2035 *   1e-6 * schur_rhs.l2_norm());
2036 *   SolverCG<Vector<double>> cg(solver_control);
2037 *  
2038 * @endcode
2039 *
2040 * Now to the preconditioner to the Schur complement. As explained in
2041 * the introduction, the preconditioning is done by a @ref GlossMassMatrix "mass matrix" in the
2042 * pressure variable.
2043 *
2044
2045 *
2046 * Actually, the solver needs to have the preconditioner in the form
2047 * @f$P^{-1}@f$, so we need to create an inverse operation. Once again, we
2048 * use an object of the class <code>InverseMatrix</code>, which
2049 * implements the <code>vmult</code> operation that is needed by the
2050 * solver. In this case, we have to invert the pressure mass matrix. As
2051 * it already turned out in earlier tutorial programs, the inversion of
2052 * a mass matrix is a rather cheap and straight-forward operation
2053 * (compared to, e.g., a Laplace matrix). The CG method with ILU
2054 * preconditioning converges in 5-10 steps, independently of the mesh
2055 * size. This is precisely what we do here: We choose another ILU
2056 * preconditioner and take it along to the InverseMatrix object via the
2057 * corresponding template parameter. A CG solver is then called within
2058 * the vmult operation of the inverse matrix.
2059 *
2060
2061 *
2062 * An alternative that is cheaper to build, but needs more iterations
2063 * afterwards, would be to choose a SSOR preconditioner with factor
2064 * 1.2. It needs about twice the number of iterations, but the costs for
2065 * its generation are almost negligible.
2066 *
2067 * @code
2068 *   SparseILU<double> preconditioner;
2069 *   preconditioner.initialize(preconditioner_matrix.block(1, 1),
2070 *   SparseILU<double>::AdditionalData());
2071 *  
2072 *   InverseMatrix<SparseMatrix<double>, SparseILU<double>> m_inverse(
2073 *   preconditioner_matrix.block(1, 1), preconditioner);
2074 *  
2075 * @endcode
2076 *
2077 * With the Schur complement and an efficient preconditioner at hand, we
2078 * can solve the respective equation for the pressure (i.e. block 1 in
2079 * the solution vector) in the usual way:
2080 *
2081 * @code
2082 *   cg.solve(schur_complement, solution.block(1), schur_rhs, m_inverse);
2083 *  
2084 * @endcode
2085 *
2086 * After this first solution step, the hanging node constraints have to
2087 * be distributed to the solution in order to achieve a consistent
2088 * pressure field.
2089 *
2090 * @code
2091 *   constraints.distribute(solution);
2092 *  
2093 *   std::cout << " " << solver_control.last_step()
2094 *   << " outer CG Schur complement iterations for pressure"
2095 *   << std::endl;
2096 *   }
2097 *  
2098 * @endcode
2099 *
2100 * As in @ref step_20 "step-20", we finally need to solve for the velocity equation where
2101 * we plug in the solution to the pressure equation. This involves only
2102 * objects we already know - so we simply multiply @f$p@f$ by @f$B^T@f$, subtract
2103 * the right hand side and multiply by the inverse of @f$A@f$. At the end, we
2104 * need to distribute the constraints from hanging nodes in order to
2105 * obtain a consistent flow field:
2106 *
2107 * @code
2108 *   {
2109 *   system_matrix.block(0, 1).vmult(tmp, solution.block(1));
2110 *   tmp *= -1;
2111 *   tmp += system_rhs.block(0);
2112 *  
2113 *   A_inverse.vmult(solution.block(0), tmp);
2114 *  
2115 *   constraints.distribute(solution);
2116 *   }
2117 *   }
2118 *  
2119 *  
2120 * @endcode
2121 *
2122 *
2123 * <a name="step_22-StokesProblemoutput_results"></a>
2124 * <h4>StokesProblem::output_results</h4>
2125 *
2126
2127 *
2128 * The next function generates graphical output. In this example, we are
2129 * going to use the VTK file format. We attach names to the individual
2130 * variables in the problem: <code>velocity</code> to the <code>dim</code>
2131 * components of velocity and <code>pressure</code> to the pressure.
2132 *
2133
2134 *
2135 * Not all visualization programs have the ability to group individual
2136 * vector components into a vector to provide vector plots; in particular,
2137 * this holds for some VTK-based visualization programs. In this case, the
2138 * logical grouping of components into vectors should already be described
2139 * in the file containing the data. In other words, what we need to do is
2140 * provide our output writers with a way to know which of the components of
2141 * the finite element logically form a vector (with @f$d@f$ components in @f$d@f$
2142 * space dimensions) rather than letting them assume that we simply have a
2143 * bunch of scalar fields. This is achieved using the members of the
2144 * <code>DataComponentInterpretation</code> namespace: as with the filename,
2145 * we create a vector in which the first <code>dim</code> components refer
2146 * to the velocities and are given the tag
2147 * DataComponentInterpretation::component_is_part_of_vector; we
2148 * finally push one tag
2149 * DataComponentInterpretation::component_is_scalar to describe
2150 * the grouping of the pressure variable.
2151 *
2152
2153 *
2154 * The rest of the function is then the same as in @ref step_20 "step-20".
2155 *
2156 * @code
2157 *   template <int dim>
2158 *   void
2159 *   StokesProblem<dim>::output_results(const unsigned int refinement_cycle) const
2160 *   {
2161 *   std::vector<std::string> solution_names(dim, "velocity");
2162 *   solution_names.emplace_back("pressure");
2163 *  
2164 *   std::vector<DataComponentInterpretation::DataComponentInterpretation>
2165 *   data_component_interpretation(
2166 *   dim, DataComponentInterpretation::component_is_part_of_vector);
2167 *   data_component_interpretation.push_back(
2168 *   DataComponentInterpretation::component_is_scalar);
2169 *  
2170 *   DataOut<dim> data_out;
2171 *   data_out.attach_dof_handler(dof_handler);
2172 *   data_out.add_data_vector(solution,
2173 *   solution_names,
2174 *   DataOut<dim>::type_dof_data,
2175 *   data_component_interpretation);
2176 *   data_out.build_patches();
2177 *  
2178 *   std::ofstream output(
2179 *   "solution-" + Utilities::int_to_string(refinement_cycle, 2) + ".vtk");
2180 *   data_out.write_vtk(output);
2181 *   }
2182 *  
2183 *  
2184 * @endcode
2185 *
2186 *
2187 * <a name="step_22-StokesProblemrefine_mesh"></a>
2188 * <h4>StokesProblem::refine_mesh</h4>
2189 *
2190
2191 *
2192 * This is the last interesting function of the <code>StokesProblem</code>
2193 * class. As indicated by its name, it takes the solution to the problem
2194 * and refines the mesh where this is needed. The procedure is the same as
2195 * in the respective step in @ref step_6 "step-6", with the exception that we base the
2196 * refinement only on the change in pressure, i.e., we call the Kelly error
2197 * estimator with a mask object of type ComponentMask that selects the
2198 * single scalar component for the pressure that we are interested in (we
2199 * get such a mask from the finite element class by specifying the component
2200 * we want). Additionally, we do not coarsen the grid again:
2201 *
2202 * @code
2203 *   template <int dim>
2204 *   void StokesProblem<dim>::refine_mesh()
2205 *   {
2206 *   Vector<float> estimated_error_per_cell(triangulation.n_active_cells());
2207 *  
2208 *   const FEValuesExtractors::Scalar pressure(dim);
2209 *   KellyErrorEstimator<dim>::estimate(
2210 *   dof_handler,
2211 *   QGauss<dim - 1>(degree + 1),
2212 *   std::map<types::boundary_id, const Function<dim> *>(),
2213 *   solution,
2214 *   estimated_error_per_cell,
2215 *   fe.component_mask(pressure));
2216 *  
2217 *   GridRefinement::refine_and_coarsen_fixed_number(triangulation,
2218 *   estimated_error_per_cell,
2219 *   0.3,
2220 *   0.0);
2221 *   triangulation.execute_coarsening_and_refinement();
2222 *   }
2223 *  
2224 *  
2225 * @endcode
2226 *
2227 *
2228 * <a name="step_22-StokesProblemrun"></a>
2229 * <h4>StokesProblem::run</h4>
2230 *
2231
2232 *
2233 * The last step in the Stokes class is, as usual, the function that
2234 * generates the initial grid and calls the other functions in the
2235 * respective order.
2236 *
2237
2238 *
2239 * We start off with a rectangle of size @f$4 \times 1@f$ (in 2d) or @f$4 \times 1
2240 * \times 1@f$ (in 3d), placed in @f$R^2/R^3@f$ as @f$(-2,2)\times(-1,0)@f$ or
2241 * @f$(-2,2)\times(0,1)\times(-1,0)@f$, respectively. It is natural to start
2242 * with equal mesh size in each direction, so we subdivide the initial
2243 * rectangle four times in the first coordinate direction. To limit the
2244 * scope of the variables involved in the creation of the mesh to the range
2245 * where we actually need them, we put the entire block between a pair of
2246 * braces:
2247 *
2248 * @code
2249 *   template <int dim>
2250 *   void StokesProblem<dim>::run()
2251 *   {
2252 *   {
2253 *   std::vector<unsigned int> subdivisions(dim, 1);
2254 *   subdivisions[0] = 4;
2255 *  
2256 *   const Point<dim> bottom_left = (dim == 2 ?
2257 *   Point<dim>(-2, -1) : // 2d case
2258 *   Point<dim>(-2, 0, -1)); // 3d case
2259 *  
2260 *   const Point<dim> top_right = (dim == 2 ?
2261 *   Point<dim>(2, 0) : // 2d case
2262 *   Point<dim>(2, 1, 0)); // 3d case
2263 *  
2264 *   GridGenerator::subdivided_hyper_rectangle(triangulation,
2265 *   subdivisions,
2266 *   bottom_left,
2267 *   top_right);
2268 *   }
2269 *  
2270 * @endcode
2271 *
2272 * A boundary indicator of 1 is set to all boundaries that are subject to
2273 * Dirichlet boundary conditions, i.e. to faces that are located at 0 in
2274 * the last coordinate direction. See the example description above for
2275 * details.
2276 *
2277 * @code
2278 *   for (const auto &cell : triangulation.active_cell_iterators())
2279 *   for (const auto &face : cell->face_iterators())
2280 *   if (face->center()[dim - 1] == 0)
2281 *   face->set_all_boundary_ids(1);
2282 *  
2283 *  
2284 * @endcode
2285 *
2286 * We then apply an initial refinement before solving for the first
2287 * time. In 3d, there are going to be more degrees of freedom, so we
2288 * refine less there:
2289 *
2290 * @code
2291 *   triangulation.refine_global(4 - dim);
2292 *  
2293 * @endcode
2294 *
2295 * As first seen in @ref step_6 "step-6", we cycle over the different refinement levels
2296 * and refine (except for the first cycle), setup the degrees of freedom
2297 * and matrices, assemble, solve and create output:
2298 *
2299 * @code
2300 *   for (unsigned int refinement_cycle = 0; refinement_cycle < 6;
2301 *   ++refinement_cycle)
2302 *   {
2303 *   std::cout << "Refinement cycle " << refinement_cycle << std::endl;
2304 *  
2305 *   if (refinement_cycle > 0)
2306 *   refine_mesh();
2307 *  
2308 *   setup_dofs();
2309 *  
2310 *   std::cout << " Assembling..." << std::endl << std::flush;
2311 *   assemble_system();
2312 *  
2313 *   std::cout << " Solving..." << std::flush;
2314 *   solve();
2315 *  
2316 *   output_results(refinement_cycle);
2317 *  
2318 *   std::cout << std::endl;
2319 *   }
2320 *   }
2321 *   } // namespace Step22
2322 *  
2323 *  
2324 * @endcode
2325 *
2326 *
2327 * <a name="step_22-Thecodemaincodefunction"></a>
2328 * <h3>The <code>main</code> function</h3>
2329 *
2330
2331 *
2332 * The main function is the same as in @ref step_20 "step-20". We pass the element degree as
2333 * a parameter and choose the space dimension at the well-known template slot.
2334 *
2335 * @code
2336 *   int main()
2337 *   {
2338 *   try
2339 *   {
2340 *   using namespace Step22;
2341 *  
2342 *   StokesProblem<2> flow_problem(1);
2343 *   flow_problem.run();
2344 *   }
2345 *   catch (std::exception &exc)
2346 *   {
2347 *   std::cerr << std::endl
2348 *   << std::endl
2349 *   << "----------------------------------------------------"
2350 *   << std::endl;
2351 *   std::cerr << "Exception on processing: " << std::endl
2352 *   << exc.what() << std::endl
2353 *   << "Aborting!" << std::endl
2354 *   << "----------------------------------------------------"
2355 *   << std::endl;
2356 *  
2357 *   return 1;
2358 *   }
2359 *   catch (...)
2360 *   {
2361 *   std::cerr << std::endl
2362 *   << std::endl
2363 *   << "----------------------------------------------------"
2364 *   << std::endl;
2365 *   std::cerr << "Unknown exception!" << std::endl
2366 *   << "Aborting!" << std::endl
2367 *   << "----------------------------------------------------"
2368 *   << std::endl;
2369 *   return 1;
2370 *   }
2371 *  
2372 *   return 0;
2373 *   }
2374 * @endcode
2375<a name="step_22-Results"></a><h1>Results</h1>
2376
2377
2378<a name="step_22-Outputoftheprogramandgraphicalvisualization"></a><h3>Output of the program and graphical visualization</h3>
2379
2380
2381<a name="step_22-2Dcalculations"></a><h4>2D calculations</h4>
2382
2383
2384Running the program with the space dimension set to 2 in the <code>main</code>
2385function yields the following output (in "release mode",
2386see also <a href="https://www.math.colostate.edu/~bangerth/videos.676.18.html">video lecture 18</a>.):
2387@code
2388examples/step-22> make run
2389Refinement cycle 0
2390 Number of active cells: 64
2391 Number of degrees of freedom: 679 (594+85)
2392 Assembling...
2393 Computing preconditioner...
2394 Solving... 11 outer CG Schur complement iterations for pressure
2395
2396Refinement cycle 1
2397 Number of active cells: 160
2398 Number of degrees of freedom: 1683 (1482+201)
2399 Assembling...
2400 Computing preconditioner...
2401 Solving... 11 outer CG Schur complement iterations for pressure
2402
2403Refinement cycle 2
2404 Number of active cells: 376
2405 Number of degrees of freedom: 3813 (3370+443)
2406 Assembling...
2407 Computing preconditioner...
2408 Solving... 11 outer CG Schur complement iterations for pressure
2409
2410Refinement cycle 3
2411 Number of active cells: 880
2412 Number of degrees of freedom: 8723 (7722+1001)
2413 Assembling...
2414 Computing preconditioner...
2415 Solving... 11 outer CG Schur complement iterations for pressure
2416
2417Refinement cycle 4
2418 Number of active cells: 2008
2419 Number of degrees of freedom: 19383 (17186+2197)
2420 Assembling...
2421 Computing preconditioner...
2422 Solving... 11 outer CG Schur complement iterations for pressure
2423
2424Refinement cycle 5
2425 Number of active cells: 4288
2426 Number of degrees of freedom: 40855 (36250+4605)
2427 Assembling...
2428 Computing preconditioner...
2429 Solving... 11 outer CG Schur complement iterations for pressure
2430@endcode
2431
2432The entire computation above takes about 2 seconds on a reasonably
2433quick (for 2015 standards) machine.
2434
2435What we see immediately from this is that the number of (outer)
2436iterations does not increase as we refine the mesh. This confirms the
2437statement in the introduction that preconditioning the Schur
2438complement with the mass matrix indeed yields a matrix spectrally
2439equivalent to the identity matrix (i.e. with eigenvalues bounded above
2440and below independently of the mesh size or the relative sizes of
2441cells). In other words, the mass matrix and the Schur complement are
2442spectrally equivalent.
2443
2444In the images below, we show the grids for the first six refinement
2445steps in the program. Observe how the grid is refined in regions
2446where the solution rapidly changes: On the upper boundary, we have
2447Dirichlet boundary conditions that are -1 in the left half of the line
2448and 1 in the right one, so there is an abrupt change at @f$x=0@f$. Likewise,
2449there are changes from Dirichlet to Neumann data in the two upper
2450corners, so there is need for refinement there as well:
2451
2452<table width="60%" align="center">
2453 <tr>
2454 <td align="center">
2455 <img src="https://www.dealii.org/images/steps/developer/step-22.2d.mesh-0.png" alt="">
2456 </td>
2457 <td align="center">
2458 <img src="https://www.dealii.org/images/steps/developer/step-22.2d.mesh-1.png" alt="">
2459 </td>
2460 </tr>
2461 <tr>
2462 <td align="center">
2463 <img src="https://www.dealii.org/images/steps/developer/step-22.2d.mesh-2.png" alt="">
2464 </td>
2465 <td align="center">
2466 <img src="https://www.dealii.org/images/steps/developer/step-22.2d.mesh-3.png" alt="">
2467 </td>
2468 </tr>
2469 <tr>
2470 <td align="center">
2471 <img src="https://www.dealii.org/images/steps/developer/step-22.2d.mesh-4.png" alt="">
2472 </td>
2473 <td align="center">
2474 <img src="https://www.dealii.org/images/steps/developer/step-22.2d.mesh-5.png" alt="">
2475 </td>
2476 </tr>
2477</table>
2478
2479Finally, following is a plot of the flow field. It shows fluid
2480transported along with the moving upper boundary and being replaced by
2481material coming from below:
2482
2483<img src="https://www.dealii.org/images/steps/developer/step-22.2d.solution.png" alt="">
2484
2485This plot uses the capability of VTK-based visualization programs (in
2486this case of VisIt) to show vector data; this is the result of us
2487declaring the velocity components of the finite element in use to be a
2488set of vector components, rather than independent scalar components in
2489the <code>StokesProblem@<dim@>::%output_results</code> function of this
2490tutorial program.
2491
2492
2493
2494<a name="step_22-3Dcalculations"></a><h4>3D calculations</h4>
2495
2496
2497In 3d, the screen output of the program looks like this:
2498
2499@code
2500Refinement cycle 0
2501 Number of active cells: 32
2502 Number of degrees of freedom: 1356 (1275+81)
2503 Assembling...
2504 Computing preconditioner...
2505 Solving... 13 outer CG Schur complement iterations for pressure.
2506
2507Refinement cycle 1
2508 Number of active cells: 144
2509 Number of degrees of freedom: 5088 (4827+261)
2510 Assembling...
2511 Computing preconditioner...
2512 Solving... 14 outer CG Schur complement iterations for pressure.
2513
2514Refinement cycle 2
2515 Number of active cells: 704
2516 Number of degrees of freedom: 22406 (21351+1055)
2517 Assembling...
2518 Computing preconditioner...
2519 Solving... 14 outer CG Schur complement iterations for pressure.
2520
2521Refinement cycle 3
2522 Number of active cells: 3168
2523 Number of degrees of freedom: 93176 (89043+4133)
2524 Assembling...
2525 Computing preconditioner...
2526 Solving... 15 outer CG Schur complement iterations for pressure.
2527
2528Refinement cycle 4
2529 Number of active cells: 11456
2530 Number of degrees of freedom: 327808 (313659+14149)
2531 Assembling...
2532 Computing preconditioner...
2533 Solving... 15 outer CG Schur complement iterations for pressure.
2534
2535Refinement cycle 5
2536 Number of active cells: 45056
2537 Number of degrees of freedom: 1254464 (1201371+53093)
2538 Assembling...
2539 Computing preconditioner...
2540 Solving... 14 outer CG Schur complement iterations for pressure.
2541@endcode
2542
2543Again, we see that the number of outer iterations does not increase as
2544we refine the mesh. Nevertheless, the compute time increases
2545significantly: for each of the iterations above separately, it takes about
25460.14 seconds, 0.63 seconds, 4.8 seconds, 35 seconds, 2 minutes and 33 seconds,
2547and 13 minutes and 12 seconds. This overall superlinear (in the number of
2548unknowns) increase in runtime is due to the fact that our inner solver is not
2549@f${\cal O}(N)@f$: a simple experiment shows that as we keep refining the mesh, the
2550average number of ILU-preconditioned CG iterations to invert the
2551velocity-velocity block @f$A@f$ increases.
2552
2553We will address the question of how possibly to improve our solver
2554@ref step_22-ImprovedSolver "below".
2555
2556As for the graphical output, the grids generated during the solution
2557look as follows:
2558
2559<table width="60%" align="center">
2560 <tr>
2561 <td align="center">
2562 <img src="https://www.dealii.org/images/steps/developer/step-22.3d.mesh-0.png" alt="">
2563 </td>
2564 <td align="center">
2565 <img src="https://www.dealii.org/images/steps/developer/step-22.3d.mesh-1.png" alt="">
2566 </td>
2567 </tr>
2568 <tr>
2569 <td align="center">
2570 <img src="https://www.dealii.org/images/steps/developer/step-22.3d.mesh-2.png" alt="">
2571 </td>
2572 <td align="center">
2573 <img src="https://www.dealii.org/images/steps/developer/step-22.3d.mesh-3.png" alt="">
2574 </td>
2575 </tr>
2576 <tr>
2577 <td align="center">
2578 <img src="https://www.dealii.org/images/steps/developer/step-22.3d.mesh-4.png" alt="">
2579 </td>
2580 <td align="center">
2581 <img src="https://www.dealii.org/images/steps/developer/step-22.3d.mesh-5.png" alt="">
2582 </td>
2583 </tr>
2584</table>
2585
2586Again, they show essentially the location of singularities introduced
2587by boundary conditions. The vector field computed makes for an
2588interesting graph:
2589
2590<img src="https://www.dealii.org/images/steps/developer/step-22.3d.solution.png" alt="">
2591
2592The isocontours shown here as well are those of the pressure
2593variable, showing the singularity at the point of discontinuous
2594velocity boundary conditions.
2595
2596
2597
2598<a name="step_22-Sparsitypattern"></a><h3>Sparsity pattern</h3>
2599
2600
2601As explained during the generation of the sparsity pattern, it is
2602important to have the numbering of degrees of freedom in mind when
2603using preconditioners like incomplete LU decompositions. This is most
2604conveniently visualized using the distribution of nonzero elements in
2605the @ref GlossStiffnessMatrix "stiffness matrix".
2606
2607If we don't do anything special to renumber degrees of freedom (i.e.,
2608without using DoFRenumbering::Cuthill_McKee, but with using
2609DoFRenumbering::component_wise to ensure that degrees of freedom are
2610appropriately sorted into their corresponding blocks of the matrix and
2611vector), then we get the following image after the first adaptive
2612refinement in two dimensions:
2613
2614<img src="https://www.dealii.org/images/steps/developer/step-22.2d.sparsity-nor.png" alt="">
2615
2616In order to generate such a graph, you have to insert a piece of
2617code like the following to the end of the setup step.
2618@code
2619 {
2620 std::ofstream out ("sparsity_pattern.gpl");
2621 sparsity_pattern.print_gnuplot(out);
2622 }
2623@endcode
2624
2625It is clearly visible that the nonzero entries are spread over almost the
2626whole matrix. This makes preconditioning by ILU inefficient: ILU generates a
2627Gaussian elimination (LU decomposition) without fill-in elements, which means
2628that more tentative fill-ins left out will result in a worse approximation of
2629the complete decomposition.
2630
2631In this program, we have thus chosen a more advanced renumbering of
2632components. The renumbering with DoFRenumbering::Cuthill_McKee and grouping
2633the components into velocity and pressure yields the following output:
2634
2635<img src="https://www.dealii.org/images/steps/developer/step-22.2d.sparsity-ren.png" alt="">
2636
2637It is apparent that the situation has improved a lot. Most of the elements are
2638now concentrated around the diagonal in the (0,0) block in the matrix. Similar
2639effects are also visible for the other blocks. In this case, the ILU
2640decomposition will be much closer to the full LU decomposition, which improves
2641the quality of the preconditioner. (It may be interesting to note that the
2642sparse direct solver UMFPACK does some %internal renumbering of the equations
2643before actually generating a sparse LU decomposition; that procedure leads to
2644a very similar pattern to the one we got from the Cuthill-McKee algorithm.)
2645
2646Finally, we want to have a closer
2647look at a sparsity pattern in 3D. We show only the (0,0) block of the
2648matrix, again after one adaptive refinement. Apart from the fact that the matrix
2649size has increased, it is also visible that there are many more entries
2650in the matrix. Moreover, even for the optimized renumbering, there will be a
2651considerable amount of tentative fill-in elements. This illustrates why UMFPACK
2652is not a good choice in 3D - a full decomposition needs many new entries that
2653 eventually won't fit into the physical memory (RAM):
2654
2655<img src="https://www.dealii.org/images/steps/developer/step-22.3d.sparsity_uu-ren.png" alt="">
2656
2657
2658
2659<a name="step_22-Possibilitiesforextensions"></a><h3>Possibilities for extensions</h3>
2660
2661
2662@anchor step_22-ImprovedSolver
2663<a name="step_22-Improvedlinearsolverin3D"></a><h4>Improved linear solver in 3D</h4>
2664
2665</a>
2666
2667We have seen in the section of computational results that the number of outer
2668iterations does not depend on the mesh size, which is optimal in a sense of
2669scalability. This does, however, not apply to the solver as a whole, as
2670mentioned above:
2671We did not look at the number of inner iterations when generating the inverse of
2672the matrix @f$A@f$ and the mass matrix @f$M_p@f$. Of course, this is unproblematic in
2673the 2D case where we precondition @f$A@f$ with a direct solver and the
2674<code>vmult</code> operation of the inverse matrix structure will converge in
2675one single CG step, but this changes in 3D where we only use an ILU
2676preconditioner. There, the number of required preconditioned CG steps to
2677invert @f$A@f$ increases as the mesh is refined, and each <code>vmult</code>
2678operation involves on average approximately 14, 23, 36, 59, 75 and 101 inner
2679CG iterations in the refinement steps shown above. (On the other hand,
2680the number of iterations for applying the inverse pressure mass matrix is
2681always around five, both in two and three dimensions.) To summarize, most work
2682is spent on solving linear systems with the same matrix @f$A@f$ over and over again.
2683What makes this look even worse is the fact that we
2684actually invert a matrix that is about 95 percent the size of the total system
2685matrix and stands for 85 percent of the non-zero entries in the sparsity
2686pattern. Hence, the natural question is whether it is reasonable to solve a
2687linear system with matrix @f$A@f$ for about 15 times when calculating the solution
2688to the block system.
2689
2690The answer is, of course, that we can do that in a few other (most of the time
2691better) ways.
2692Nevertheless, it has to be remarked that an indefinite system as the one
2693at hand puts indeed much higher
2694demands on the linear algebra than standard elliptic problems as we have seen
2695in the early tutorial programs. The improvements are still rather
2696unsatisfactory, if one compares with an elliptic problem of similar
2697size. Either way, we will introduce below a number of improvements to the
2698linear solver, a discussion that we will re-consider again with additional
2699options in the @ref step_31 "step-31" program.
2700
2701@anchor step_22-ImprovedILU
2702<a name="step_22-BetterILUdecompositionbysmartreordering"></a><h5>Better ILU decomposition by smart reordering</h5>
2703
2704</a>
2705A first attempt to improve the speed of the linear solution process is to choose
2706a dof reordering that makes the ILU being closer to a full LU decomposition, as
2707already mentioned in the in-code comments. The DoFRenumbering namespace compares
2708several choices for the renumbering of dofs for the Stokes equations. The best
2709result regarding the computing time was found for the King ordering, which is
2710accessed through the call DoFRenumbering::boost::king_ordering. With that
2711program, the inner solver needs considerably fewer operations, e.g. about 62
2712inner CG iterations for the inversion of @f$A@f$ at cycle 4 compared to about 75
2713iterations with the standard Cuthill-McKee-algorithm. Also, the computing time
2714at cycle 4 decreased from about 17 to 11 minutes for the <code>solve()</code>
2715call. However, the King ordering (and the orderings provided by the
2716DoFRenumbering::boost namespace in general) has a serious drawback - it uses
2717much more memory than the built-in deal.II versions, since it acts on abstract
2718graphs rather than the geometry provided by the triangulation. In the present
2719case, the renumbering takes about 5 times as much memory, which yields an
2720infeasible algorithm for the last cycle in 3D with 1.2 million
2721unknowns.
2722
2723<a name="step_22-BetterpreconditionerfortheinnerCGsolver"></a><h5>Better preconditioner for the inner CG solver</h5>
2724
2725Another idea to improve the situation even more would be to choose a
2726preconditioner that makes CG for the (0,0) matrix @f$A@f$ converge in a
2727mesh-independent number of iterations, say 10 to 30. We have seen such a
2728candidate in @ref step_16 "step-16": multigrid.
2729
2730
2731@anchor step_22-BlockSchur
2732<a name="step_22-BlockSchurcomplementpreconditioner"></a><h5>Block Schur complement preconditioner</h5>
2733
2734Even with a good preconditioner for @f$A@f$, we still
2735need to solve of the same linear system repeatedly (with different
2736right hand sides, though) in order to make the Schur complement solve
2737converge. The approach we are going to discuss here is how inner iteration
2738and outer iteration can be combined. If we persist in calculating the Schur
2739complement, there is no other possibility.
2740
2741The alternative is to attack the block system at once and use an approximate
2742Schur complement as efficient preconditioner. The idea is as
2743follows: If we find a block preconditioner @f$P@f$ such that the matrix
2744@f{eqnarray*}{
2745 P^{-1}\left(\begin{array}{cc}
2746 A & B^T \\ B & 0
2747 \end{array}\right)
2748@f}
2749is simple, then an iterative solver with that preconditioner will converge in a
2750few iterations. Using the Schur complement @f$S = B A^{-1} B^T@f$, one finds that
2751@f{eqnarray*}{
2752 P^{-1}
2753 =
2754 \left(\begin{array}{cc}
2755 A^{-1} & 0 \\ S^{-1} B A^{-1} & -S^{-1}
2756 \end{array}\right)
2757@f}
2758would appear to be a good choice since
2759@f{eqnarray*}{
2760 P^{-1}\left(\begin{array}{cc}
2761 A & B^T \\ B & 0
2762 \end{array}\right)
2763 =
2764 \left(\begin{array}{cc}
2765 A^{-1} & 0 \\ S^{-1} B A^{-1} & -S^{-1}
2766 \end{array}\right)\cdot \left(\begin{array}{cc}
2767 A & B^T \\ B & 0
2768 \end{array}\right)
2769 =
2770 \left(\begin{array}{cc}
2771 I & A^{-1} B^T \\ 0 & I
2772 \end{array}\right).
2773@f}
2774This is the approach taken by the paper by Silvester and Wathen referenced
2775to in the introduction (with the exception that Silvester and Wathen use
2776right preconditioning). In this case, a Krylov-based iterative method would
2777converge in one step only if exact inverses of @f$A@f$ and @f$S@f$ were applied,
2778since all the eigenvalues are one (and the number of iterations in such a
2779method is bounded by the number of distinct eigenvalues). Below, we will
2780discuss the choice of an adequate solver for this problem. First, we are
2781going to have a closer look at the implementation of the preconditioner.
2782
2783Since @f$P@f$ is aimed to be a preconditioner only, we shall use approximations to
2784the inverse of the Schur complement @f$S@f$ and the matrix @f$A@f$. Hence, the Schur
2785complement will be approximated by the pressure mass matrix @f$M_p@f$, and we use
2786a preconditioner to @f$A@f$ (without an InverseMatrix class around it) for
2787approximating @f$A^{-1}@f$.
2788
2789Here comes the class that implements the block Schur
2790complement preconditioner. The <code>vmult</code> operation for block vectors
2791according to the derivation above can be specified by three successive
2792operations:
2793@code
2794template <class PreconditionerA, class PreconditionerMp>
2795class BlockSchurPreconditioner : public EnableObserverPointer
2796{
2797 public:
2798 BlockSchurPreconditioner (const BlockSparseMatrix<double> &S,
2799 const InverseMatrix<SparseMatrix<double>,PreconditionerMp> &Mpinv,
2800 const PreconditionerA &Apreconditioner);
2801
2802 void vmult (BlockVector<double> &dst,
2803 const BlockVector<double> &src) const;
2804
2805 private:
2806 const ObserverPointer<const BlockSparseMatrix<double> > system_matrix;
2807 const ObserverPointer<const InverseMatrix<SparseMatrix<double>,
2808 PreconditionerMp > > m_inverse;
2809 const PreconditionerA &a_preconditioner;
2810
2811 mutable Vector<double> tmp;
2812
2813};
2814
2815template <class PreconditionerA, class PreconditionerMp>
2816BlockSchurPreconditioner<PreconditionerA, PreconditionerMp>::BlockSchurPreconditioner(
2817 const BlockSparseMatrix<double> &S,
2818 const InverseMatrix<SparseMatrix<double>,PreconditionerMp> &Mpinv,
2819 const PreconditionerA &Apreconditioner
2820 )
2821 :
2822 system_matrix (&S),
2823 m_inverse (&Mpinv),
2824 a_preconditioner (Apreconditioner),
2825 tmp (S.block(1,1).m())
2826{}
2827
2828 // Now the interesting function, the multiplication of
2829 // the preconditioner with a BlockVector.
2830template <class PreconditionerA, class PreconditionerMp>
2831void BlockSchurPreconditioner<PreconditionerA, PreconditionerMp>::vmult (
2832 BlockVector<double> &dst,
2833 const BlockVector<double> &src) const
2834{
2835 // Form u_new = A^{-1} u
2836 a_preconditioner.vmult (dst.block(0), src.block(0));
2837 // Form tmp = - B u_new + p
2838 // (<code>SparseMatrix::residual</code>
2839 // does precisely this)
2840 system_matrix->block(1,0).residual(tmp, dst.block(0), src.block(1));
2841 // Change sign in tmp
2842 tmp *= -1;
2843 // Multiply by approximate Schur complement
2844 // (i.e. a pressure mass matrix)
2845 m_inverse->vmult (dst.block(1), tmp);
2846}
2847@endcode
2848
2849Since we act on the whole block system now, we have to live with one
2850disadvantage: we need to perform the solver iterations on
2851the full block system instead of the smaller pressure space.
2852
2853Now we turn to the question which solver we should use for the block
2854system. The first observation is that the resulting preconditioned matrix cannot
2855be solved with CG since it is neither positive definite nor symmetric.
2856
2857The deal.II libraries implement several solvers that are appropriate for the
2858problem at hand. One choice is the solver @ref SolverBicgstab "BiCGStab", which
2859was used for the solution of the unsymmetric advection problem in @ref step_9 "step-9". The
2860second option, the one we are going to choose, is @ref SolverGMRES "GMRES"
2861(generalized minimum residual). Both methods have their pros and cons - there
2862are problems where one of the two candidates clearly outperforms the other, and
2863vice versa.
2864<a href="http://en.wikipedia.org/wiki/GMRES#Comparison_with_other_solvers">Wikipedia</a>'s
2865article on the GMRES method gives a comparative presentation.
2866A more comprehensive and well-founded comparison can be read e.g. in the book by
2867J.W. Demmel (Applied Numerical Linear Algebra, SIAM, 1997, section 6.6.6).
2868
2869For our specific problem with the ILU preconditioner for @f$A@f$, we certainly need
2870to perform hundreds of iterations on the block system for large problem sizes
2871(we won't beat CG!). Actually, this disfavors GMRES: During the GMRES
2872iterations, a basis of Krylov vectors is successively built up and some
2873operations are performed on these vectors. The more vectors are in this basis,
2874the more operations and memory will be needed. The number of operations scales
2875as @f${\cal O}(n + k^2)@f$ and memory as @f${\cal O}(kn)@f$, where @f$k@f$ is the number of
2876vectors in the Krylov basis and @f$n@f$ the size of the (block) matrix.
2877To not let these demands grow excessively, deal.II limits the size @f$k@f$ of the
2878basis to 30 vectors by default.
2879Then, the basis is rebuilt. This implementation of the GMRES method is called
2880GMRES(k), with default @f$k=30@f$. What we have gained by this restriction,
2881namely a bound on operations and memory requirements, will be compensated by
2882the fact that we use an incomplete basis - this will increase the number of
2883required iterations.
2884
2885BiCGStab, on the other hand, won't get slower when many iterations are needed
2886(one iteration uses only results from one preceding step and
2887not all the steps as GMRES). Besides the fact that BiCGStab is more expensive per
2888step since two matrix-vector products are needed (compared to one for
2889CG or GMRES), there is one main reason which makes BiCGStab not appropriate for
2890this problem: The preconditioner applies the inverse of the pressure
2891mass matrix by using the InverseMatrix class. Since the application of the
2892inverse matrix to a vector is done only in approximative way (an exact inverse
2893is too expensive), this will also affect the solver. In the case of BiCGStab,
2894the Krylov vectors will not be orthogonal due to that perturbation. While
2895this is uncritical for a small number of steps (up to about 50), it ruins the
2896performance of the solver when these perturbations have grown to a significant
2897magnitude in the course of iterations.
2898
2899We did some experiments with BiCGStab and found it to
2900be faster than GMRES up to refinement cycle 3 (in 3D), but it became very slow
2901for cycles 4 and 5 (even slower than the original Schur complement), so the
2902solver is useless in this situation. Choosing a sharper tolerance for the
2903inverse matrix class (<code>1e-10*src.l2_norm()</code> instead of
2904<code>1e-6*src.l2_norm()</code>) made BiCGStab perform well also for cycle 4,
2905but did not change the failure on the very large problems.
2906
2907GMRES is of course also affected by the approximate inverses, but it is not as
2908sensitive to orthogonality and retains a relatively good performance also for
2909large sizes, see the results below.
2910
2911With this said, we turn to the realization of the solver call with GMRES with
2912@f$k=100@f$ temporary vectors:
2913
2914@code
2915 const SparseMatrix<double> &pressure_mass_matrix
2916 = preconditioner_matrix.block(1,1);
2917 SparseILU<double> pmass_preconditioner;
2918 pmass_preconditioner.initialize (pressure_mass_matrix,
2919 SparseILU<double>::AdditionalData());
2920
2921 InverseMatrix<SparseMatrix<double>,SparseILU<double> >
2922 m_inverse (pressure_mass_matrix, pmass_preconditioner);
2923
2924 BlockSchurPreconditioner<typename InnerPreconditioner<dim>::type,
2925 SparseILU<double> >
2926 preconditioner (system_matrix, m_inverse, *A_preconditioner);
2927
2928 SolverControl solver_control (system_matrix.m(),
2929 1e-6*system_rhs.l2_norm());
2930 GrowingVectorMemory<BlockVector<double> > vector_memory;
2931 SolverGMRES<BlockVector<double> >::AdditionalData gmres_data;
2932 gmres_data.max_basis_size = 100;
2933
2934 SolverGMRES<BlockVector<double> > gmres(solver_control, vector_memory,
2935 gmres_data);
2936
2937 gmres.solve(system_matrix, solution, system_rhs,
2938 preconditioner);
2939
2940 constraints.distribute (solution);
2941
2942 std::cout << " "
2943 << solver_control.last_step()
2944 << " block GMRES iterations";
2945@endcode
2946
2947Obviously, one needs to add the include file @ref SolverGMRES
2948"<lac/solver_gmres.h>" in order to make this run.
2949We call the solver with a BlockVector template in order to enable
2950GMRES to operate on block vectors and matrices.
2951Note also that we need to set the (1,1) block in the system
2952matrix to zero (we saved the pressure mass matrix there which is not part of the
2953problem) after we copied the information to another matrix.
2954
2955Using the Timer class, we collect some statistics that compare the runtime
2956of the block solver with the one from the problem implementation above.
2957Besides the solution with the two options we also check if the solutions
2958of the two variants are close to each other (i.e. this solver gives indeed the
2959same solution as we had before) and calculate the infinity
2960norm of the vector difference.
2961
2962Let's first see the results in 2D:
2963@code
2964Refinement cycle 0
2965 Number of active cells: 64
2966 Number of degrees of freedom: 679 (594+85) [0.00162792 s]
2967 Assembling... [0.00108981 s]
2968 Computing preconditioner... [0.0025959 s]
2969 Solving...
2970 Schur complement: 11 outer CG iterations for p [0.00479603s ]
2971 Block Schur preconditioner: 12 GMRES iterations [0.00441718 s]
2972 l_infinity difference between solution vectors: 5.38258e-07
2973
2974Refinement cycle 1
2975 Number of active cells: 160
2976 Number of degrees of freedom: 1683 (1482+201) [0.00345707 s]
2977 Assembling... [0.00237417 s]
2978 Computing preconditioner... [0.00605702 s]
2979 Solving...
2980 Schur complement: 11 outer CG iterations for p [0.0123992s ]
2981 Block Schur preconditioner: 12 GMRES iterations [0.011909 s]
2982 l_infinity difference between solution vectors: 1.74658e-05
2983
2984Refinement cycle 2
2985 Number of active cells: 376
2986 Number of degrees of freedom: 3813 (3370+443) [0.00729299 s]
2987 Assembling... [0.00529909 s]
2988 Computing preconditioner... [0.0167508 s]
2989 Solving...
2990 Schur complement: 11 outer CG iterations for p [0.031672s ]
2991 Block Schur preconditioner: 12 GMRES iterations [0.029232 s]
2992 l_infinity difference between solution vectors: 7.81569e-06
2993
2994Refinement cycle 3
2995 Number of active cells: 880
2996 Number of degrees of freedom: 8723 (7722+1001) [0.017709 s]
2997 Assembling... [0.0126002 s]
2998 Computing preconditioner... [0.0435679 s]
2999 Solving...
3000 Schur complement: 11 outer CG iterations for p [0.0971651s ]
3001 Block Schur preconditioner: 12 GMRES iterations [0.0992041 s]
3002 l_infinity difference between solution vectors: 1.87249e-05
3003
3004Refinement cycle 4
3005 Number of active cells: 2008
3006 Number of degrees of freedom: 19383 (17186+2197) [0.039988 s]
3007 Assembling... [0.028281 s]
3008 Computing preconditioner... [0.118314 s]
3009 Solving...
3010 Schur complement: 11 outer CG iterations for p [0.252133s ]
3011 Block Schur preconditioner: 13 GMRES iterations [0.269125 s]
3012 l_infinity difference between solution vectors: 6.38657e-05
3013
3014Refinement cycle 5
3015 Number of active cells: 4288
3016 Number of degrees of freedom: 40855 (36250+4605) [0.0880702 s]
3017 Assembling... [0.0603511 s]
3018 Computing preconditioner... [0.278339 s]
3019 Solving...
3020 Schur complement: 11 outer CG iterations for p [0.53846s ]
3021 Block Schur preconditioner: 13 GMRES iterations [0.578667 s]
3022 l_infinity difference between solution vectors: 0.000173363
3023@endcode
3024
3025We see that there is no huge difference in the solution time between the
3026block Schur complement preconditioner solver and the Schur complement
3027itself. The reason is simple: we used a direct solve as preconditioner for
3028@f$A@f$ - so we cannot expect any gain by avoiding the inner iterations. We see
3029that the number of iterations has slightly increased for GMRES, but all in
3030all the two choices are fairly similar.
3031
3032The picture of course changes in 3D:
3033
3034@code
3035Refinement cycle 0
3036 Number of active cells: 32
3037 Number of degrees of freedom: 1356 (1275+81) [0.00845218 s]
3038 Assembling... [0.019372 s]
3039 Computing preconditioner... [0.00712395 s]
3040 Solving...
3041 Schur complement: 13 outer CG iterations for p [0.0320101s ]
3042 Block Schur preconditioner: 22 GMRES iterations [0.0048759 s]
3043 l_infinity difference between solution vectors: 2.15942e-05
3044
3045Refinement cycle 1
3046 Number of active cells: 144
3047 Number of degrees of freedom: 5088 (4827+261) [0.0346942 s]
3048 Assembling... [0.0857739 s]
3049 Computing preconditioner... [0.0465031 s]
3050 Solving...
3051 Schur complement: 14 outer CG iterations for p [0.349258s ]
3052 Block Schur preconditioner: 35 GMRES iterations [0.048759 s]
3053 l_infinity difference between solution vectors: 1.77657e-05
3054
3055Refinement cycle 2
3056 Number of active cells: 704
3057 Number of degrees of freedom: 22406 (21351+1055) [0.175669 s]
3058 Assembling... [0.437447 s]
3059 Computing preconditioner... [0.286435 s]
3060 Solving...
3061 Schur complement: 14 outer CG iterations for p [3.65519s ]
3062 Block Schur preconditioner: 63 GMRES iterations [0.497787 s]
3063 l_infinity difference between solution vectors: 5.08078e-05
3064
3065Refinement cycle 3
3066 Number of active cells: 3168
3067 Number of degrees of freedom: 93176 (89043+4133) [0.790985 s]
3068 Assembling... [1.97598 s]
3069 Computing preconditioner... [1.4325 s]
3070 Solving...
3071 Schur complement: 15 outer CG iterations for p [29.9666s ]
3072 Block Schur preconditioner: 128 GMRES iterations [5.02645 s]
3073 l_infinity difference between solution vectors: 0.000119671
3074
3075Refinement cycle 4
3076 Number of active cells: 11456
3077 Number of degrees of freedom: 327808 (313659+14149) [3.44995 s]
3078 Assembling... [7.54772 s]
3079 Computing preconditioner... [5.46306 s]
3080 Solving...
3081 Schur complement: 15 outer CG iterations for p [139.987s ]
3082 Block Schur preconditioner: 255 GMRES iterations [38.0946 s]
3083 l_infinity difference between solution vectors: 0.00020793
3084
3085Refinement cycle 5
3086 Number of active cells: 45056
3087 Number of degrees of freedom: 1254464 (1201371+53093) [19.6795 s]
3088 Assembling... [28.6586 s]
3089 Computing preconditioner... [22.401 s]
3090 Solving...
3091 Schur complement: 14 outer CG iterations for p [796.767s ]
3092 Block Schur preconditioner: 524 GMRES iterations [355.597 s]
3093 l_infinity difference between solution vectors: 0.000501219
3094@endcode
3095
3096Here, the block preconditioned solver is clearly superior to the Schur
3097complement, but the advantage gets less for more mesh points. This is
3098because GMRES(k) scales worse with the problem size than CG, as we discussed
3099above. Nonetheless, the improvement by a factor of 3-6 for moderate problem
3100sizes is quite impressive.
3101
3102
3103<a name="step_22-Combiningtheblockpreconditionerandmultigrid"></a><h5>Combining the block preconditioner and multigrid</h5>
3104
3105An ultimate linear solver for this problem could be imagined as a
3106combination of an optimal
3107preconditioner for @f$A@f$ (e.g. multigrid) and the block preconditioner
3108described above, which is the approach taken in the @ref step_31 "step-31"
3109and @ref step_32 "step-32" tutorial programs (where we use an algebraic multigrid
3110method) and @ref step_56 "step-56" (where we use a geometric multigrid method).
3111
3112
3113<a name="step_22-Noblockmatricesandvectors"></a><h5>No block matrices and vectors</h5>
3114
3115Another possibility that can be taken into account is to not set up a block
3116system, but rather solve the system of velocity and pressure all at once. The
3117options are direct solve with UMFPACK (2D) or GMRES with ILU
3118preconditioning (3D). It should be straightforward to try that.
3119
3120
3121
3122<a name="step_22-Moreinterestingtestcases"></a><h4>More interesting testcases</h4>
3123
3124
3125The program can of course also serve as a basis to compute the flow in more
3126interesting cases. The original motivation to write this program was for it to
3127be a starting point for some geophysical flow problems, such as the
3128movement of magma under places where continental plates drift apart (for
3129example mid-ocean ridges). Of course, in such places, the geometry is more
3130complicated than the examples shown above, but it is not hard to accommodate
3131for that.
3132
3133For example, by using the following modification of the boundary values
3134function
3135@code
3136template <int dim>
3137double
3138BoundaryValues<dim>::value (const Point<dim> &p,
3139 const unsigned int component) const
3140{
3141 Assert (component < this->n_components,
3142 ExcIndexRange (component, 0, this->n_components));
3143
3144 const double x_offset = std::atan(p[1]*4)/3;
3145
3146 if (component == 0)
3147 return (p[0] < x_offset ? -1 : (p[0] > x_offset ? 1 : 0));
3148 return 0;
3149}
3150@endcode
3151and the following way to generate the mesh as the domain
3152@f$[-2,2]\times[-2,2]\times[-1,0]@f$
3153@code
3154 std::vector<unsigned int> subdivisions (dim, 1);
3155 subdivisions[0] = 4;
3156 if (dim>2)
3157 subdivisions[1] = 4;
3158
3159 const Point<dim> bottom_left = (dim == 2 ?
3160 Point<dim>(-2,-1) :
3161 Point<dim>(-2,-2,-1));
3162 const Point<dim> top_right = (dim == 2 ?
3163 Point<dim>(2,0) :
3164 Point<dim>(2,2,0));
3165
3166 GridGenerator::subdivided_hyper_rectangle (triangulation,
3167 subdivisions,
3168 bottom_left,
3169 top_right);
3170@endcode
3171then we get images where the fault line is curved:
3172<table width="60%" align="center">
3173 <tr>
3174 <td align="center">
3175 <img src="https://www.dealii.org/images/steps/developer/step-22.3d-extension.png" alt="">
3176 </td>
3177 <td align="center">
3178 <img src="https://www.dealii.org/images/steps/developer/step-22.3d-grid-extension.png" alt="">
3179 </td>
3180 </tr>
3181</table>
3182 *
3183 *
3184<a name="step_22-PlainProg"></a>
3185<h1> The plain program</h1>
3186@include "step-22.cc"
3187*/
BlockType & block(const unsigned int i)
Definition point.h:113
#define Assert(cond, exc)
void loop(IteratorType begin, std_cxx20::type_identity_t< IteratorType > end, DOFINFO &dinfo, INFOBOX &info, const std::function< void(std_cxx20::type_identity_t< DOFINFO > &, typename INFOBOX::CellInfo &)> &cell_worker, const std::function< void(std_cxx20::type_identity_t< DOFINFO > &, typename INFOBOX::CellInfo &)> &boundary_worker, const std::function< void(std_cxx20::type_identity_t< DOFINFO > &, std_cxx20::type_identity_t< DOFINFO > &, typename INFOBOX::CellInfo &, typename INFOBOX::CellInfo &)> &face_worker, AssemblerType &assembler, const LoopControl &lctrl=LoopControl())
Definition loop.h:564
const bool IsBlockVector< VectorType >::value
void make_hanging_node_constraints(const DoFHandler< dim, spacedim > &dof_handler, AffineConstraints< number > &constraints)
void make_sparsity_pattern(const DoFHandler< dim, spacedim > &dof_handler, SparsityPatternBase &sparsity_pattern, const AffineConstraints< number > &constraints={}, const bool keep_constrained_dofs=true, const types::subdomain_id subdomain_id=numbers::invalid_subdomain_id)
const Event initial
Definition event.cc:68
void component_wise(DoFHandler< dim, spacedim > &dof_handler, const std::vector< unsigned int > &target_component=std::vector< unsigned int >())
void Cuthill_McKee(DoFHandler< dim, spacedim > &dof_handler, const bool reversed_numbering=false, const bool use_constraints=false, const std::vector< types::global_dof_index > &starting_indices=std::vector< types::global_dof_index >())
std::vector< types::global_dof_index > count_dofs_per_fe_block(const DoFHandler< dim, spacedim > &dof, const std::vector< unsigned int > &target_block=std::vector< unsigned int >())
std::vector< types::global_dof_index > count_dofs_per_fe_component(const DoFHandler< dim, spacedim > &dof_handler, const bool vector_valued_once=false, const std::vector< unsigned int > &target_component={})
void subdivided_hyper_rectangle(Triangulation< dim, spacedim > &tria, const std::vector< unsigned int > &repetitions, const Point< dim > &p1, const Point< dim > &p2, const bool colorize=false)
constexpr char O
@ matrix
Contents is actually a matrix.
@ symmetric
Matrix is symmetric.
@ diagonal
Matrix is diagonal.
constexpr char T
constexpr char A
constexpr types::blas_int one
Point< spacedim > point(const gp_Pnt &p, const double tolerance=1e-10)
Definition utilities.cc:193
SymmetricTensor< 2, dim, Number > e(const Tensor< 2, dim, Number > &F)
SymmetricTensor< 2, dim, Number > d(const Tensor< 2, dim, Number > &F, const Tensor< 2, dim, Number > &dF_dt)
constexpr ReturnType< rank, T >::value_type & extract(T &t, const ArrayType &indices)
VectorType::value_type * end(VectorType &V)
void interpolate_boundary_values(const Mapping< dim, spacedim > &mapping, const DoFHandler< dim, spacedim > &dof, const std::map< types::boundary_id, const Function< spacedim, number > * > &function_map, std::map< types::global_dof_index, number > &boundary_values, const ComponentMask &component_mask={})
int(& functions)(const void *v1, const void *v2)
inline ::VectorizedArray< Number, width > atan(const ::VectorizedArray< Number, width > &x)
Definition types.h:32
unsigned int global_dof_index
Definition types.h:94
DEAL_II_HOST constexpr SymmetricTensor< 2, dim, Number > invert(const SymmetricTensor< 2, dim, Number > &)
std::array< Number, 1 > eigenvalues(const SymmetricTensor< 2, 1, Number > &T)
std_cxx20::type_identity< T > identity