deal.II version 9.7.0
step-32.h
1497 *   constexpr double kappa = 1e-6; /* m^2 / s */
1498 *   constexpr double reference_density = 3300; /* kg / m^3 */
1499 *   constexpr double reference_temperature = 293; /* K */
1500 *   constexpr double expansion_coefficient = 2e-5; /* 1/K */
1501 *   constexpr double specific_heat = 1250; /* J / K / kg */
1502 *   constexpr double radiogenic_heating = 7.4e-12; /* W / kg */
1503 *  
1504 *  
1505 *   constexpr double R0 = 6371000. - 2890000.; /* m */
1506 *   constexpr double R1 = 6371000. - 35000.; /* m */
1507 *  
1508 *   constexpr double T0 = 4000 + 273; /* K */
1509 *   constexpr double T1 = 700 + 273; /* K */
1510 *  
1511 *  
1512 * @endcode
1513 *
1514 * The next set of definitions is for functions that encode the density
1515 * as a function of temperature, the gravity vector, and the initial
1516 * values for the temperature. Again, all of these (along with the values
1517 * they compute) are discussed in the introduction:
1518 *
1519 * @code
1520 *   double density(const double temperature)
1521 *   {
1522 *   return (
1523 *   reference_density *
1524 *   (1 - expansion_coefficient * (temperature - reference_temperature)));
1525 *   }
1526 *  
1527 *  
1528 *   template <int dim>
1529 *   Tensor<1, dim> gravity_vector(const Point<dim> &p)
1530 *   {
1531 *   const double r = p.norm();
1532 *   return -(1.245e-6 * r + 7.714e13 / r / r) * p / r;
1533 *   }
1534 *  
1535 *  
1536 *  
1537 *   template <int dim>
1538 *   class TemperatureInitialValues : public Function<dim>
1539 *   {
1540 *   public:
1541 *   TemperatureInitialValues()
1542 *   : Function<dim>(1)
1543 *   {}
1544 *  
1545 *   virtual double value(const Point<dim> &p,
1546 *   const unsigned int component = 0) const override;
1547 *  
1548 *   virtual void vector_value(const Point<dim> &p,
1549 *   Vector<double> &value) const override;
1550 *   };
1551 *  
1552 *  
1553 *  
1554 *   template <int dim>
1555 *   double TemperatureInitialValues<dim>::value(const Point<dim> &p,
1556 *   const unsigned int) const
1557 *   {
1558 *   const double r = p.norm();
1559 *   const double h = R1 - R0;
1560 *  
1561 *   const double s = (r - R0) / h;
1562 *   const double q =
1563 *   (dim == 3) ? std::max(0.0, std::cos(numbers::PI * std::abs(p[2] / R1))) : 1.0;
1564 *   const double phi = std::atan2(p[0], p[1]);
1565 *   const double tau = s + 0.2 * s * (1 - s) * std::sin(6 * phi) * q;
1566 *  
1567 *   return T0 * (1.0 - tau) + T1 * tau;
1568 *   }
1569 *  
1570 *  
1571 *   template <int dim>
1572 *   void
1573 *   TemperatureInitialValues<dim>::vector_value(const Point<dim> &p,
1574 *   Vector<double> &values) const
1575 *   {
1576 *   for (unsigned int c = 0; c < this->n_components; ++c)
1577 *   values(c) = TemperatureInitialValues<dim>::value(p, c);
1578 *   }
1579 *  
1580 *  
1581 * @endcode
1582 *
1583 * As mentioned in the introduction we need to rescale the pressure to
1584 * avoid the relative ill-conditioning of the momentum and mass
1585 * conservation equations. The scaling factor is @f$\frac{\eta}{L}@f$ where
1586 * @f$L@f$ is a typical length scale. Experimenting shows that a
1587 * good length scale is the diameter of plumes, which is around 10 km:
1588 *
1589 * @code
1590 *   constexpr double pressure_scaling = eta / 10000;
1591 *  
1592 * @endcode
1593 *
1594 * The final number in this namespace is a constant that denotes the
1595 * number of seconds per (average, tropical) year. We use this only when
1596 * generating screen output: internally, all computations of this program
1597 * happen in SI units (kilogram, meter, second) but writing geological
1598 * times in seconds yields numbers that one can't relate to reality, and
1599 * so we convert to years using the factor defined here:
1600 *
1601 * @code
1602 *   const double year_in_seconds = 60 * 60 * 24 * 365.2425;
1603 *  
1604 *   } // namespace EquationData
1605 *  
1606 *  
1607 *  
1608 * @endcode
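*
* For illustration, here is how this factor would typically be used when
* printing the current time. This is only a sketch, with <code>time</code>
* and <code>pcout</code> standing in for the variables of the same purpose
* in the actual <code>run()</code> function further down:
*
* @code
*   pcout << "   Time: " << time / EquationData::year_in_seconds << " years"
*         << std::endl;
* @endcode
*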
1609 *
1610 *
1611 * <a name="step_32-PreconditioningtheStokessystem"></a>
1612 * <h3>Preconditioning the Stokes system</h3>
1613 *
1614
1615 *
1616 * This namespace implements the preconditioner. As discussed in the
1617 * introduction, this preconditioner differs in a number of key portions
1618 * from the one used in @ref step_31 "step-31". Specifically, it is a right preconditioner,
1619 * implementing the matrix
1620 * @f{align*}{
1621 * \left(\begin{array}{cc}A^{-1} & A^{-1}B^TS^{-1}
1622 * \\0 & -S^{-1}
1623 * \end{array}\right)
1624 * @f}
1625 * where the two inverse matrix operations
1626 * are approximated by linear solvers or, if the right flag is given to the
1627 * constructor of this class, by a single AMG V-cycle for the velocity
1628 * block. The three code blocks of the <code>vmult</code> function implement
1629 * the multiplications with the three blocks of this preconditioner matrix
1630 * and should be self explanatory if you have read through @ref step_31 "step-31" or the
1631 * discussion of composing solvers in @ref step_20 "step-20".
1632 *
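* To see why this is a good choice, consider that if the two inverse
* operations were exact and @f$S@f$ were the exact Schur complement
* @f$S=BA^{-1}B^T@f$, then multiplying the Stokes matrix by this
* preconditioner from the right would yield the lower unit triangular matrix
* @f{align*}{
* \left(\begin{array}{cc}A & B^T\\B & 0\end{array}\right)
* \left(\begin{array}{cc}A^{-1} & A^{-1}B^TS^{-1}\\0 & -S^{-1}\end{array}\right)
* =
* \left(\begin{array}{cc}I & 0\\BA^{-1} & I\end{array}\right),
* @f}
* for which GMRES-type solvers converge in very few iterations. In practice,
* of course, all three operations are only approximated.
*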
1633 * @code
1634 *   namespace LinearSolvers
1635 *   {
1636 *   template <class PreconditionerTypeA, class PreconditionerTypeMp>
1637 *   class BlockSchurPreconditioner : public EnableObserverPointer
1638 *   {
1639 *   public:
1640 *   BlockSchurPreconditioner(const TrilinosWrappers::BlockSparseMatrix &S,
1641 *   const TrilinosWrappers::BlockSparseMatrix &Spre,
1642 *   const PreconditionerTypeMp &Mppreconditioner,
1643 *   const PreconditionerTypeA &Apreconditioner,
1644 *   const bool do_solve_A)
1645 *   : stokes_matrix(&S)
1646 *   , stokes_preconditioner_matrix(&Spre)
1647 *   , mp_preconditioner(Mppreconditioner)
1648 *   , a_preconditioner(Apreconditioner)
1649 *   , do_solve_A(do_solve_A)
1650 *   {}
1651 *  
1652 *   void vmult(TrilinosWrappers::MPI::BlockVector &dst,
1653 *   const TrilinosWrappers::MPI::BlockVector &src) const
1654 *   {
1655 *   TrilinosWrappers::MPI::Vector utmp(src.block(0));
1656 *  
1657 *   {
1658 *   SolverControl solver_control(5000, 1e-6 * src.block(1).l2_norm());
1659 *  
1660 *   SolverCG<TrilinosWrappers::MPI::Vector> solver(solver_control);
1661 *  
1662 *   solver.solve(stokes_preconditioner_matrix->block(1, 1),
1663 *   dst.block(1),
1664 *   src.block(1),
1665 *   mp_preconditioner);
1666 *  
1667 *   dst.block(1) *= -1.0;
1668 *   }
1669 *  
1670 *   {
1671 *   stokes_matrix->block(0, 1).vmult(utmp, dst.block(1));
1672 *   utmp *= -1.0;
1673 *   utmp.add(src.block(0));
1674 *   }
1675 *  
1676 *   if (do_solve_A == true)
1677 *   {
1678 *   SolverControl solver_control(5000, utmp.l2_norm() * 1e-2);
1679 *   TrilinosWrappers::SolverCG solver(solver_control);
1680 *   solver.solve(stokes_matrix->block(0, 0),
1681 *   dst.block(0),
1682 *   utmp,
1683 *   a_preconditioner);
1684 *   }
1685 *   else
1686 *   a_preconditioner.vmult(dst.block(0), utmp);
1687 *   }
1688 *  
1689 *   private:
1690 *   const ObserverPointer<const TrilinosWrappers::BlockSparseMatrix>
1691 *   stokes_matrix;
1692 *   const ObserverPointer<const TrilinosWrappers::BlockSparseMatrix>
1693 *   stokes_preconditioner_matrix;
1694 *   const PreconditionerTypeMp &mp_preconditioner;
1695 *   const PreconditionerTypeA &a_preconditioner;
1696 *   const bool do_solve_A;
1697 *   };
1698 *   } // namespace LinearSolvers
1699 *  
1700 *  
1701 *  
1702 * @endcode
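*
* To give an idea of how this class will be used, the following is a sketch
* of the corresponding code in the <code>solve()</code> function further
* down; <code>solver_control</code> and
* <code>distributed_stokes_solution</code> are local variables there, and
* the exact solver parameters are omitted here:
*
* @code
*   const LinearSolvers::BlockSchurPreconditioner<
*     TrilinosWrappers::PreconditionAMG,
*     TrilinosWrappers::PreconditionJacobi>
*     preconditioner(stokes_matrix,
*                    stokes_preconditioner_matrix,
*                    *Mp_preconditioner,
*                    *Amg_preconditioner,
*                    false); // last argument: do_solve_A
*
*   SolverFGMRES<TrilinosWrappers::MPI::BlockVector> solver(solver_control);
*   solver.solve(stokes_matrix,
*                distributed_stokes_solution,
*                stokes_rhs,
*                preconditioner);
* @endcode
*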
1703 *
1704 *
1705 * <a name="step_32-Definitionofassemblydatastructures"></a>
1706 * <h3>Definition of assembly data structures</h3>
1707 *
1708
1709 *
1710 * As described in the introduction, we will use the WorkStream mechanism
1711 * discussed in the @ref threads topic to parallelize operations among the
1712 * processors of a single machine. The WorkStream class requires that data
1713 * is passed around in two kinds of data structures, one for scratch data
1714 * and one to pass data from the assembly function to the function that
1715 * copies local contributions into global objects.
1716 *
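*
* Schematically, each of the four assembly loops will then look like the
* following sketch (shown here for the Stokes preconditioner; the actual
* calls, including the definition of the local variable
* <code>quadrature_formula</code>, appear much further down in this
* program):
*
* @code
*   using CellFilter =
*     FilteredIterator<typename DoFHandler<dim>::active_cell_iterator>;
*
*   WorkStream::run(
*     CellFilter(IteratorFilters::LocallyOwnedCell(),
*                stokes_dof_handler.begin_active()),
*     CellFilter(IteratorFilters::LocallyOwnedCell(),
*                stokes_dof_handler.end()),
*     [this](const typename DoFHandler<dim>::active_cell_iterator &cell,
*            Assembly::Scratch::StokesPreconditioner<dim> &scratch,
*            Assembly::CopyData::StokesPreconditioner<dim> &data) {
*       this->local_assemble_stokes_preconditioner(cell, scratch, data);
*     },
*     [this](const Assembly::CopyData::StokesPreconditioner<dim> &data) {
*       this->copy_local_to_global_stokes_preconditioner(data);
*     },
*     Assembly::Scratch::StokesPreconditioner<dim>(stokes_fe,
*                                                  quadrature_formula,
*                                                  mapping,
*                                                  update_JxW_values |
*                                                    update_values |
*                                                    update_gradients),
*     Assembly::CopyData::StokesPreconditioner<dim>(stokes_fe));
* @endcode
*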
1717
1718 *
1719 * The following namespace (and the two sub-namespaces) contains a
1720 * collection of data structures that serve this purpose, one pair for each
1721 * of the four operations discussed in the introduction that we will want to
1722 * parallelize. Each assembly routine gets two sets of data: a Scratch array
1723 * that collects all the classes and arrays that are used for the
1724 * calculation of the cell contribution, and a CopyData array that keeps
1725 * local matrices and vectors which will be written into the global
1726 * matrix. Whereas CopyData is a container for the final data that is
1727 * written into the global matrices and vector (and, thus, absolutely
1728 * necessary), the Scratch arrays are merely there for performance reasons
1729 * &mdash; it would be much more expensive to set up an FEValues object on
1730 * each cell than to create it only once and update some derivative data.
1731 *
1732
1733 *
1734 * @ref step_31 "step-31" had four assembly routines: one for the preconditioner matrix of
1735 * the Stokes system, one for the Stokes matrix and right hand side, one for
1736 * the temperature matrices and one for the right hand side of the
1737 * temperature equation. We here organize the scratch arrays and CopyData
1738 * objects for each of those four assembly components using a
1739 * <code>struct</code> environment (we consider these as temporary
1740 * objects we pass around, rather than classes that implement functionality
1741 * of their own, though admittedly the distinction between
1742 * <code>struct</code>s and <code>class</code>es is a subjective one).
1743 *
1744
1745 *
1746 * Regarding the Scratch objects, each struct is equipped with a constructor
1747 * that creates an @ref FEValues object using the @ref FiniteElement,
1748 * Quadrature, @ref Mapping (which describes the interpolation of curved
1749 * boundaries), and @ref UpdateFlags instances. Moreover, we manually
1750 * implement a copy constructor (since the FEValues class is not copyable by
1751 * itself), and provide some additional vector fields that are used to hold
1752 * intermediate data during the computation of local contributions.
1753 *
1754
1755 *
1756 * Let us start with the scratch arrays and, specifically, the one used for
1757 * assembly of the Stokes preconditioner:
1758 *
1759 * @code
1760 *   namespace Assembly
1761 *   {
1762 *   namespace Scratch
1763 *   {
1764 *   template <int dim>
1765 *   struct StokesPreconditioner
1766 *   {
1767 *   StokesPreconditioner(const FiniteElement<dim> &stokes_fe,
1768 *   const Quadrature<dim> &stokes_quadrature,
1769 *   const Mapping<dim> &mapping,
1770 *   const UpdateFlags update_flags);
1771 *  
1772 *   StokesPreconditioner(const StokesPreconditioner &data);
1773 *  
1774 *  
1775 *   FEValues<dim> stokes_fe_values;
1776 *  
1777 *   std::vector<Tensor<2, dim>> grad_phi_u;
1778 *   std::vector<double> phi_p;
1779 *   };
1780 *  
1781 *   template <int dim>
1782 *   StokesPreconditioner<dim>::StokesPreconditioner(
1783 *   const FiniteElement<dim> &stokes_fe,
1784 *   const Quadrature<dim> &stokes_quadrature,
1785 *   const Mapping<dim> &mapping,
1786 *   const UpdateFlags update_flags)
1787 *   : stokes_fe_values(mapping, stokes_fe, stokes_quadrature, update_flags)
1788 *   , grad_phi_u(stokes_fe.n_dofs_per_cell())
1789 *   , phi_p(stokes_fe.n_dofs_per_cell())
1790 *   {}
1791 *  
1792 *  
1793 *  
1794 *   template <int dim>
1795 *   StokesPreconditioner<dim>::StokesPreconditioner(
1796 *   const StokesPreconditioner &scratch)
1797 *   : stokes_fe_values(scratch.stokes_fe_values.get_mapping(),
1798 *   scratch.stokes_fe_values.get_fe(),
1799 *   scratch.stokes_fe_values.get_quadrature(),
1800 *   scratch.stokes_fe_values.get_update_flags())
1801 *   , grad_phi_u(scratch.grad_phi_u)
1802 *   , phi_p(scratch.phi_p)
1803 *   {}
1804 *  
1805 *  
1806 *  
1807 * @endcode
1808 *
1809 * The next one is the scratch object used for the assembly of the full
1810 * Stokes system. Observe that we derive the StokesSystem scratch class
1811 * from the StokesPreconditioner class above. We do this because all the
1812 * objects that are necessary for the assembly of the preconditioner are
1813 * also needed for the actual matrix system and right hand side, plus
1814 * some extra data. This makes the program more compact. Note also that
1815 * the assembly of the Stokes system and the temperature right hand side
1816 * further down requires data from temperature and velocity,
1817 * respectively, so we actually need two FEValues objects for those two
1818 * cases.
1819 *
1820 * @code
1821 *   template <int dim>
1822 *   struct StokesSystem : public StokesPreconditioner<dim>
1823 *   {
1824 *   StokesSystem(const FiniteElement<dim> &stokes_fe,
1825 *   const Mapping<dim> &mapping,
1826 *   const Quadrature<dim> &stokes_quadrature,
1827 *   const UpdateFlags stokes_update_flags,
1828 *   const FiniteElement<dim> &temperature_fe,
1829 *   const UpdateFlags temperature_update_flags);
1830 *  
1831 *   StokesSystem(const StokesSystem<dim> &data);
1832 *  
1833 *  
1834 *   FEValues<dim> temperature_fe_values;
1835 *  
1836 *   std::vector<Tensor<1, dim>> phi_u;
1837 *   std::vector<SymmetricTensor<2, dim>> grads_phi_u;
1838 *   std::vector<double> div_phi_u;
1839 *  
1840 *   std::vector<double> old_temperature_values;
1841 *   };
1842 *  
1843 *  
1844 *   template <int dim>
1845 *   StokesSystem<dim>::StokesSystem(
1846 *   const FiniteElement<dim> &stokes_fe,
1847 *   const Mapping<dim> &mapping,
1848 *   const Quadrature<dim> &stokes_quadrature,
1849 *   const UpdateFlags stokes_update_flags,
1850 *   const FiniteElement<dim> &temperature_fe,
1851 *   const UpdateFlags temperature_update_flags)
1852 *   : StokesPreconditioner<dim>(stokes_fe,
1853 *   stokes_quadrature,
1854 *   mapping,
1855 *   stokes_update_flags)
1856 *   , temperature_fe_values(mapping,
1857 *   temperature_fe,
1858 *   stokes_quadrature,
1859 *   temperature_update_flags)
1860 *   , phi_u(stokes_fe.n_dofs_per_cell())
1861 *   , grads_phi_u(stokes_fe.n_dofs_per_cell())
1862 *   , div_phi_u(stokes_fe.n_dofs_per_cell())
1863 *   , old_temperature_values(stokes_quadrature.size())
1864 *   {}
1865 *  
1866 *  
1867 *   template <int dim>
1868 *   StokesSystem<dim>::StokesSystem(const StokesSystem<dim> &scratch)
1869 *   : StokesPreconditioner<dim>(scratch)
1870 *   , temperature_fe_values(
1871 *   scratch.temperature_fe_values.get_mapping(),
1872 *   scratch.temperature_fe_values.get_fe(),
1873 *   scratch.temperature_fe_values.get_quadrature(),
1874 *   scratch.temperature_fe_values.get_update_flags())
1875 *   , phi_u(scratch.phi_u)
1876 *   , grads_phi_u(scratch.grads_phi_u)
1877 *   , div_phi_u(scratch.div_phi_u)
1878 *   , old_temperature_values(scratch.old_temperature_values)
1879 *   {}
1880 *  
1881 *  
1882 * @endcode
1883 *
1884 * After defining the objects used in the assembly of the Stokes system,
1885 * we do the same for the assembly of the matrices necessary for the
1886 * temperature system. The general structure is very similar:
1887 *
1888 * @code
1889 *   template <int dim>
1890 *   struct TemperatureMatrix
1891 *   {
1892 *   TemperatureMatrix(const FiniteElement<dim> &temperature_fe,
1893 *   const Mapping<dim> &mapping,
1894 *   const Quadrature<dim> &temperature_quadrature);
1895 *  
1896 *   TemperatureMatrix(const TemperatureMatrix &data);
1897 *  
1898 *  
1899 *   FEValues<dim> temperature_fe_values;
1900 *  
1901 *   std::vector<double> phi_T;
1902 *   std::vector<Tensor<1, dim>> grad_phi_T;
1903 *   };
1904 *  
1905 *  
1906 *   template <int dim>
1907 *   TemperatureMatrix<dim>::TemperatureMatrix(
1908 *   const FiniteElement<dim> &temperature_fe,
1909 *   const Mapping<dim> &mapping,
1910 *   const Quadrature<dim> &temperature_quadrature)
1911 *   : temperature_fe_values(mapping,
1912 *   temperature_fe,
1913 *   temperature_quadrature,
1914 *   update_values | update_gradients |
1915 *   update_JxW_values)
1916 *   , phi_T(temperature_fe.n_dofs_per_cell())
1917 *   , grad_phi_T(temperature_fe.n_dofs_per_cell())
1918 *   {}
1919 *  
1920 *  
1921 *   template <int dim>
1922 *   TemperatureMatrix<dim>::TemperatureMatrix(
1923 *   const TemperatureMatrix &scratch)
1924 *   : temperature_fe_values(
1925 *   scratch.temperature_fe_values.get_mapping(),
1926 *   scratch.temperature_fe_values.get_fe(),
1927 *   scratch.temperature_fe_values.get_quadrature(),
1928 *   scratch.temperature_fe_values.get_update_flags())
1929 *   , phi_T(scratch.phi_T)
1930 *   , grad_phi_T(scratch.grad_phi_T)
1931 *   {}
1932 *  
1933 *  
1934 * @endcode
1935 *
1936 * The final scratch object is used in the assembly of the right hand
1937 * side of the temperature system. This object is significantly larger
1938 * than the ones above because a lot more quantities enter the
1939 * computation of the right hand side of the temperature equation. In
1940 * particular, the temperature values and gradients of the previous two
1941 * time steps need to be evaluated at the quadrature points, as well as
1942 * the velocities and the strain rates (i.e. the symmetric gradients of
1943 * the velocity) that enter the right hand side as friction heating
1944 * terms. Despite the number of terms, the following should be rather
1945 * self explanatory:
1946 *
1947 * @code
1948 *   template <int dim>
1949 *   struct TemperatureRHS
1950 *   {
1951 *   TemperatureRHS(const FiniteElement<dim> &temperature_fe,
1952 *   const FiniteElement<dim> &stokes_fe,
1953 *   const Mapping<dim> &mapping,
1954 *   const Quadrature<dim> &quadrature);
1955 *  
1956 *   TemperatureRHS(const TemperatureRHS &data);
1957 *  
1958 *  
1959 *   FEValues<dim> temperature_fe_values;
1960 *   FEValues<dim> stokes_fe_values;
1961 *  
1962 *   std::vector<double> phi_T;
1963 *   std::vector<Tensor<1, dim>> grad_phi_T;
1964 *  
1965 *   std::vector<Tensor<1, dim>> old_velocity_values;
1966 *   std::vector<Tensor<1, dim>> old_old_velocity_values;
1967 *  
1968 *   std::vector<SymmetricTensor<2, dim>> old_strain_rates;
1969 *   std::vector<SymmetricTensor<2, dim>> old_old_strain_rates;
1970 *  
1971 *   std::vector<double> old_temperature_values;
1972 *   std::vector<double> old_old_temperature_values;
1973 *   std::vector<Tensor<1, dim>> old_temperature_grads;
1974 *   std::vector<Tensor<1, dim>> old_old_temperature_grads;
1975 *   std::vector<double> old_temperature_laplacians;
1976 *   std::vector<double> old_old_temperature_laplacians;
1977 *   };
1978 *  
1979 *  
1980 *   template <int dim>
1981 *   TemperatureRHS<dim>::TemperatureRHS(
1982 *   const FiniteElement<dim> &temperature_fe,
1983 *   const FiniteElement<dim> &stokes_fe,
1984 *   const Mapping<dim> &mapping,
1985 *   const Quadrature<dim> &quadrature)
1986 *   : temperature_fe_values(mapping,
1987 *   temperature_fe,
1988 *   quadrature,
1989 *   update_values | update_gradients |
1990 *   update_hessians | update_quadrature_points |
1991 *   update_JxW_values)
1992 *   , stokes_fe_values(mapping,
1993 *   stokes_fe,
1994 *   quadrature,
1995 *   update_values | update_gradients)
1996 *   , phi_T(temperature_fe.n_dofs_per_cell())
1997 *   , grad_phi_T(temperature_fe.n_dofs_per_cell())
1998 *   ,
1999 *  
2000 *   old_velocity_values(quadrature.size())
2001 *   , old_old_velocity_values(quadrature.size())
2002 *   , old_strain_rates(quadrature.size())
2003 *   , old_old_strain_rates(quadrature.size())
2004 *   ,
2005 *  
2006 *   old_temperature_values(quadrature.size())
2007 *   , old_old_temperature_values(quadrature.size())
2008 *   , old_temperature_grads(quadrature.size())
2009 *   , old_old_temperature_grads(quadrature.size())
2010 *   , old_temperature_laplacians(quadrature.size())
2011 *   , old_old_temperature_laplacians(quadrature.size())
2012 *   {}
2013 *  
2014 *  
2015 *   template <int dim>
2016 *   TemperatureRHS<dim>::TemperatureRHS(const TemperatureRHS &scratch)
2017 *   : temperature_fe_values(
2018 *   scratch.temperature_fe_values.get_mapping(),
2019 *   scratch.temperature_fe_values.get_fe(),
2020 *   scratch.temperature_fe_values.get_quadrature(),
2021 *   scratch.temperature_fe_values.get_update_flags())
2022 *   , stokes_fe_values(scratch.stokes_fe_values.get_mapping(),
2023 *   scratch.stokes_fe_values.get_fe(),
2024 *   scratch.stokes_fe_values.get_quadrature(),
2025 *   scratch.stokes_fe_values.get_update_flags())
2026 *   , phi_T(scratch.phi_T)
2027 *   , grad_phi_T(scratch.grad_phi_T)
2028 *   ,
2029 *  
2030 *   old_velocity_values(scratch.old_velocity_values)
2031 *   , old_old_velocity_values(scratch.old_old_velocity_values)
2032 *   , old_strain_rates(scratch.old_strain_rates)
2033 *   , old_old_strain_rates(scratch.old_old_strain_rates)
2034 *   ,
2035 *  
2036 *   old_temperature_values(scratch.old_temperature_values)
2037 *   , old_old_temperature_values(scratch.old_old_temperature_values)
2038 *   , old_temperature_grads(scratch.old_temperature_grads)
2039 *   , old_old_temperature_grads(scratch.old_old_temperature_grads)
2040 *   , old_temperature_laplacians(scratch.old_temperature_laplacians)
2041 *   , old_old_temperature_laplacians(scratch.old_old_temperature_laplacians)
2042 *   {}
2043 *   } // namespace Scratch
2044 *  
2045 *  
2046 * @endcode
2047 *
2048 * The CopyData objects are even simpler than the Scratch objects as all
2049 * they have to do is to store the results of local computations until
2050 * they can be copied into the global matrix or vector objects. These
2051 * structures therefore only need to provide a constructor, a copy
2052 * operation, and some arrays for local matrix, local vectors and the
2053 * relation between local and global degrees of freedom (a.k.a.
2054 * <code>local_dof_indices</code>). Again, we have one such structure for
2055 * each of the four operations we will parallelize using the WorkStream
2056 * class:
2057 *
2058 * @code
2059 *   namespace CopyData
2060 *   {
2061 *   template <int dim>
2062 *   struct StokesPreconditioner
2063 *   {
2064 *   StokesPreconditioner(const FiniteElement<dim> &stokes_fe);
2065 *   StokesPreconditioner(const StokesPreconditioner &data);
2066 *   StokesPreconditioner &operator=(const StokesPreconditioner &) = default;
2067 *  
2068 *   FullMatrix<double> local_matrix;
2069 *   std::vector<types::global_dof_index> local_dof_indices;
2070 *   };
2071 *  
2072 *   template <int dim>
2073 *   StokesPreconditioner<dim>::StokesPreconditioner(
2074 *   const FiniteElement<dim> &stokes_fe)
2075 *   : local_matrix(stokes_fe.n_dofs_per_cell(), stokes_fe.n_dofs_per_cell())
2076 *   , local_dof_indices(stokes_fe.n_dofs_per_cell())
2077 *   {}
2078 *  
2079 *   template <int dim>
2080 *   StokesPreconditioner<dim>::StokesPreconditioner(
2081 *   const StokesPreconditioner &data)
2082 *   : local_matrix(data.local_matrix)
2083 *   , local_dof_indices(data.local_dof_indices)
2084 *   {}
2085 *  
2086 *  
2087 *  
2088 *   template <int dim>
2089 *   struct StokesSystem : public StokesPreconditioner<dim>
2090 *   {
2091 *   StokesSystem(const FiniteElement<dim> &stokes_fe);
2092 *  
2093 *   Vector<double> local_rhs;
2094 *   };
2095 *  
2096 *   template <int dim>
2097 *   StokesSystem<dim>::StokesSystem(const FiniteElement<dim> &stokes_fe)
2098 *   : StokesPreconditioner<dim>(stokes_fe)
2099 *   , local_rhs(stokes_fe.n_dofs_per_cell())
2100 *   {}
2101 *  
2102 *  
2103 *  
2104 *   template <int dim>
2105 *   struct TemperatureMatrix
2106 *   {
2107 *   TemperatureMatrix(const FiniteElement<dim> &temperature_fe);
2108 *  
2109 *   FullMatrix<double> local_mass_matrix;
2110 *   FullMatrix<double> local_stiffness_matrix;
2111 *   std::vector<types::global_dof_index> local_dof_indices;
2112 *   };
2113 *  
2114 *   template <int dim>
2115 *   TemperatureMatrix<dim>::TemperatureMatrix(
2116 *   const FiniteElement<dim> &temperature_fe)
2117 *   : local_mass_matrix(temperature_fe.n_dofs_per_cell(),
2118 *   temperature_fe.n_dofs_per_cell())
2119 *   , local_stiffness_matrix(temperature_fe.n_dofs_per_cell(),
2120 *   temperature_fe.n_dofs_per_cell())
2121 *   , local_dof_indices(temperature_fe.n_dofs_per_cell())
2122 *   {}
2123 *  
2124 *  
2125 *  
2126 *   template <int dim>
2127 *   struct TemperatureRHS
2128 *   {
2129 *   TemperatureRHS(const FiniteElement<dim> &temperature_fe);
2130 *  
2131 *   Vector<double> local_rhs;
2132 *   std::vector<types::global_dof_index> local_dof_indices;
2133 *   FullMatrix<double> matrix_for_bc;
2134 *   };
2135 *  
2136 *   template <int dim>
2137 *   TemperatureRHS<dim>::TemperatureRHS(
2138 *   const FiniteElement<dim> &temperature_fe)
2139 *   : local_rhs(temperature_fe.n_dofs_per_cell())
2140 *   , local_dof_indices(temperature_fe.n_dofs_per_cell())
2141 *   , matrix_for_bc(temperature_fe.n_dofs_per_cell(),
2142 *   temperature_fe.n_dofs_per_cell())
2143 *   {}
2144 *   } // namespace CopyData
2145 *   } // namespace Assembly
2146 *  
2147 *  
2148 *  
2149 * @endcode
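*
* To illustrate how these CopyData objects are consumed: each of the
* <code>copy_local_to_global_*</code> functions declared further down
* essentially just hands the local matrix (or vector) and the index mapping
* to AffineConstraints::distribute_local_to_global(), which writes the
* entries into the global object while resolving constraints. For the
* Stokes preconditioner, for example, this amounts to little more than the
* following:
*
* @code
*   stokes_constraints.distribute_local_to_global(
*     data.local_matrix, data.local_dof_indices, stokes_preconditioner_matrix);
* @endcode
*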
2150 *
2151 *
2152 * <a name="step_32-ThecodeBoussinesqFlowProblemcodeclasstemplate"></a>
2153 * <h3>The <code>BoussinesqFlowProblem</code> class template</h3>
2154 *
2155
2156 *
2157 * This is the declaration of the main class. It is very similar to @ref step_31 "step-31"
2158 * but there are a number of differences we will comment on below.
2159 *
2160
2161 *
2162 * The top of the class is essentially the same as in @ref step_31 "step-31", listing the
2163 * public methods and a set of private functions that do the heavy
2164 * lifting. Compared to @ref step_31 "step-31" there are only two additions to this
2165 * section: the function <code>get_cfl_number()</code> that computes the
2166 * maximum CFL number over all cells, from which we then compute the
2167 * global time step, and the function <code>get_entropy_variation()</code>
2168 * that is used in the computation of the entropy stabilization. It is akin
2169 * to the <code>get_extrapolated_temperature_range()</code> we have used in
2170 * @ref step_31 "step-31" for this purpose, but works on the entropy
2171 * instead of the temperature.
2172 *
2173 * @code
2174 *   template <int dim>
2175 *   class BoussinesqFlowProblem
2176 *   {
2177 *   public:
2178 *   struct Parameters;
2179 *   BoussinesqFlowProblem(Parameters &parameters);
2180 *   void run();
2181 *  
2182 *   private:
2183 *   void setup_dofs();
2184 *   void assemble_stokes_preconditioner();
2185 *   void build_stokes_preconditioner();
2186 *   void assemble_stokes_system();
2187 *   void assemble_temperature_matrix();
2188 *   void assemble_temperature_system(const double maximal_velocity);
2189 *   double get_maximal_velocity() const;
2190 *   double get_cfl_number() const;
2191 *   double get_entropy_variation(const double average_temperature) const;
2192 *   std::pair<double, double> get_extrapolated_temperature_range() const;
2193 *   void solve();
2194 *   void output_results();
2195 *   void refine_mesh(const unsigned int max_grid_level);
2196 *  
2197 *   double compute_viscosity(
2198 *   const std::vector<double> &old_temperature,
2199 *   const std::vector<double> &old_old_temperature,
2200 *   const std::vector<Tensor<1, dim>> &old_temperature_grads,
2201 *   const std::vector<Tensor<1, dim>> &old_old_temperature_grads,
2202 *   const std::vector<double> &old_temperature_laplacians,
2203 *   const std::vector<double> &old_old_temperature_laplacians,
2204 *   const std::vector<Tensor<1, dim>> &old_velocity_values,
2205 *   const std::vector<Tensor<1, dim>> &old_old_velocity_values,
2206 *   const std::vector<SymmetricTensor<2, dim>> &old_strain_rates,
2207 *   const std::vector<SymmetricTensor<2, dim>> &old_old_strain_rates,
2208 *   const double global_u_infty,
2209 *   const double global_T_variation,
2210 *   const double average_temperature,
2211 *   const double global_entropy_variation,
2212 *   const double cell_diameter) const;
2213 *  
2214 *   public:
2215 * @endcode
2216 *
2217 * The first significant new component is the definition of a struct for
2218 * the parameters according to the discussion in the introduction. This
2219 * structure is initialized by reading from a parameter file during
2220 * construction of this object.
2221 *
2222 * @code
2223 *   struct Parameters
2224 *   {
2225 *   Parameters(const std::string &parameter_filename);
2226 *  
2227 *   static void declare_parameters(ParameterHandler &prm);
2228 *   void parse_parameters(ParameterHandler &prm);
2229 *  
2230 *   double end_time;
2231 *  
2232 *   unsigned int initial_global_refinement;
2233 *   unsigned int initial_adaptive_refinement;
2234 *  
2235 *   bool generate_graphical_output;
2236 *   unsigned int graphical_output_interval;
2237 *  
2238 *   unsigned int adaptive_refinement_interval;
2239 *  
2240 *   double stabilization_alpha;
2241 *   double stabilization_c_R;
2242 *   double stabilization_beta;
2243 *  
2244 *   unsigned int stokes_velocity_degree;
2245 *   bool use_locally_conservative_discretization;
2246 *  
2247 *   unsigned int temperature_degree;
2248 *   };
2249 *  
2250 *   private:
2251 *   Parameters &parameters;
2252 *  
2253 * @endcode
2254 *
2255 * The <code>pcout</code> (for <i>%parallel <code>std::cout</code></i>)
2256 * object is used to simplify writing output: each MPI process can use
2257 * this to generate output as usual, but since each of these processes
2258 * will (hopefully) produce the same output it will just be replicated
2259 * many times over; with the ConditionalOStream class, only the output
2260 * generated by one MPI process will actually be printed to screen,
2261 * whereas the output by all the other processes will simply be forgotten.
2262 *
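*
* In practice this means that statements like the following can be executed
* unconditionally on every process, with the output appearing exactly once
* (a schematic example):
*
* @code
*   pcout << "Number of active cells: "
*         << triangulation.n_global_active_cells() << std::endl;
* @endcode
*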
2263 * @code
2264 *   ConditionalOStream pcout;
2265 *  
2266 * @endcode
2267 *
2268 * The following member variables will then again be similar to those in
2269 * @ref step_31 "step-31" (and to other tutorial programs). As mentioned in the
2270 * introduction, we fully distribute computations, so we will have to use
2271 * the parallel::distributed::Triangulation class (see @ref step_40 "step-40") but the
2272 * remainder of these variables is rather standard with two exceptions:
2273 *
2274
2275 *
2276 * - The <code>mapping</code> variable is used to denote a higher-order
2277 * polynomial mapping. As mentioned in the introduction, we use this
2278 * mapping when forming integrals through quadrature for all cells.
2279 *
2280
2281 *
2282 * - In a bit of naming confusion, you will notice below that some of the
2283 * variables from namespace TrilinosWrappers are taken from namespace
2284 * TrilinosWrappers::MPI (such as the right hand side vectors) whereas
2285 * others are not (such as the various matrices). This is due to legacy
2286 * reasons. We will frequently have to query velocities
2287 * and temperatures at arbitrary quadrature points; consequently, rather
2288 * than importing ghost information of a vector whenever we need access
2289 * to degrees of freedom that are relevant locally but owned by another
2290 * processor, we solve linear systems in %parallel but then immediately
2291 * initialize a vector including ghost entries of the solution for further
2292 * processing. The various <code>*_solution</code> vectors are therefore
2293 * filled immediately after solving their respective linear system in
2294 * %parallel and will always contain values for all
2295 * @ref GlossLocallyRelevantDof "locally relevant degrees of freedom";
2296 * the fully distributed vectors that we obtain from the solution process
2297 * and that only ever contain the
2298 * @ref GlossLocallyOwnedDof "locally owned degrees of freedom" are
2299 * destroyed immediately after the solution process and after we have
2300 * copied the relevant values into the member variable vectors. (This
 pattern is sketched right after the following declarations.)
2301 *
2302 * @code
2303 *   parallel::distributed::Triangulation<dim> triangulation;
2304 *   double global_Omega_diameter;
2305 *  
2306 *   const MappingQ<dim> mapping;
2307 *  
2308 *   const FESystem<dim> stokes_fe;
2309 *   DoFHandler<dim> stokes_dof_handler;
2310 *   AffineConstraints<double> stokes_constraints;
2311 *  
2312 *   TrilinosWrappers::BlockSparseMatrix stokes_matrix;
2313 *   TrilinosWrappers::BlockSparseMatrix stokes_preconditioner_matrix;
2314 *  
2315 *   TrilinosWrappers::MPI::BlockVector stokes_solution;
2316 *   TrilinosWrappers::MPI::BlockVector old_stokes_solution;
2317 *   TrilinosWrappers::MPI::BlockVector stokes_rhs;
2318 *  
2319 *  
2320 *   const FE_Q<dim> temperature_fe;
2321 *   DoFHandler<dim> temperature_dof_handler;
2322 *   AffineConstraints<double> temperature_constraints;
2323 *  
2324 *   TrilinosWrappers::SparseMatrix temperature_mass_matrix;
2325 *   TrilinosWrappers::SparseMatrix temperature_stiffness_matrix;
2326 *   TrilinosWrappers::SparseMatrix temperature_matrix;
2327 *  
2328 *   TrilinosWrappers::MPI::Vector temperature_solution;
2329 *   TrilinosWrappers::MPI::Vector old_temperature_solution;
2330 *   TrilinosWrappers::MPI::Vector old_old_temperature_solution;
2331 *   TrilinosWrappers::MPI::Vector temperature_rhs;
2332 *  
2333 *  
2334 *   double time_step;
2335 *   double old_time_step;
2336 *   unsigned int timestep_number;
2337 *  
2338 *   std::shared_ptr<TrilinosWrappers::PreconditionAMG> Amg_preconditioner;
2339 *   std::shared_ptr<TrilinosWrappers::PreconditionJacobi> Mp_preconditioner;
2340 *   std::shared_ptr<TrilinosWrappers::PreconditionJacobi> T_preconditioner;
2341 *  
2342 *   bool rebuild_stokes_matrix;
2343 *   bool rebuild_stokes_preconditioner;
2344 *   bool rebuild_temperature_matrices;
2345 *   bool rebuild_temperature_preconditioner;
2346 *  
2347 * @endcode
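*
* The pattern mentioned in the second bullet above then looks, schematically,
* as follows for the Stokes solve (a sketch with names as used in the actual
* <code>solve()</code> function; <code>solver</code> and
* <code>preconditioner</code> are set up there as well):
*
* @code
*   // Fully distributed vector: holds only locally owned entries and is
*   // what the linear solver works on.
*   TrilinosWrappers::MPI::BlockVector distributed_stokes_solution(stokes_rhs);
*
*   solver.solve(stokes_matrix,
*                distributed_stokes_solution,
*                stokes_rhs,
*                preconditioner);
*   stokes_constraints.distribute(distributed_stokes_solution);
*
*   // Copying into the ghosted member vector imports the locally relevant
*   // (but not locally owned) entries from neighboring processes:
*   stokes_solution = distributed_stokes_solution;
* @endcode
*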
2348 *
2349 * The next member variable, <code>computing_timer</code> is used to
2350 * conveniently account for compute time spent in certain "sections" of
2351 * the code that are repeatedly entered. For example, we will enter (and
2352 * leave) sections for Stokes matrix assembly and would like to accumulate
2353 * the run time spent in this section over all time steps. Every so many
2354 * time steps as well as at the end of the program (through the destructor
2355 * of the TimerOutput class) we will then produce a nice summary of the
2356 * times spent in the different sections into which we categorize the
2357 * run-time of this program.
2358 *
2359 * @code
2360 *   TimerOutput computing_timer;
2361 *  
2362 * @endcode
2363 *
2364 * After these member variables we have a number of auxiliary functions
2365 * that have been broken out of the ones listed above. Specifically, there
2366 * are first three functions that we call from <code>setup_dofs</code> and
2367 * then the ones that do the assembling of linear systems:
2368 *
2369 * @code
2370 *   void setup_stokes_matrix(
2371 *   const std::vector<IndexSet> &stokes_partitioning,
2372 *   const std::vector<IndexSet> &stokes_relevant_partitioning);
2373 *   void setup_stokes_preconditioner(
2374 *   const std::vector<IndexSet> &stokes_partitioning,
2375 *   const std::vector<IndexSet> &stokes_relevant_partitioning);
2376 *   void setup_temperature_matrices(
2377 *   const IndexSet &temperature_partitioning,
2378 *   const IndexSet &temperature_relevant_partitioning);
2379 *  
2380 *  
2381 * @endcode
2382 *
2383 * Following the @ref MTWorkStream "task-based parallelization" paradigm,
2384 * we split all the assembly routines into two parts: a first part that
2385 * can do all the calculations on a certain cell without taking care of
2386 * other threads, and a second part (which is writing the local data into
2387 * the global matrices and vectors) which can be entered by only one
2388 * thread at a time. In order to implement that, we provide functions for
2389 * each of those two steps for all the four assembly routines that we use
2390 * in this program. The following eight functions do exactly this:
2391 *
2392 * @code
2393 *   void local_assemble_stokes_preconditioner(
2394 *   const typename DoFHandler<dim>::active_cell_iterator &cell,
2395 *   Assembly::Scratch::StokesPreconditioner<dim> &scratch,
2396 *   Assembly::CopyData::StokesPreconditioner<dim> &data);
2397 *  
2398 *   void copy_local_to_global_stokes_preconditioner(
2399 *   const Assembly::CopyData::StokesPreconditioner<dim> &data);
2400 *  
2401 *  
2402 *   void local_assemble_stokes_system(
2403 *   const typename DoFHandler<dim>::active_cell_iterator &cell,
2404 *   Assembly::Scratch::StokesSystem<dim> &scratch,
2405 *   Assembly::CopyData::StokesSystem<dim> &data);
2406 *  
2407 *   void copy_local_to_global_stokes_system(
2408 *   const Assembly::CopyData::StokesSystem<dim> &data);
2409 *  
2410 *  
2411 *   void local_assemble_temperature_matrix(
2412 *   const typename DoFHandler<dim>::active_cell_iterator &cell,
2413 *   Assembly::Scratch::TemperatureMatrix<dim> &scratch,
2414 *   Assembly::CopyData::TemperatureMatrix<dim> &data);
2415 *  
2416 *   void copy_local_to_global_temperature_matrix(
2417 *   const Assembly::CopyData::TemperatureMatrix<dim> &data);
2418 *  
2419 *  
2420 *  
2421 *   void local_assemble_temperature_rhs(
2422 *   const std::pair<double, double> global_T_range,
2423 *   const double global_max_velocity,
2424 *   const double global_entropy_variation,
2425 *   const typename DoFHandler<dim>::active_cell_iterator &cell,
2426 *   Assembly::Scratch::TemperatureRHS<dim> &scratch,
2427 *   Assembly::CopyData::TemperatureRHS<dim> &data);
2428 *  
2429 *   void copy_local_to_global_temperature_rhs(
2430 *   const Assembly::CopyData::TemperatureRHS<dim> &data);
2431 *  
2432 * @endcode
2433 *
2434 * Finally, we forward declare a member class that we will define later on
2435 * and that will be used to compute a number of quantities from our
2436 * solution vectors that we'd like to put into the output files for
2437 * visualization.
2438 *
2439 * @code
2440 *   class Postprocessor;
2441 *   };
2442 *  
2443 *  
2444 * @endcode
2445 *
2446 *
2447 * <a name="step_32-BoussinesqFlowProblemclassimplementation"></a>
2448 * <h3>BoussinesqFlowProblem class implementation</h3>
2449 *
2450
2451 *
2452 *
2453 * <a name="step_32-BoussinesqFlowProblemParameters"></a>
2454 * <h4>BoussinesqFlowProblem::Parameters</h4>
2455 *
2456
2457 *
2458 * Here comes the definition of the parameters for the Stokes problem. We
2459 * allow the user to set the end time for the simulation, the levels of
2460 * refinement (both global and adaptive, which in sum specify the maximum level
2461 * the cells are allowed to have), and the interval between refinements in
2462 * the time stepping.
2463 *
2464
2465 *
2466 * Then, we let the user specify constants for the stabilization parameters
2467 * (as discussed in the introduction), the polynomial degree for the Stokes
2468 * velocity space, whether to use the locally conservative discretization
2469 * based on FE_DGP elements for the pressure or not (FE_Q elements for
2470 * pressure), and the polynomial degree for the temperature interpolation.
2471 *
2472
2473 *
2474 * The constructor checks for a valid input file (if there is none, a file
2475 * with default values for all parameters is written), and then parses
2476 * the parameters.
2477 *
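*
* For reference, an input file that simply reproduces the default values
* declared in the <code>declare_parameters()</code> function below would
* look like this:
*
* @code
*   set End time                            = 1e8
*   set Initial global refinement           = 2
*   set Initial adaptive refinement         = 2
*   set Time steps between mesh refinement  = 10
*   set Generate graphical output           = false
*   set Time steps between graphical output = 50
*
*   subsection Stabilization parameters
*     set alpha = 2
*     set c_R   = 0.11
*     set beta  = 0.078
*   end
*
*   subsection Discretization
*     set Stokes velocity polynomial degree       = 2
*     set Temperature polynomial degree           = 2
*     set Use locally conservative discretization = true
*   end
* @endcode
*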
2478 * @code
2479 *   template <int dim>
2480 *   BoussinesqFlowProblem<dim>::Parameters::Parameters(
2481 *   const std::string &parameter_filename)
2482 *   : end_time(1e8)
2483 *   , initial_global_refinement(2)
2484 *   , initial_adaptive_refinement(2)
2485 *   , adaptive_refinement_interval(10)
2486 *   , stabilization_alpha(2)
2487 *   , stabilization_c_R(0.11)
2488 *   , stabilization_beta(0.078)
2489 *   , stokes_velocity_degree(2)
2490 *   , use_locally_conservative_discretization(true)
2491 *   , temperature_degree(2)
2492 *   {
2493 *   ParameterHandler prm;
2494 *   BoussinesqFlowProblem<dim>::Parameters::declare_parameters(prm);
2495 *  
2496 *   std::ifstream parameter_file(parameter_filename);
2497 *  
2498 *   if (!parameter_file)
2499 *   {
2500 *   parameter_file.close();
2501 *  
2502 *   std::ofstream parameter_out(parameter_filename);
2503 *   prm.print_parameters(parameter_out, ParameterHandler::PRM);
2504 *  
2505 *   AssertThrow(
2506 *   false,
2507 *   ExcMessage(
2508 *   "Input parameter file <" + parameter_filename +
2509 *   "> not found. Creating a template file of the same name."));
2510 *   }
2511 *  
2512 *   prm.parse_input(parameter_file);
2513 *   parse_parameters(prm);
2514 *   }
2515 *  
2516 *  
2517 *  
2518 * @endcode
2519 *
2520 * Next we have a function that declares the parameters that we expect in
2521 * the input file, together with their data types, default values and a
2522 * description:
2523 *
2524 * @code
2525 *   template <int dim>
2526 *   void BoussinesqFlowProblem<dim>::Parameters::declare_parameters(
2527 *   ParameterHandler &prm)
2528 *   {
2529 *   prm.declare_entry("End time",
2530 *   "1e8",
2531 *   Patterns::Double(0),
2532 *   "The end time of the simulation in years.");
2533 *   prm.declare_entry("Initial global refinement",
2534 *   "2",
2535 *   Patterns::Integer(0),
2536 *   "The number of global refinement steps performed on "
2537 *   "the initial coarse mesh, before the problem is first "
2538 *   "solved there.");
2539 *   prm.declare_entry("Initial adaptive refinement",
2540 *   "2",
2541 *   Patterns::Integer(0),
2542 *   "The number of adaptive refinement steps performed after "
2543 *   "initial global refinement.");
2544 *   prm.declare_entry("Time steps between mesh refinement",
2545 *   "10",
2546 *   Patterns::Integer(1),
2547 *   "The number of time steps after which the mesh is to be "
2548 *   "adapted based on computed error indicators.");
2549 *   prm.declare_entry("Generate graphical output",
2550 *   "false",
2551 *   Patterns::Bool(),
2552 *   "Whether graphical output is to be generated or not. "
2553 *   "You may not want to get graphical output if the number "
2554 *   "of processors is large.");
2555 *   prm.declare_entry("Time steps between graphical output",
2556 *   "50",
2557 *   Patterns::Integer(1),
2558 *   "The number of time steps between each generation of "
2559 *   "graphical output files.");
2560 *  
2561 *   prm.enter_subsection("Stabilization parameters");
2562 *   {
2563 *   prm.declare_entry("alpha",
2564 *   "2",
2565 *   Patterns::Double(1, 2),
2566 *   "The exponent in the entropy viscosity stabilization.");
2567 *   prm.declare_entry("c_R",
2568 *   "0.11",
2569 *   Patterns::Double(0),
2570 *   "The c_R factor in the entropy viscosity "
2571 *   "stabilization.");
2572 *   prm.declare_entry("beta",
2573 *   "0.078",
2574 *   Patterns::Double(0),
2575 *   "The beta factor in the artificial viscosity "
2576 *   "stabilization. An appropriate value for 2d is 0.052 "
2577 *   "and 0.078 for 3d.");
2578 *   }
2579 *   prm.leave_subsection();
2580 *  
2581 *   prm.enter_subsection("Discretization");
2582 *   {
2583 *   prm.declare_entry(
2584 *   "Stokes velocity polynomial degree",
2585 *   "2",
2586 *   Patterns::Integer(1),
2587 *   "The polynomial degree to use for the velocity variables "
2588 *   "in the Stokes system.");
2589 *   prm.declare_entry(
2590 *   "Temperature polynomial degree",
2591 *   "2",
2592 *   Patterns::Integer(1),
2593 *   "The polynomial degree to use for the temperature variable.");
2594 *   prm.declare_entry(
2595 *   "Use locally conservative discretization",
2596 *   "true",
2597 *   Patterns::Bool(),
2598 *   "Whether to use a Stokes discretization that is locally "
2599 *   "conservative at the expense of a larger number of degrees "
2600 *   "of freedom, or to go with a cheaper discretization "
2601 *   "that does not locally conserve mass (although it is "
2602 *   "globally conservative.");
2603 *   }
2604 *   prm.leave_subsection();
2605 *   }
2606 *  
2607 *  
2608 *  
2609 * @endcode
2610 *
2611 * And then we need a function that reads the contents of the
2612 * ParameterHandler object we get by reading the input file and puts the
2613 * results into variables that store the values of the parameters we have
2614 * previously declared:
2615 *
2616 * @code
2617 *   template <int dim>
2618 *   void BoussinesqFlowProblem<dim>::Parameters::parse_parameters(
2619 *   ParameterHandler &prm)
2620 *   {
2621 *   end_time = prm.get_double("End time");
2622 *   initial_global_refinement = prm.get_integer("Initial global refinement");
2623 *   initial_adaptive_refinement =
2624 *   prm.get_integer("Initial adaptive refinement");
2625 *  
2626 *   adaptive_refinement_interval =
2627 *   prm.get_integer("Time steps between mesh refinement");
2628 *  
2629 *   generate_graphical_output = prm.get_bool("Generate graphical output");
2630 *   graphical_output_interval =
2631 *   prm.get_integer("Time steps between graphical output");
2632 *  
2633 *   prm.enter_subsection("Stabilization parameters");
2634 *   {
2635 *   stabilization_alpha = prm.get_double("alpha");
2636 *   stabilization_c_R = prm.get_double("c_R");
2637 *   stabilization_beta = prm.get_double("beta");
2638 *   }
2639 *   prm.leave_subsection();
2640 *  
2641 *   prm.enter_subsection("Discretization");
2642 *   {
2643 *   stokes_velocity_degree =
2644 *   prm.get_integer("Stokes velocity polynomial degree");
2645 *   temperature_degree = prm.get_integer("Temperature polynomial degree");
2646 *   use_locally_conservative_discretization =
2647 *   prm.get_bool("Use locally conservative discretization");
2648 *   }
2649 *   prm.leave_subsection();
2650 *   }
2651 *  
2652 *  
2653 *  
2654 * @endcode
2655 *
2656 *
2657 * <a name="step_32-BoussinesqFlowProblemBoussinesqFlowProblem"></a>
2658 * <h4>BoussinesqFlowProblem::BoussinesqFlowProblem</h4>
2659 *
2660
2661 *
2662 * The constructor of the problem is very similar to the constructor in
2663 * @ref step_31 "step-31". What is different is the %parallel communication: Trilinos uses
2664 * a message passing interface (MPI) for data distribution. When entering
2665 * the BoussinesqFlowProblem class, we have to decide how the parallelization
2666 * is to be done. We choose a rather simple strategy and let all processors
2667 * that are running the program work together, specified by the communicator
2668 * <code>MPI_COMM_WORLD</code>. Next, we create the output stream (as we
2669 * already did in @ref step_18 "step-18") that only generates output on the first MPI
2670 * process and is completely forgetful on all others. The implementation of
2671 * this idea is that <code>pcout</code> checks, upon construction, whether
2672 * it was given a true argument, and if so uses the <code>std::cout</code>
2673 * stream for output. If we are, for instance, processor five, then we will
2674 * give a <code>false</code> argument to <code>pcout</code>, which means that the
2675 * output of that processor will not be printed. With the exception of the
2676 * mapping object (for which we use polynomials of degree 4) all but the
2677 * final member variable are exactly the same as in @ref step_31 "step-31".
2678 *
2679
2680 *
2681 * This final object, the TimerOutput object, is then told to restrict
2682 * output to the <code>pcout</code> stream (processor 0), and then we
2683 * specify that we want to get a summary table at the end of the program
2684 * which shows us wallclock times (as opposed to CPU times). We will
2685 * manually also request intermediate summaries every so many time steps in
2686 * the <code>run()</code> function below.
2687 *
2688 * @code
2689 *   template <int dim>
2690 *   BoussinesqFlowProblem<dim>::BoussinesqFlowProblem(Parameters &parameters_)
2691 *   : parameters(parameters_)
2692 *   , pcout(std::cout, (Utilities::MPI::this_mpi_process(MPI_COMM_WORLD) == 0))
2693 *   ,
2694 *  
2695 *   triangulation(MPI_COMM_WORLD,
2696 *   typename Triangulation<dim>::MeshSmoothing(
2697 *   Triangulation<dim>::smoothing_on_refinement |
2698 *   Triangulation<dim>::smoothing_on_coarsening))
2699 *   ,
2700 *  
2701 *   global_Omega_diameter(0.)
2702 *   ,
2703 *  
2704 *   mapping(4)
2705 *   ,
2706 *  
2707 *   stokes_fe(FE_Q<dim>(parameters.stokes_velocity_degree) ^ dim,
2708 *   (parameters.use_locally_conservative_discretization ?
2709 *   static_cast<const FiniteElement<dim> &>(
2710 *   FE_DGP<dim>(parameters.stokes_velocity_degree - 1)) :
2711 *   static_cast<const FiniteElement<dim> &>(
2712 *   FE_Q<dim>(parameters.stokes_velocity_degree - 1))))
2713 *   ,
2714 *  
2715 *   stokes_dof_handler(triangulation)
2716 *   ,
2717 *  
2718 *   temperature_fe(parameters.temperature_degree)
2719 *   , temperature_dof_handler(triangulation)
2720 *   ,
2721 *  
2722 *   time_step(0)
2723 *   , old_time_step(0)
2724 *   , timestep_number(0)
2725 *   , rebuild_stokes_matrix(true)
2726 *   , rebuild_stokes_preconditioner(true)
2727 *   , rebuild_temperature_matrices(true)
2728 *   , rebuild_temperature_preconditioner(true)
2729 *   ,
2730 *  
2731 *   computing_timer(MPI_COMM_WORLD,
2732 *   pcout,
2733 *   TimerOutput::summary,
2734 *   TimerOutput::wall_times)
2735 *   {}
2736 *  
2737 *  
2738 *  
2739 * @endcode
2740 *
2741 *
2742 * <a name="step_32-TheBoussinesqFlowProblemhelperfunctions"></a>
2743 * <h4>The BoussinesqFlowProblem helper functions</h4>
2744 *
2745 * <a name="step_32-BoussinesqFlowProblemget_maximal_velocity"></a>
2746 * <h5>BoussinesqFlowProblem::get_maximal_velocity</h5>
2747 *
2748
2749 *
2750 * Except for two small details, the function to compute the global maximum
2751 * of the velocity is the same as in @ref step_31 "step-31". The first detail is actually
2752 * common to all functions that implement loops over all cells in the
2753 * triangulation: When operating in %parallel, each processor can only work
2754 * on a chunk of cells since each processor only has a certain part of the
2755 * entire triangulation. This chunk of cells that we want to work on is
2756 * identified via a so-called <code>subdomain_id</code>, as we also did in
2757 * @ref step_18 "step-18". All we need to change is hence to perform the cell-related
2758 * operations only on cells that are owned by the current process (as
2759 * opposed to ghost or artificial cells), i.e. for which the subdomain id
2760 * equals the ID of the current process. Since this is a commonly used
2761 * operation, there is a shortcut for this operation: we can ask whether the
2762 * cell is owned by the current processor using
2763 * <code>cell-@>is_locally_owned()</code>.
2764 *
2765
2766 *
2767 * The second difference is the way we calculate the maximum value. Before,
2768 * we could simply have a <code>double</code> variable that we checked
2769 * against on each quadrature point for each cell. Now, we have to be a bit
2770 * more careful since each processor only operates on a subset of
2771 * cells. What we do is to first let each processor calculate the maximum
2772 * among its cells, and then do a global communication operation
2773 * <code>Utilities::MPI::max</code> that computes the maximum value among
2774 * all the maximum values of the individual processors. MPI provides such a
2775 * call, but it's even simpler to use the respective function in namespace
2776 * Utilities::MPI using the MPI communicator object since that will do the
2777 * right thing even if we work without MPI and on a single machine only. The
2778 * call to <code>Utilities::MPI::max</code> needs two arguments, namely the
2779 * local maximum (input) and the MPI communicator, which is MPI_COMM_WORLD
2780 * in this example.
2781 *
2782 * @code
2783 *   template <int dim>
2784 *   double BoussinesqFlowProblem<dim>::get_maximal_velocity() const
2785 *   {
2786 *   const QIterated<dim> quadrature_formula(QTrapezoid<1>(),
2787 *   parameters.stokes_velocity_degree);
2788 *   const unsigned int n_q_points = quadrature_formula.size();
2789 *  
2790 *   FEValues<dim> fe_values(mapping,
2791 *   stokes_fe,
2792 *   quadrature_formula,
2793 *   update_values);
2794 *   std::vector<Tensor<1, dim>> velocity_values(n_q_points);
2795 *  
2796 *   const FEValuesExtractors::Vector velocities(0);
2797 *  
2798 *   double max_local_velocity = 0;
2799 *  
2800 *   for (const auto &cell : stokes_dof_handler.active_cell_iterators())
2801 *   if (cell->is_locally_owned())
2802 *   {
2803 *   fe_values.reinit(cell);
2804 *   fe_values[velocities].get_function_values(stokes_solution,
2805 *   velocity_values);
2806 *  
2807 *   for (unsigned int q = 0; q < n_q_points; ++q)
2808 *   max_local_velocity =
2809 *   std::max(max_local_velocity, velocity_values[q].norm());
2810 *   }
2811 *  
2812 *   return Utilities::MPI::max(max_local_velocity, MPI_COMM_WORLD);
2813 *   }
2814 *  
2815 *  
2816 * @endcode
2817 *
2818 *
2819 * <a name="step_32-BoussinesqFlowProblemget_cfl_number"></a>
2820 * <h5>BoussinesqFlowProblem::get_cfl_number</h5>
2821 *
2822
2823 *
2824 * The next function does something similar, but we now compute the CFL
2825 * number, i.e., maximal velocity on a cell divided by the cell
2826 * diameter. This number is necessary to determine the time step size, as we
2827 * use a semi-explicit time stepping scheme for the temperature equation
2828 * (see @ref step_31 "step-31" for a discussion). We compute it in the same way as above:
2829 * Compute the local maximum over all locally owned cells, then exchange it
2830 * via MPI to find the global maximum.
2831 *
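*
* The global time step is then computed from this number in the
* <code>run()</code> function; schematically, it is of the following form,
* where the precise constant in the first factor is a sketch of the scaling
* used there:
*
* @code
*   time_step = (1. / (1.7 * dim * std::sqrt(1. * dim)) /
*                (parameters.temperature_degree * get_cfl_number()));
* @endcode
*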
2832 * @code
2833 *   template <int dim>
2834 *   double BoussinesqFlowProblem<dim>::get_cfl_number() const
2835 *   {
2836 *   const QIterated<dim> quadrature_formula(QTrapezoid<1>(),
2837 *   parameters.stokes_velocity_degree);
2838 *   const unsigned int n_q_points = quadrature_formula.size();
2839 *  
2840 *   FEValues<dim> fe_values(mapping,
2841 *   stokes_fe,
2842 *   quadrature_formula,
2843 *   update_values);
2844 *   std::vector<Tensor<1, dim>> velocity_values(n_q_points);
2845 *  
2846 *   const FEValuesExtractors::Vector velocities(0);
2847 *  
2848 *   double max_local_cfl = 0;
2849 *  
2850 *   for (const auto &cell : stokes_dof_handler.active_cell_iterators())
2851 *   if (cell->is_locally_owned())
2852 *   {
2853 *   fe_values.reinit(cell);
2854 *   fe_values[velocities].get_function_values(stokes_solution,
2855 *   velocity_values);
2856 *  
2857 *   double max_local_velocity = 1e-10;
2858 *   for (unsigned int q = 0; q < n_q_points; ++q)
2859 *   max_local_velocity =
2860 *   std::max(max_local_velocity, velocity_values[q].norm());
2861 *   max_local_cfl =
2862 *   std::max(max_local_cfl, max_local_velocity / cell->diameter());
2863 *   }
2864 *  
2865 *   return Utilities::MPI::max(max_local_cfl, MPI_COMM_WORLD);
2866 *   }
2867 *  
2868 *  
2869 * @endcode
2870 *
2871 *
2872 * <a name="step_32-BoussinesqFlowProblemget_entropy_variation"></a>
2873 * <h5>BoussinesqFlowProblem::get_entropy_variation</h5>
2874 *
2875
2876 *
2877 * Next comes the computation of the global entropy variation
2878 * @f$\|E(T)-\bar{E}(T)\|_\infty@f$ where the entropy @f$E@f$ is defined as
2879 * discussed in the introduction. This is needed for the evaluation of the
2880 * stabilization in the temperature equation as explained in the
2881 * introduction. The entropy variation is actually only needed if we use
2882 * @f$\alpha=2@f$ as a power in the residual computation. The infinity norm is
2883 * computed by the maxima over quadrature points, as usual in discrete
2884 * computations.
2885 *
2886
2887 *
2888 * In order to compute this quantity, we first have to find the
2889 * space-average @f$\bar{E}(T)@f$ and then evaluate the maximum. However, that
2890 * means that we would need to perform two loops. We can avoid the overhead
2891 * by noting that @f$\|E(T)-\bar{E}(T)\|_\infty =
2892 * \max\big(E_{\textrm{max}}(T)-\bar{E}(T),
2893 * \bar{E}(T)-E_{\textrm{min}}(T)\big)@f$, i.e., the maximum out of the
2894 * deviation from the average entropy in positive and negative
2895 * directions. The four quantities we need for the latter formula (maximum
2896 * entropy, minimum entropy, average entropy, area) can all be evaluated in
2897 * the same loop over all cells, so we choose this simpler variant.
2898 *
2899 * @code
2900 *   template <int dim>
2901 *   double BoussinesqFlowProblem<dim>::get_entropy_variation(
2902 *   const double average_temperature) const
2903 *   {
2904 *   if (parameters.stabilization_alpha != 2)
2905 *   return 1.;
2906 *  
2907 *   const QGauss<dim> quadrature_formula(parameters.temperature_degree + 1);
2908 *   const unsigned int n_q_points = quadrature_formula.size();
2909 *  
2910 *   FEValues<dim> fe_values(temperature_fe,
2911 *   quadrature_formula,
2912 *   update_values | update_JxW_values);
2913 *   std::vector<double> old_temperature_values(n_q_points);
2914 *   std::vector<double> old_old_temperature_values(n_q_points);
2915 *  
2916 * @endcode
2917 *
2918 * In the two functions above we computed the maximum of numbers that were
2919 * all non-negative, so we knew that zero was certainly a lower bound. On
2920 * the other hand, here we need to find the maximum deviation from the
2921 * average value, i.e., we will need to know the maximal and minimal
2922 * values of the entropy, whose sign we do not know a priori.
2923 *
2924
2925 *
2926 * To compute it, we can therefore start with the largest and smallest
2927 * possible values we can store in a double precision number: The minimum
2928 * is initialized with a larger, and the maximum with a smaller, number
2929 * than any that is going to appear. We are then guaranteed that these
2930 * numbers will be overwritten in the loop on the first cell or, if this
2931 * processor does not own any cells, in the communication step at the
2932 * latest. The following loop then computes the minimum and maximum local
2933 * entropy as well as keeps track of the area/volume of the part of the
2934 * domain we locally own and the integral over the entropy on it:
2935 *
2936 * @code
2937 *   double min_entropy = std::numeric_limits<double>::max(),
2938 *   max_entropy = std::numeric_limits<double>::lowest(), area = 0,
2939 *   entropy_integrated = 0;
2940 *  
2941 *   for (const auto &cell : temperature_dof_handler.active_cell_iterators())
2942 *   if (cell->is_locally_owned())
2943 *   {
2944 *   fe_values.reinit(cell);
2945 *   fe_values.get_function_values(old_temperature_solution,
2946 *   old_temperature_values);
2947 *   fe_values.get_function_values(old_old_temperature_solution,
2948 *   old_old_temperature_values);
2949 *   for (unsigned int q = 0; q < n_q_points; ++q)
2950 *   {
2951 *   const double T =
2952 *   (old_temperature_values[q] + old_old_temperature_values[q]) / 2;
2953 *   const double entropy =
2954 *   ((T - average_temperature) * (T - average_temperature));
2955 *  
2956 *   min_entropy = std::min(min_entropy, entropy);
2957 *   max_entropy = std::max(max_entropy, entropy);
2958 *   area += fe_values.JxW(q);
2959 *   entropy_integrated += fe_values.JxW(q) * entropy;
2960 *   }
2961 *   }
2962 *  
2963 * @endcode
2964 *
2965 * Now we only need to exchange data between processors: we need to sum
2966 * the two integrals (<code>area</code>, <code>entropy_integrated</code>),
2967 * and get the extrema for maximum and minimum. We could do this through
2968 * four different data exchanges, but we can do it with two:
2969 * Utilities::MPI::sum also exists in a variant that takes an array of
2970 * values that are all to be summed up. And we can also utilize the
2971 * Utilities::MPI::max function by realizing that forming the minimum over
2972 * the minimal entropies equals forming the negative of the maximum over
2973 * the negative of the minimal entropies; this maximum can then be
2974 * combined with forming the maximum over the maximal entropies.
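*
* In formulas, this is simply the identity
* @f[
*   \min_p m_p = -\max_p \left(-m_p\right),
* @f]
* so a single call to Utilities::MPI::max applied to the pair
* @f$(-m_{\text{min}}, m_{\text{max}})@f$ of local values returns both
* global extrema at once.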
2975 *
2976 * @code
2977 *   const double local_sums[2] = {entropy_integrated, area},
2978 *   local_maxima[2] = {-min_entropy, max_entropy};
2979 *   double global_sums[2], global_maxima[2];
2980 *  
2981 *   Utilities::MPI::sum(local_sums, MPI_COMM_WORLD, global_sums);
2982 *   Utilities::MPI::max(local_maxima, MPI_COMM_WORLD, global_maxima);
2983 *  
2984 * @endcode
2985 *
2986 * Having computed everything this way, we can then compute the average
2987 * entropy and find the @f$L^\infty@f$ norm by taking the larger of the
2988 * deviation of the maximum or minimum from the average:
2989 *
2990 * @code
2991 *   const double average_entropy = global_sums[0] / global_sums[1];
2992 *   const double entropy_diff = std::max(global_maxima[1] - average_entropy,
2993 *   average_entropy - (-global_maxima[0]));
2994 *   return entropy_diff;
2995 *   }
2996 *  
2997 *  
2998 *  
2999 * @endcode
3000 *
3001 *
3002 * <a name="step_32-BoussinesqFlowProblemget_extrapolated_temperature_range"></a>
3003 * <h5>BoussinesqFlowProblem::get_extrapolated_temperature_range</h5>
3004 *
3005
3006 *
3007 * The next function computes the minimal and maximal value of the
3008 * extrapolated temperature over the entire domain. Again, this is only a
3009 * slightly modified version of the respective function in @ref step_31 "step-31". As in
3010 * the function above, we collect local minima and maxima and then compute
3011 * the global extrema using the same trick as above.
3012 *
3013
3014 *
3015 * As already discussed in @ref step_31 "step-31", the function needs to distinguish
3016 * between the first and all following time steps because it uses a higher
3017 * order temperature extrapolation scheme when at least two previous time
3018 * steps are available.
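*
* Spelled out (this is just a transcription of the code that follows),
* the extrapolated temperature in all but the first time step is
* @f[
*   T^{\text{extrap}} = \left(1+\frac{k_n}{k_{n-1}}\right) T^{n-1}
*   - \frac{k_n}{k_{n-1}}\, T^{n-2},
* @f]
* where @f$k_n@f$ and @f$k_{n-1}@f$ denote the current and previous time
* step sizes. In the first time step, we simply use @f$T^{n-1}@f$ itself.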
3019 *
3020 * @code
3021 *   template <int dim>
3022 *   std::pair<double, double>
3023 *   BoussinesqFlowProblem<dim>::get_extrapolated_temperature_range() const
3024 *   {
3025 *   const QIterated<dim> quadrature_formula(QTrapezoid<1>(),
3026 *   parameters.temperature_degree);
3027 *   const unsigned int n_q_points = quadrature_formula.size();
3028 *  
3029 *   FEValues<dim> fe_values(mapping,
3030 *   temperature_fe,
3031 *   quadrature_formula,
3032 *   update_values);
3033 *   std::vector<double> old_temperature_values(n_q_points);
3034 *   std::vector<double> old_old_temperature_values(n_q_points);
3035 *  
3036 *   double min_local_temperature = std::numeric_limits<double>::max(),
3037 *   max_local_temperature = std::numeric_limits<double>::lowest();
3038 *  
3039 *   if (timestep_number != 0)
3040 *   {
3041 *   for (const auto &cell : temperature_dof_handler.active_cell_iterators())
3042 *   if (cell->is_locally_owned())
3043 *   {
3044 *   fe_values.reinit(cell);
3045 *   fe_values.get_function_values(old_temperature_solution,
3046 *   old_temperature_values);
3047 *   fe_values.get_function_values(old_old_temperature_solution,
3048 *   old_old_temperature_values);
3049 *  
3050 *   for (unsigned int q = 0; q < n_q_points; ++q)
3051 *   {
3052 *   const double temperature =
3053 *   (1. + time_step / old_time_step) *
3054 *   old_temperature_values[q] -
3055 *   time_step / old_time_step * old_old_temperature_values[q];
3056 *  
3057 *   min_local_temperature =
3058 *   std::min(min_local_temperature, temperature);
3059 *   max_local_temperature =
3060 *   std::max(max_local_temperature, temperature);
3061 *   }
3062 *   }
3063 *   }
3064 *   else
3065 *   {
3066 *   for (const auto &cell : temperature_dof_handler.active_cell_iterators())
3067 *   if (cell->is_locally_owned())
3068 *   {
3069 *   fe_values.reinit(cell);
3070 *   fe_values.get_function_values(old_temperature_solution,
3071 *   old_temperature_values);
3072 *  
3073 *   for (unsigned int q = 0; q < n_q_points; ++q)
3074 *   {
3075 *   const double temperature = old_temperature_values[q];
3076 *  
3077 *   min_local_temperature =
3078 *   std::min(min_local_temperature, temperature);
3079 *   max_local_temperature =
3080 *   std::max(max_local_temperature, temperature);
3081 *   }
3082 *   }
3083 *   }
3084 *  
3085 *   double local_extrema[2] = {-min_local_temperature, max_local_temperature};
3086 *   double global_extrema[2];
3087 *   Utilities::MPI::max(local_extrema, MPI_COMM_WORLD, global_extrema);
3088 *  
3089 *   return std::make_pair(-global_extrema[0], global_extrema[1]);
3090 *   }
3091 *  
3092 *  
3093 * @endcode
3094 *
3095 *
3096 * <a name="step_32-BoussinesqFlowProblemcompute_viscosity"></a>
3097 * <h5>BoussinesqFlowProblem::compute_viscosity</h5>
3098 *
3099
3100 *
3101 * The function that calculates the viscosity is purely local and so needs
3102 * no communication at all. It is mostly the same as in @ref step_31 "step-31" but with an
3103 * updated formulation of the viscosity if @f$\alpha=2@f$ is chosen.
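*
* For @f$\alpha=2@f$ and all but the first time step, the value computed
* below is (in the notation of the introduction)
* @f[
*   \nu_K = \min\left(\beta\, \|\mathbf{u}\|_{\infty,K}\, h_K,\;
*   c_R\, h_K^2\,
*   \frac{\|r_E\|_{\infty,K}}{\|E(T)-\bar{E}(T)\|_\infty}\right),
* @f]
* where @f$h_K@f$ is the cell diameter, @f$r_E@f$ is the entropy residual,
* and the global entropy variation in the denominator is the quantity
* computed by the previous function. In the very first time step, where no
* residual information is available yet, only the first argument of the
* minimum is used: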
3104 *
3105 * @code
3106 *   template <int dim>
3107 *   double BoussinesqFlowProblem<dim>::compute_viscosity(
3108 *   const std::vector<double> &old_temperature,
3109 *   const std::vector<double> &old_old_temperature,
3110 *   const std::vector<Tensor<1, dim>> &old_temperature_grads,
3111 *   const std::vector<Tensor<1, dim>> &old_old_temperature_grads,
3112 *   const std::vector<double> &old_temperature_laplacians,
3113 *   const std::vector<double> &old_old_temperature_laplacians,
3114 *   const std::vector<Tensor<1, dim>> &old_velocity_values,
3115 *   const std::vector<Tensor<1, dim>> &old_old_velocity_values,
3116 *   const std::vector<SymmetricTensor<2, dim>> &old_strain_rates,
3117 *   const std::vector<SymmetricTensor<2, dim>> &old_old_strain_rates,
3118 *   const double global_u_infty,
3119 *   const double global_T_variation,
3120 *   const double average_temperature,
3121 *   const double global_entropy_variation,
3122 *   const double cell_diameter) const
3123 *   {
3124 *   if (global_u_infty == 0)
3125 *   return 5e-3 * cell_diameter;
3126 *  
3127 *   const unsigned int n_q_points = old_temperature.size();
3128 *  
3129 *   double max_residual = 0;
3130 *   double max_velocity = 0;
3131 *  
3132 *   for (unsigned int q = 0; q < n_q_points; ++q)
3133 *   {
3134 *   const Tensor<1, dim> u =
3135 *   (old_velocity_values[q] + old_old_velocity_values[q]) / 2;
3136 *  
3137 *   const SymmetricTensor<2, dim> strain_rate =
3138 *   (old_strain_rates[q] + old_old_strain_rates[q]) / 2;
3139 *  
3140 *   const double T = (old_temperature[q] + old_old_temperature[q]) / 2;
3141 *   const double dT_dt =
3142 *   (old_temperature[q] - old_old_temperature[q]) / old_time_step;
3143 *   const double u_grad_T =
3144 *   u * (old_temperature_grads[q] + old_old_temperature_grads[q]) / 2;
3145 *  
3146 *   const double kappa_Delta_T =
3147 *   EquationData::kappa *
3148 *   (old_temperature_laplacians[q] + old_old_temperature_laplacians[q]) /
3149 *   2;
3150 *   const double gamma =
3151 *   ((EquationData::radiogenic_heating * EquationData::density(T) +
3152 *   2 * EquationData::eta * strain_rate * strain_rate) /
3153 *   (EquationData::density(T) * EquationData::specific_heat));
3154 *  
3155 *   double residual = std::abs(dT_dt + u_grad_T - kappa_Delta_T - gamma);
3156 *   if (parameters.stabilization_alpha == 2)
3157 *   residual *= std::abs(T - average_temperature);
3158 *  
3159 *   max_residual = std::max(residual, max_residual);
3160 *   max_velocity = std::max(std::sqrt(u * u), max_velocity);
3161 *   }
3162 *  
3163 *   const double max_viscosity =
3164 *   (parameters.stabilization_beta * max_velocity * cell_diameter);
3165 *   if (timestep_number == 0)
3166 *   return max_viscosity;
3167 *   else
3168 *   {
3169 *   Assert(old_time_step > 0, ExcInternalError());
3170 *  
3171 *   double entropy_viscosity;
3172 *   if (parameters.stabilization_alpha == 2)
3173 *   entropy_viscosity =
3174 *   (parameters.stabilization_c_R * cell_diameter * cell_diameter *
3175 *   max_residual / global_entropy_variation);
3176 *   else
3177 *   entropy_viscosity =
3178 *   (parameters.stabilization_c_R * cell_diameter *
3179 *   global_Omega_diameter * max_velocity * max_residual /
3180 *   (global_u_infty * global_T_variation));
3181 *  
3182 *   return std::min(max_viscosity, entropy_viscosity);
3183 *   }
3184 *   }
3185 *  
3186 *  
3187 *  
3188 * @endcode
3189 *
3190 *
3191 * <a name="step_32-TheBoussinesqFlowProblemsetupfunctions"></a>
3192 * <h4>The BoussinesqFlowProblem setup functions</h4>
3193 *
3194
3195 *
3196 * The following three functions set up the Stokes matrix, the matrix used
3197 * for the Stokes preconditioner, and the temperature matrix. The code is
3198 * mostly the same as in @ref step_31 "step-31", but it has been broken out into three
3199 * functions of their own for simplicity.
3200 *
3201
3202 *
3203 * The main functional difference between the code here and that in @ref step_31 "step-31"
3204 * is that the matrices we want to set up are distributed across multiple
3205 * processors. Since we still want to build up the sparsity pattern first
3206 * for efficiency reasons, we could continue to build the <i>entire</i>
3207 * sparsity pattern as a BlockDynamicSparsityPattern, as we did in
3208 * @ref step_31 "step-31". However, that would be inefficient: every processor would build
3209 * the same sparsity pattern, but only initialize a small part of the matrix
3210 * using it. It also violates the principle that every processor should only
3211 * work on those cells it owns (and, if necessary, the layer of ghost cells
3212 * around it).
3213 *
3214
3215 *
3216 * Rather, we use an object of type TrilinosWrappers::BlockSparsityPattern,
3217 * which is (obviously) a wrapper around a sparsity pattern object provided
3218 * by Trilinos. The advantage is that the Trilinos sparsity pattern class
3219 * can communicate across multiple processors: if this processor fills in
3220 * all the nonzero entries that result from the cells it owns, and every
3221 * other processor does so as well, then at the end after some MPI
3222 * communication initiated by the <code>compress()</code> call, we will have
3223 * the globally assembled sparsity pattern available with which the global
3224 * matrix can be initialized.
3225 *
3226
3227 *
3228 * There is one important aspect when initializing Trilinos sparsity
3229 * patterns in parallel: In addition to specifying the locally owned rows
3230 * and columns of the matrices via the @p stokes_partitioning index set, we
3231 * also supply information about all the rows we are possibly going to write
3232 * into when assembling on a certain processor. The set of locally relevant
3233 * rows contains all such rows (possibly also a few unnecessary ones, but it
3234 * is difficult to find the exact row indices before actually getting
3235 * indices on all cells and resolving constraints). This additional
3236 * information allows us to determine exactly the structure of the
3237 * off-processor data found during assembly. While Trilinos matrices are
3238 * able to collect this information on the fly as well (when initializing
3239 * them from some other reinit method), it is less efficient and leads to
3240 * problems when assembling matrices with multiple threads. In this program,
3241 * we pessimistically assume that only one processor at a time can write
3242 * into the matrix during assembly (whereas the computation is parallel),
3243 * which is fine for Trilinos matrices. In practice, one can do better by
3244 * hinting WorkStream at cells that do not share vertices, allowing for
3245 * parallelism among those cells (see the graph coloring algorithms and
3246 * WorkStream with colored iterators argument). However, that only works
3247 * when only one MPI processor is present because Trilinos' internal data
3248 * structures for accumulating off-processor data on the fly are not thread
3249 * safe. With the initialization presented here, there is no such problem
3250 * and one could safely introduce graph coloring for this algorithm.
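*
* To illustrate that last remark, here is a minimal sketch of what such a
* graph coloring setup could look like. This is not code used by this
* program, and it assumes a single MPI process as just discussed; it uses
* the GraphColoring::make_graph_coloring() function (declared in
* <deal.II/base/graph_coloring.h>) together with the WorkStream::run()
* overload that accepts colored iterator ranges:
*
* @code
*   // Two cells "conflict" if they share a degree of freedom, i.e., if
*   // they would write into the same rows of the global matrix:
*   const auto get_conflict_indices =
*     [](const typename DoFHandler<dim>::active_cell_iterator &cell) {
*       std::vector<types::global_dof_index> indices(
*         cell->get_fe().n_dofs_per_cell());
*       cell->get_dof_indices(indices);
*       return indices;
*     };
*  
*   // Partition the cells into groups ("colors") of mutually
*   // non-conflicting cells:
*   const std::vector<
*     std::vector<typename DoFHandler<dim>::active_cell_iterator>>
*     colored_cells =
*       GraphColoring::make_graph_coloring(stokes_dof_handler.begin_active(),
*                                          stokes_dof_handler.end(),
*                                          get_conflict_indices);
*  
*   // Within one color, no two cells touch the same matrix entries, so
*   // the WorkStream::run() overload taking colored ranges may execute
*   // the copier in parallel as well:
*   //   WorkStream::run(colored_cells, worker, copier,
*   //                   scratch_object, copy_data_object);
* @endcode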
3251 *
3252
3253 *
3254 * The only other change we need to make is to tell the
3255 * DoFTools::make_sparsity_pattern() function that it is only supposed to
3256 * work on a subset of cells, namely the ones whose
3257 * <code>subdomain_id</code> equals the number of the current processor, and
3258 * to ignore all other cells.
3259 *
3260
3261 *
3262 * This strategy is replicated across all three of the following functions.
3263 *
3264
3265 *
3266 * Note that Trilinos matrices store the information contained in the
3267 * sparsity patterns, so we can safely release the <code>sp</code> variable
3268 * once the matrix has been given the sparsity structure.
3269 *
3270 * @code
3271 *   template <int dim>
3272 *   void BoussinesqFlowProblem<dim>::setup_stokes_matrix(
3273 *   const std::vector<IndexSet> &stokes_partitioning,
3274 *   const std::vector<IndexSet> &stokes_relevant_partitioning)
3275 *   {
3276 *   stokes_matrix.clear();
3277 *  
3278 *   TrilinosWrappers::BlockSparsityPattern sp(stokes_partitioning,
3279 *   stokes_partitioning,
3280 *   stokes_relevant_partitioning,
3281 *   MPI_COMM_WORLD);
3282 *  
3283 *   Table<2, DoFTools::Coupling> coupling(dim + 1, dim + 1);
3284 *   for (unsigned int c = 0; c < dim + 1; ++c)
3285 *   for (unsigned int d = 0; d < dim + 1; ++d)
3286 *   if (!((c == dim) && (d == dim)))
3287 *   coupling[c][d] = DoFTools::always;
3288 *   else
3289 *   coupling[c][d] = DoFTools::none;
3290 *  
3291 *   DoFTools::make_sparsity_pattern(stokes_dof_handler,
3292 *   coupling,
3293 *   sp,
3294 *   stokes_constraints,
3295 *   false,
3296 *   Utilities::MPI::this_mpi_process(
3297 *   MPI_COMM_WORLD));
3298 *   sp.compress();
3299 *  
3300 *   stokes_matrix.reinit(sp);
3301 *   }
3302 *  
3303 *  
3304 *  
3305 *   template <int dim>
3306 *   void BoussinesqFlowProblem<dim>::setup_stokes_preconditioner(
3307 *   const std::vector<IndexSet> &stokes_partitioning,
3308 *   const std::vector<IndexSet> &stokes_relevant_partitioning)
3309 *   {
3310 *   Amg_preconditioner.reset();
3311 *   Mp_preconditioner.reset();
3312 *  
3313 *   stokes_preconditioner_matrix.clear();
3314 *  
3315 *   TrilinosWrappers::BlockSparsityPattern sp(stokes_partitioning,
3316 *   stokes_partitioning,
3317 *   stokes_relevant_partitioning,
3318 *   MPI_COMM_WORLD);
3319 *  
3320 *   Table<2, DoFTools::Coupling> coupling(dim + 1, dim + 1);
3321 *   for (unsigned int c = 0; c < dim + 1; ++c)
3322 *   for (unsigned int d = 0; d < dim + 1; ++d)
3323 *   if (c == d)
3324 *   coupling[c][d] = DoFTools::always;
3325 *   else
3326 *   coupling[c][d] = DoFTools::none;
3327 *  
3328 *   DoFTools::make_sparsity_pattern(stokes_dof_handler,
3329 *   coupling,
3330 *   sp,
3331 *   stokes_constraints,
3332 *   false,
3333 *   Utilities::MPI::this_mpi_process(
3334 *   MPI_COMM_WORLD));
3335 *   sp.compress();
3336 *  
3337 *   stokes_preconditioner_matrix.reinit(sp);
3338 *   }
3339 *  
3340 *  
3341 *   template <int dim>
3342 *   void BoussinesqFlowProblem<dim>::setup_temperature_matrices(
3343 *   const IndexSet &temperature_partitioner,
3344 *   const IndexSet &temperature_relevant_partitioner)
3345 *   {
3346 *   T_preconditioner.reset();
3347 *   temperature_mass_matrix.clear();
3348 *   temperature_stiffness_matrix.clear();
3349 *   temperature_matrix.clear();
3350 *  
3351 *   TrilinosWrappers::SparsityPattern sp(temperature_partitioner,
3352 *   temperature_partitioner,
3353 *   temperature_relevant_partitioner,
3354 *   MPI_COMM_WORLD);
3355 *   DoFTools::make_sparsity_pattern(temperature_dof_handler,
3356 *   sp,
3357 *   temperature_constraints,
3358 *   false,
3359 *   Utilities::MPI::this_mpi_process(
3360 *   MPI_COMM_WORLD));
3361 *   sp.compress();
3362 *  
3363 *   temperature_matrix.reinit(sp);
3364 *   temperature_mass_matrix.reinit(sp);
3365 *   temperature_stiffness_matrix.reinit(sp);
3366 *   }
3367 *  
3368 *  
3369 *  
3370 * @endcode
3371 *
3372 * The remainder of the setup function (after splitting out the three
3373 * functions above) mostly has to deal with the things we need to do for
3374 * parallelization across processors. Because setting all of this up is a
3375 * significant compute time expense of the program, we put everything we do
3376 * here into a timer group so that we can get summary information about the
3377 * fraction of time spent in this part of the program at its end.
3378 *
3379
3380 *
3381 * At the top as usual we enumerate degrees of freedom and sort them by
3382 * component/block, followed by writing their numbers to the screen from
3383 * processor zero. The DoFHandler::distribute_dofs() function, when applied
3384 * to a parallel::distributed::Triangulation object, sorts degrees of
3385 * freedom in such a way that all degrees of freedom associated with
3386 * subdomain zero come before all those associated with subdomain one,
3387 * etc. For the Stokes part, this entails, however, that velocities and
3388 * pressures become intermixed, but this is trivially solved by sorting
3389 * again by blocks; it is worth noting that this latter operation leaves the
3390 * relative ordering of all velocities and pressures alone, i.e. within the
3391 * velocity block we will still have all those associated with subdomain
3392 * zero before all velocities associated with subdomain one, etc. This is
3393 * important since we store each of the blocks of this matrix distributed
3394 * across all processors and want this to be done in such a way that each
3395 * processor stores that part of the matrix that is roughly equal to the
3396 * degrees of freedom located on those cells that it will actually work on.
3397 *
3398
3399 *
3400 * When printing the numbers of degrees of freedom, note that these numbers
3401 * are going to be large if we use many processors. Consequently, we let the
3402 * stream put a comma separator in between every three digits. The state of
3403 * the stream (specifically, its locale) is saved before and restored after this
3404 * operation. While slightly opaque, the code works because the default
3405 * locale (which we get using the constructor call
3406 * <code>std::locale("")</code>) implies printing numbers with a comma
3407 * separator for every third digit (i.e., thousands, millions, billions).
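*
* As a minimal standalone illustration of this mechanism (independent of
* deal.II, and assuming a system locale that groups digits by thousands):
*
* @code
*   #include <iostream>
*   #include <locale>
*  
*   int main()
*   {
*     std::cout.imbue(std::locale("")); // use the user's default locale
*     std::cout << 1234567 << std::endl; // prints, e.g., "1,234,567"
*   }
* @endcode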
3408 *
3409
3410 *
3411 * In this function as well as many below, we measure how much time
3412 * we spend here and collect that in a section called "Setup dof
3413 * systems" across function invocations. This is done using an
3414 * TimerOutput::Scope object that gets a timer going in the section
3415 * with above name of the `computing_timer` object upon construction
3416 * of the local variable; the timer is stopped again when the
3417 * destructor of the `timing_section` variable is called. This, of
3418 * course, happens either at the end of the function, or if we leave
3419 * the function through a `return` statement or when an exception is
3420 * thrown somewhere -- in other words, whenever we leave this
3421 * function in any way. The use of such "scope" objects therefore
3422 * makes sure that we do not have to manually add code that tells
3423 * the timer to stop at every location where this function may be
3424 * left.
3425 *
3426 * @code
3427 *   template <int dim>
3428 *   void BoussinesqFlowProblem<dim>::setup_dofs()
3429 *   {
3430 *   TimerOutput::Scope timing_section(computing_timer, "Setup dof systems");
3431 *  
3432 *   stokes_dof_handler.distribute_dofs(stokes_fe);
3433 *  
3434 *   std::vector<unsigned int> stokes_sub_blocks(dim + 1, 0);
3435 *   stokes_sub_blocks[dim] = 1;
3436 *   DoFRenumbering::component_wise(stokes_dof_handler, stokes_sub_blocks);
3437 *  
3438 *   temperature_dof_handler.distribute_dofs(temperature_fe);
3439 *  
3440 *   const std::vector<types::global_dof_index> stokes_dofs_per_block =
3441 *   DoFTools::count_dofs_per_fe_block(stokes_dof_handler, stokes_sub_blocks);
3442 *  
3443 *   const types::global_dof_index n_u = stokes_dofs_per_block[0],
3444 *   n_p = stokes_dofs_per_block[1],
3445 *   n_T = temperature_dof_handler.n_dofs();
3446 *  
3447 *   std::locale s = pcout.get_stream().getloc();
3448 *   pcout.get_stream().imbue(std::locale(""));
3449 *   pcout << "Number of active cells: " << triangulation.n_global_active_cells()
3450 *   << " (on " << triangulation.n_levels() << " levels)" << std::endl
3451 *   << "Number of degrees of freedom: " << n_u + n_p + n_T << " (" << n_u
3452 *   << '+' << n_p << '+' << n_T << ')' << std::endl
3453 *   << std::endl;
3454 *   pcout.get_stream().imbue(s);
3455 *  
3456 *  
3457 * @endcode
3458 *
3459 * After this, we have to set up the various partitioners (of type
3460 * <code>IndexSet</code>, see the introduction) that describe which parts
3461 * of each matrix or vector will be stored where, then call the functions
3462 * that actually set up the matrices, and at the end also resize the
3463 * various vectors we keep around in this program.
3464 *
3465
3466 *
3467 *
3468 * @code
3469 *   const IndexSet &stokes_locally_owned_index_set =
3470 *   stokes_dof_handler.locally_owned_dofs();
3471 *   const IndexSet stokes_locally_relevant_set =
3472 *   DoFTools::extract_locally_relevant_dofs(stokes_dof_handler);
3473 *  
3474 *   std::vector<IndexSet> stokes_partitioning;
3475 *   stokes_partitioning.push_back(
3476 *   stokes_locally_owned_index_set.get_view(0, n_u));
3477 *   stokes_partitioning.push_back(
3478 *   stokes_locally_owned_index_set.get_view(n_u, n_u + n_p));
3479 *  
3480 *   std::vector<IndexSet> stokes_relevant_partitioning;
3481 *   stokes_relevant_partitioning.push_back(
3482 *   stokes_locally_relevant_set.get_view(0, n_u));
3483 *   stokes_relevant_partitioning.push_back(
3484 *   stokes_locally_relevant_set.get_view(n_u, n_u + n_p));
3485 *  
3486 *   const IndexSet temperature_partitioning =
3487 *   temperature_dof_handler.locally_owned_dofs();
3488 *   const IndexSet temperature_relevant_partitioning =
3489 *   DoFTools::extract_locally_relevant_dofs(temperature_dof_handler);
3490 *  
3491 * @endcode
3492 *
3493 * Following this, we can compute constraints for the solution vectors,
3494 * including hanging node constraints and homogeneous and inhomogeneous
3495 * boundary values for the Stokes and temperature fields. Note that as for
3496 * everything else, the constraint objects can not hold <i>all</i>
3497 * constraints on every processor. Rather, each processor needs to store
3498 * only those that are actually necessary for correctness given that it
3499 * only assembles linear systems on cells it owns. As discussed in
3500 * @ref distributed_paper "this paper", the set of constraints we need to
3501 * know about is exactly the set of constraints on all locally relevant
3502 * degrees of freedom, so this is what we use to initialize the constraint
3503 * objects.
3504 *
3505 * @code
3506 *   {
3507 *   stokes_constraints.clear();
3508 *   stokes_constraints.reinit(stokes_locally_owned_index_set,
3509 *   stokes_locally_relevant_set);
3510 *  
3511 *   DoFTools::make_hanging_node_constraints(stokes_dof_handler,
3512 *   stokes_constraints);
3513 *  
3514 *   const FEValuesExtractors::Vector velocity_components(0);
3515 *   VectorTools::interpolate_boundary_values(
3516 *   stokes_dof_handler,
3517 *   0,
3518 *   Functions::ZeroFunction<dim>(dim + 1),
3519 *   stokes_constraints,
3520 *   stokes_fe.component_mask(velocity_components));
3521 *  
3522 *   std::set<types::boundary_id> no_normal_flux_boundaries;
3523 *   no_normal_flux_boundaries.insert(1);
3524 *   VectorTools::compute_no_normal_flux_constraints(stokes_dof_handler,
3525 *   0,
3526 *   no_normal_flux_boundaries,
3527 *   stokes_constraints,
3528 *   mapping);
3529 *   stokes_constraints.close();
3530 *   }
3531 *   {
3532 *   temperature_constraints.clear();
3533 *   temperature_constraints.reinit(temperature_partitioning,
3534 *   temperature_relevant_partitioning);
3535 *  
3536 *   DoFTools::make_hanging_node_constraints(temperature_dof_handler,
3537 *   temperature_constraints);
3538 *   VectorTools::interpolate_boundary_values(
3539 *   temperature_dof_handler,
3540 *   0,
3541 *   EquationData::TemperatureInitialValues<dim>(),
3542 *   temperature_constraints);
3543 *   VectorTools::interpolate_boundary_values(
3544 *   temperature_dof_handler,
3545 *   1,
3546 *   EquationData::TemperatureInitialValues<dim>(),
3547 *   temperature_constraints);
3548 *   temperature_constraints.close();
3549 *   }
3550 *  
3551 * @endcode
3552 *
3553 * All this done, we can then initialize the various matrix and vector
3554 * objects to their proper sizes. At the end, we also record that all
3555 * matrices and preconditioners have to be re-computed at the beginning of
3556 * the next time step. Note how we initialize the vectors for the Stokes
3557 * and temperature right hand sides: These are writable vectors (last
3558 * boolean argument set to @p true) that have the correct one-to-one
3559 * partitioning of locally owned elements but are still given the relevant
3560 * partitioning for means of figuring out the vector entries that are
3561 * going to be set right away. As for matrices, this allows for writing
3562 * local contributions into the vector with multiple threads (always
3563 * assuming that the same vector entry is not accessed by multiple threads
3564 * at the same time). The other vectors only allow for read access of
3565 * individual elements, including ghosts, but are not suitable for
3566 * solvers.
3567 *
3568 * @code
3569 *   setup_stokes_matrix(stokes_partitioning, stokes_relevant_partitioning);
3570 *   setup_stokes_preconditioner(stokes_partitioning,
3571 *   stokes_relevant_partitioning);
3572 *   setup_temperature_matrices(temperature_partitioning,
3573 *   temperature_relevant_partitioning);
3574 *  
3575 *   stokes_rhs.reinit(stokes_partitioning,
3576 *   stokes_relevant_partitioning,
3577 *   MPI_COMM_WORLD,
3578 *   true);
3579 *   stokes_solution.reinit(stokes_relevant_partitioning, MPI_COMM_WORLD);
3580 *   old_stokes_solution.reinit(stokes_solution);
3581 *  
3582 *   temperature_rhs.reinit(temperature_partitioning,
3583 *   temperature_relevant_partitioning,
3584 *   MPI_COMM_WORLD,
3585 *   true);
3586 *   temperature_solution.reinit(temperature_relevant_partitioning,
3587 *   MPI_COMM_WORLD);
3588 *   old_temperature_solution.reinit(temperature_solution);
3589 *   old_old_temperature_solution.reinit(temperature_solution);
3590 *  
3591 *   rebuild_stokes_matrix = true;
3592 *   rebuild_stokes_preconditioner = true;
3593 *   rebuild_temperature_matrices = true;
3594 *   rebuild_temperature_preconditioner = true;
3595 *   }
3596 *  
3597 *  
3598 *  
3599 * @endcode
3600 *
3601 *
3602 * <a name="step_32-TheBoussinesqFlowProblemassemblyfunctions"></a>
3603 * <h4>The BoussinesqFlowProblem assembly functions</h4>
3604 *
3605
3606 *
3607 * Following the discussion in the introduction and in the @ref threads
3608 * topic, we split the assembly functions into different parts:
3609 *
3610
3611 *
3612 * <ul> <li> The local calculations of matrices and right hand sides, given
3613 * a certain cell as input (these functions are named
3614 * <code>local_assemble_*</code> below). The resulting function is, in other
3615 * words, essentially the body of the loop over all cells in @ref step_31 "step-31". Note,
3616 * however, that these functions store the result from the local
3617 * calculations in variables of classes from the CopyData namespace.
3618 *
3619
3620 *
3621 * <li>These objects are then given to the second step which writes the
3622 * local data into the global data structures (these functions are named
3623 * <code>copy_local_to_global_*</code> below). These functions are pretty
3624 * trivial.
3625 *
3626
3627 *
3628 * <li>These two subfunctions are then used in the respective assembly
3629 * routine (called <code>assemble_*</code> below), where a WorkStream object
3630 * is set up and runs over all the cells that belong to the processor's
3631 * subdomain. </ul>
3632 *
3633
3634 *
3635 *
3636 * <a name="step_32-Stokespreconditionerassembly"></a>
3637 * <h5>Stokes preconditioner assembly</h5>
3638 *
3639
3640 *
3641 * Let us start with the functions that build the Stokes
3642 * preconditioner. The first two of these are pretty trivial, given the
3643 * discussion above. Note in particular that the main point in using the
3644 * scratch data object is that we want to avoid allocating any objects on
3645 * the heap each time we visit a new cell. As a consequence, the
3646 * assembly function below only has automatic local variables, and
3647 * everything else is accessed through the scratch data object, which is
3648 * allocated only once before we start the loop over all cells:
3649 *
3650 * @code
3651 *   template <int dim>
3652 *   void BoussinesqFlowProblem<dim>::local_assemble_stokes_preconditioner(
3653 *   const typename DoFHandler<dim>::active_cell_iterator &cell,
3654 *   Assembly::Scratch::StokesPreconditioner<dim> &scratch,
3655 *   Assembly::CopyData::StokesPreconditioner<dim> &data)
3656 *   {
3657 *   const unsigned int dofs_per_cell = stokes_fe.n_dofs_per_cell();
3658 *   const unsigned int n_q_points =
3659 *   scratch.stokes_fe_values.n_quadrature_points;
3660 *  
3661 *   const FEValuesExtractors::Vector velocities(0);
3662 *   const FEValuesExtractors::Scalar pressure(dim);
3663 *  
3664 *   scratch.stokes_fe_values.reinit(cell);
3665 *   cell->get_dof_indices(data.local_dof_indices);
3666 *  
3667 *   data.local_matrix = 0;
3668 *  
3669 *   for (unsigned int q = 0; q < n_q_points; ++q)
3670 *   {
3671 *   for (unsigned int k = 0; k < dofs_per_cell; ++k)
3672 *   {
3673 *   scratch.grad_phi_u[k] =
3674 *   scratch.stokes_fe_values[velocities].gradient(k, q);
3675 *   scratch.phi_p[k] = scratch.stokes_fe_values[pressure].value(k, q);
3676 *   }
3677 *  
3678 *   for (unsigned int i = 0; i < dofs_per_cell; ++i)
3679 *   for (unsigned int j = 0; j < dofs_per_cell; ++j)
3680 *   data.local_matrix(i, j) +=
3681 *   (EquationData::eta *
3682 *   scalar_product(scratch.grad_phi_u[i], scratch.grad_phi_u[j]) +
3683 *   (1. / EquationData::eta) * EquationData::pressure_scaling *
3684 *   EquationData::pressure_scaling *
3685 *   (scratch.phi_p[i] * scratch.phi_p[j])) *
3686 *   scratch.stokes_fe_values.JxW(q);
3687 *   }
3688 *   }
3689 *  
3690 *  
3691 *  
3692 *   template <int dim>
3693 *   void BoussinesqFlowProblem<dim>::copy_local_to_global_stokes_preconditioner(
3694 *   const Assembly::CopyData::StokesPreconditioner<dim> &data)
3695 *   {
3696 *   stokes_constraints.distribute_local_to_global(data.local_matrix,
3697 *   data.local_dof_indices,
3698 *   stokes_preconditioner_matrix);
3699 *   }
3700 *  
3701 *  
3702 * @endcode
3703 *
3704 * Now for the function that actually puts things together, using the
3705 * WorkStream functions. WorkStream::run needs a start and end iterator to
3706 * enumerate the cells it is supposed to work on. Typically, one would use
3707 * DoFHandler::begin_active() and DoFHandler::end() for that but here we
3708 * actually only want the subset of cells that in fact are owned by the
3709 * current processor. This is where the FilteredIterator class comes into
3710 * play: you give it a range of cells and it provides an iterator that only
3711 * iterates over that subset of cells that satisfy a certain predicate (a
3712 * predicate is a function of one argument that either returns true or
3713 * false). The predicate we use here is IteratorFilters::LocallyOwnedCell,
3714 * i.e., it returns true exactly if the cell is owned by the current
3715 * processor. The resulting iterator range is then exactly what we need.
3716 *
3717
3718 *
3719 * With this obstacle out of the way, we call the WorkStream::run
3720 * function with this set of cells, scratch and copy objects, and
3721 * with pointers to two functions: the local assembly and
3722 * copy-local-to-global function. These functions need to have very
3723 * specific signatures: three arguments in the first and one
3724 * argument in the latter case (see the documentation of the
3725 * WorkStream::run function for the meaning of these arguments).
3726 * Note how we use lambda functions to
3727 * create function objects that satisfy this requirement. The local
3728 * assembly function takes arguments that specify the
3729 * cell, the scratch data, and the copy data, while the copy
3730 * function takes a single argument with the
3731 * data to be written into the global matrix (also see the discussion in
3732 * @ref step_13 "step-13"'s <code>assemble_linear_system()</code> function). On the other
3733 * hand, the implicit zeroth argument of member functions (namely
3734 * the <code>this</code> pointer of the object on which that member
3735 * function is to operate on) is <i>bound</i> to the
3736 * <code>this</code> pointer of the current function and is captured. The
3737 * WorkStream::run function, as a consequence, does not need to know
3738 * anything about the object these functions work on.
3739 *
3740
3741 *
3742 * When the WorkStream is executed, it will create several local assembly
3743 * routines of the first kind for several cells and let some available
3744 * processors work on them. The function that needs to be synchronized,
3745 * i.e., the write operation into the global matrix, however, is executed by
3746 * only one thread at a time in the prescribed order. Of course, this only
3747 * holds for the parallelization on a single MPI process. Different MPI
3748 * processes will have their own WorkStream objects and do that work
3749 * completely independently (and in different memory spaces). In a
3750 * distributed calculation, some data will accumulate at degrees of freedom
3751 * that are not owned by the respective processor. It would be inefficient
3752 * to send data around every time we encounter such a dof. What happens
3753 * instead is that the Trilinos sparse matrix will keep that data and send
3754 * it to the owner at the end of assembly, by calling the
3755 * <code>compress()</code> command.
3756 *
3757 * @code
3758 *   template <int dim>
3759 *   void BoussinesqFlowProblem<dim>::assemble_stokes_preconditioner()
3760 *   {
3761 *   stokes_preconditioner_matrix = 0;
3762 *  
3763 *   const QGauss<dim> quadrature_formula(parameters.stokes_velocity_degree + 1);
3764 *  
3765 *   using CellFilter =
3766 *   FilteredIterator<typename DoFHandler<dim>::active_cell_iterator>;
3767 *  
3768 *   auto worker =
3769 *   [this](const typename DoFHandler<dim>::active_cell_iterator &cell,
3770 *   Assembly::Scratch::StokesPreconditioner<dim> &scratch,
3771 *   Assembly::CopyData::StokesPreconditioner<dim> &data) {
3772 *   this->local_assemble_stokes_preconditioner(cell, scratch, data);
3773 *   };
3774 *  
3775 *   auto copier =
3776 *   [this](const Assembly::CopyData::StokesPreconditioner<dim> &data) {
3777 *   this->copy_local_to_global_stokes_preconditioner(data);
3778 *   };
3779 *  
3780 *   WorkStream::run(CellFilter(IteratorFilters::LocallyOwnedCell(),
3781 *   stokes_dof_handler.begin_active()),
3782 *   CellFilter(IteratorFilters::LocallyOwnedCell(),
3783 *   stokes_dof_handler.end()),
3784 *   worker,
3785 *   copier,
3786 *   Assembly::Scratch::StokesPreconditioner<dim>(
3787 *   stokes_fe,
3788 *   quadrature_formula,
3789 *   mapping,
3790 *   update_JxW_values | update_values | update_gradients),
3791 *   Assembly::CopyData::StokesPreconditioner<dim>(stokes_fe));
3792 *  
3793 *   stokes_preconditioner_matrix.compress(VectorOperation::add);
3794 *   }
3795 *  
3796 *  
3797 *  
3798 * @endcode
3799 *
3800 * The final function in this block initiates assembly of the Stokes
3801 * preconditioner matrix and then in fact builds the Stokes
3802 * preconditioner. It is mostly the same as in the serial case. The only
3803 * difference to @ref step_31 "step-31" is that we use a Jacobi preconditioner for the
3804 * pressure mass matrix instead of IC, as discussed in the introduction.
3805 *
3806 * @code
3807 *   template <int dim>
3808 *   void BoussinesqFlowProblem<dim>::build_stokes_preconditioner()
3809 *   {
3810 *   if (rebuild_stokes_preconditioner == false)
3811 *   return;
3812 *  
3813 *   TimerOutput::Scope timer_section(computing_timer,
3814 *   " Build Stokes preconditioner");
3815 *   pcout << " Rebuilding Stokes preconditioner..." << std::flush;
3816 *  
3817 *   assemble_stokes_preconditioner();
3818 *  
3819 *   const FEValuesExtractors::Vector velocity_components(0);
3820 *   const std::vector<std::vector<bool>> constant_modes =
3821 *   DoFTools::extract_constant_modes(
3822 *   stokes_dof_handler, stokes_fe.component_mask(velocity_components));
3823 *  
3824 *   Mp_preconditioner =
3825 *   std::make_shared<TrilinosWrappers::PreconditionJacobi>();
3826 *   Amg_preconditioner = std::make_shared<TrilinosWrappers::PreconditionAMG>();
3827 *  
3828 *   TrilinosWrappers::PreconditionAMG::AdditionalData Amg_data;
3829 *   Amg_data.constant_modes = constant_modes;
3830 *   Amg_data.elliptic = true;
3831 *   Amg_data.higher_order_elements = true;
3832 *   Amg_data.smoother_sweeps = 2;
3833 *   Amg_data.aggregation_threshold = 0.02;
3834 *  
3835 *   Mp_preconditioner->initialize(stokes_preconditioner_matrix.block(1, 1));
3836 *   Amg_preconditioner->initialize(stokes_preconditioner_matrix.block(0, 0),
3837 *   Amg_data);
3838 *  
3839 *   rebuild_stokes_preconditioner = false;
3840 *  
3841 *   pcout << std::endl;
3842 *   }
3843 *  
3844 *  
3845 * @endcode
3846 *
3847 *
3848 * <a name="step_32-Stokessystemassembly"></a>
3849 * <h5>Stokes system assembly</h5>
3850 *
3851
3852 *
3853 * The next three functions implement the assembly of the Stokes system,
3854 * again split up into a part performing local calculations, one for writing
3855 * the local data into the global matrix and vector, and one for actually
3856 * running the loop over all cells with the help of the WorkStream
3857 * class. Note that the assembly of the Stokes matrix needs only to be done
3858 * in case we have changed the mesh. Otherwise, just the
3859 * (temperature-dependent) right hand side needs to be calculated
3860 * here. Since we are working with distributed matrices and vectors, we have
3861 * to call the respective <code>compress()</code> functions at the end of
3862 * the assembly in order to send non-local data to the owner process.
3863 *
3864 * @code
3865 *   template <int dim>
3866 *   void BoussinesqFlowProblem<dim>::local_assemble_stokes_system(
3867 *   const typename DoFHandler<dim>::active_cell_iterator &cell,
3868 *   Assembly::Scratch::StokesSystem<dim> &scratch,
3869 *   Assembly::CopyData::StokesSystem<dim> &data)
3870 *   {
3871 *   const unsigned int dofs_per_cell =
3872 *   scratch.stokes_fe_values.get_fe().n_dofs_per_cell();
3873 *   const unsigned int n_q_points =
3874 *   scratch.stokes_fe_values.n_quadrature_points;
3875 *  
3876 *   const FEValuesExtractors::Vector velocities(0);
3877 *   const FEValuesExtractors::Scalar pressure(dim);
3878 *  
3879 *   scratch.stokes_fe_values.reinit(cell);
3880 *  
3881 *   const typename DoFHandler<dim>::active_cell_iterator temperature_cell =
3882 *   cell->as_dof_handler_iterator(temperature_dof_handler);
3883 *   scratch.temperature_fe_values.reinit(temperature_cell);
3884 *  
3885 *   if (rebuild_stokes_matrix)
3886 *   data.local_matrix = 0;
3887 *   data.local_rhs = 0;
3888 *  
3889 *   scratch.temperature_fe_values.get_function_values(
3890 *   old_temperature_solution, scratch.old_temperature_values);
3891 *  
3892 *   for (unsigned int q = 0; q < n_q_points; ++q)
3893 *   {
3894 *   const double old_temperature = scratch.old_temperature_values[q];
3895 *  
3896 *   for (unsigned int k = 0; k < dofs_per_cell; ++k)
3897 *   {
3898 *   scratch.phi_u[k] = scratch.stokes_fe_values[velocities].value(k, q);
3899 *   if (rebuild_stokes_matrix)
3900 *   {
3901 *   scratch.grads_phi_u[k] =
3902 *   scratch.stokes_fe_values[velocities].symmetric_gradient(k, q);
3903 *   scratch.div_phi_u[k] =
3904 *   scratch.stokes_fe_values[velocities].divergence(k, q);
3905 *   scratch.phi_p[k] =
3906 *   scratch.stokes_fe_values[pressure].value(k, q);
3907 *   }
3908 *   }
3909 *  
3910 *   if (rebuild_stokes_matrix == true)
3911 *   for (unsigned int i = 0; i < dofs_per_cell; ++i)
3912 *   for (unsigned int j = 0; j < dofs_per_cell; ++j)
3913 *   data.local_matrix(i, j) +=
3914 *   (EquationData::eta * 2 *
3915 *   (scratch.grads_phi_u[i] * scratch.grads_phi_u[j]) -
3916 *   (EquationData::pressure_scaling * scratch.div_phi_u[i] *
3917 *   scratch.phi_p[j]) -
3918 *   (EquationData::pressure_scaling * scratch.phi_p[i] *
3919 *   scratch.div_phi_u[j])) *
3920 *   scratch.stokes_fe_values.JxW(q);
3921 *  
3922 *   const Tensor<1, dim> gravity = EquationData::gravity_vector(
3923 *   scratch.stokes_fe_values.quadrature_point(q));
3924 *  
3925 *   for (unsigned int i = 0; i < dofs_per_cell; ++i)
3926 *   data.local_rhs(i) += (EquationData::density(old_temperature) *
3927 *   gravity * scratch.phi_u[i]) *
3928 *   scratch.stokes_fe_values.JxW(q);
3929 *   }
3930 *  
3931 *   cell->get_dof_indices(data.local_dof_indices);
3932 *   }
3933 *  
3934 *  
3935 *  
3936 *   template <int dim>
3937 *   void BoussinesqFlowProblem<dim>::copy_local_to_global_stokes_system(
3938 *   const Assembly::CopyData::StokesSystem<dim> &data)
3939 *   {
3940 *   if (rebuild_stokes_matrix == true)
3941 *   stokes_constraints.distribute_local_to_global(data.local_matrix,
3942 *   data.local_rhs,
3943 *   data.local_dof_indices,
3944 *   stokes_matrix,
3945 *   stokes_rhs);
3946 *   else
3947 *   stokes_constraints.distribute_local_to_global(data.local_rhs,
3948 *   data.local_dof_indices,
3949 *   stokes_rhs);
3950 *   }
3951 *  
3952 *  
3953 *  
3954 *   template <int dim>
3955 *   void BoussinesqFlowProblem<dim>::assemble_stokes_system()
3956 *   {
3957 *   TimerOutput::Scope timer_section(computing_timer,
3958 *   " Assemble Stokes system");
3959 *  
3960 *   if (rebuild_stokes_matrix == true)
3961 *   stokes_matrix = 0;
3962 *  
3963 *   stokes_rhs = 0;
3964 *  
3965 *   const QGauss<dim> quadrature_formula(parameters.stokes_velocity_degree + 1);
3966 *  
3967 *   using CellFilter =
3968 *   FilteredIterator<typename DoFHandler<dim>::active_cell_iterator>;
3969 *  
3970 *   WorkStream::run(
3971 *   CellFilter(IteratorFilters::LocallyOwnedCell(),
3972 *   stokes_dof_handler.begin_active()),
3973 *   CellFilter(IteratorFilters::LocallyOwnedCell(), stokes_dof_handler.end()),
3974 *   [this](const typename DoFHandler<dim>::active_cell_iterator &cell,
3975 *   Assembly::Scratch::StokesSystem<dim> &scratch,
3976 *   Assembly::CopyData::StokesSystem<dim> &data) {
3977 *   this->local_assemble_stokes_system(cell, scratch, data);
3978 *   },
3979 *   [this](const Assembly::CopyData::StokesSystem<dim> &data) {
3980 *   this->copy_local_to_global_stokes_system(data);
3981 *   },
3982 *   Assembly::Scratch::StokesSystem<dim>(
3983 *   stokes_fe,
3984 *   mapping,
3985 *   quadrature_formula,
3986 *   (update_values | update_quadrature_points | update_JxW_values |
3987 *   (rebuild_stokes_matrix == true ? update_gradients : UpdateFlags(0))),
3988 *   temperature_fe,
3989 *   update_values),
3990 *   Assembly::CopyData::StokesSystem<dim>(stokes_fe));
3991 *  
3992 *   if (rebuild_stokes_matrix == true)
3993 *   stokes_matrix.compress(VectorOperation::add);
3994 *   stokes_rhs.compress(VectorOperation::add);
3995 *  
3996 *   rebuild_stokes_matrix = false;
3997 *  
3998 *   pcout << std::endl;
3999 *   }
4000 *  
4001 *  
4002 * @endcode
4003 *
4004 *
4005 * <a name="step_32-Temperaturematrixassembly"></a>
4006 * <h5>Temperature matrix assembly</h5>
4007 *
4008
4009 *
4010 * The task to be performed by the next three functions is to calculate a
4011 * mass matrix and a Laplace matrix on the temperature system. These will be
4012 * combined in order to yield the semi-implicit time stepping matrix that
4013 * consists of the mass matrix plus a time step-dependent weight factor
4014 * times the Laplace matrix. This function is again essentially the body of
4015 * the loop over all cells from @ref step_31 "step-31".
4016 *
4017
4018 *
4019 * The two following functions perform similar services as the ones above.
4020 *
4021 * @code
4022 *   template <int dim>
4023 *   void BoussinesqFlowProblem<dim>::local_assemble_temperature_matrix(
4024 *   const typename DoFHandler<dim>::active_cell_iterator &cell,
4025 *   Assembly::Scratch::TemperatureMatrix<dim> &scratch,
4026 *   Assembly::CopyData::TemperatureMatrix<dim> &data)
4027 *   {
4028 *   const unsigned int dofs_per_cell =
4029 *   scratch.temperature_fe_values.get_fe().n_dofs_per_cell();
4030 *   const unsigned int n_q_points =
4031 *   scratch.temperature_fe_values.n_quadrature_points;
4032 *  
4033 *   scratch.temperature_fe_values.reinit(cell);
4034 *   cell->get_dof_indices(data.local_dof_indices);
4035 *  
4036 *   data.local_mass_matrix = 0;
4037 *   data.local_stiffness_matrix = 0;
4038 *  
4039 *   for (unsigned int q = 0; q < n_q_points; ++q)
4040 *   {
4041 *   for (unsigned int k = 0; k < dofs_per_cell; ++k)
4042 *   {
4043 *   scratch.grad_phi_T[k] =
4044 *   scratch.temperature_fe_values.shape_grad(k, q);
4045 *   scratch.phi_T[k] = scratch.temperature_fe_values.shape_value(k, q);
4046 *   }
4047 *  
4048 *   for (unsigned int i = 0; i < dofs_per_cell; ++i)
4049 *   for (unsigned int j = 0; j < dofs_per_cell; ++j)
4050 *   {
4051 *   data.local_mass_matrix(i, j) +=
4052 *   (scratch.phi_T[i] * scratch.phi_T[j] *
4053 *   scratch.temperature_fe_values.JxW(q));
4054 *   data.local_stiffness_matrix(i, j) +=
4055 *   (EquationData::kappa * scratch.grad_phi_T[i] *
4056 *   scratch.grad_phi_T[j] * scratch.temperature_fe_values.JxW(q));
4057 *   }
4058 *   }
4059 *   }
4060 *  
4061 *  
4062 *  
4063 *   template <int dim>
4064 *   void BoussinesqFlowProblem<dim>::copy_local_to_global_temperature_matrix(
4065 *   const Assembly::CopyData::TemperatureMatrix<dim> &data)
4066 *   {
4067 *   temperature_constraints.distribute_local_to_global(data.local_mass_matrix,
4068 *   data.local_dof_indices,
4069 *   temperature_mass_matrix);
4070 *   temperature_constraints.distribute_local_to_global(
4071 *   data.local_stiffness_matrix,
4072 *   data.local_dof_indices,
4073 *   temperature_stiffness_matrix);
4074 *   }
4075 *  
4076 *  
4077 *   template <int dim>
4078 *   void BoussinesqFlowProblem<dim>::assemble_temperature_matrix()
4079 *   {
4080 *   if (rebuild_temperature_matrices == false)
4081 *   return;
4082 *  
4083 *   TimerOutput::Scope timer_section(computing_timer,
4084 *   " Assemble temperature matrices");
4085 *   temperature_mass_matrix = 0;
4086 *   temperature_stiffness_matrix = 0;
4087 *  
4088 *   const QGauss<dim> quadrature_formula(parameters.temperature_degree + 2);
4089 *  
4090 *   using CellFilter =
4091 *   FilteredIterator<typename DoFHandler<dim>::active_cell_iterator>;
4092 *  
4093 *   WorkStream::run(
4094 *   CellFilter(IteratorFilters::LocallyOwnedCell(),
4095 *   temperature_dof_handler.begin_active()),
4096 *   CellFilter(IteratorFilters::LocallyOwnedCell(),
4097 *   temperature_dof_handler.end()),
4098 *   [this](const typename DoFHandler<dim>::active_cell_iterator &cell,
4099 *   Assembly::Scratch::TemperatureMatrix<dim> &scratch,
4100 *   Assembly::CopyData::TemperatureMatrix<dim> &data) {
4101 *   this->local_assemble_temperature_matrix(cell, scratch, data);
4102 *   },
4103 *   [this](const Assembly::CopyData::TemperatureMatrix<dim> &data) {
4104 *   this->copy_local_to_global_temperature_matrix(data);
4105 *   },
4106 *   Assembly::Scratch::TemperatureMatrix<dim>(temperature_fe,
4107 *   mapping,
4108 *   quadrature_formula),
4109 *   Assembly::CopyData::TemperatureMatrix<dim>(temperature_fe));
4110 *  
4111 *   temperature_mass_matrix.compress(VectorOperation::add);
4112 *   temperature_stiffness_matrix.compress(VectorOperation::add);
4113 *  
4114 *   rebuild_temperature_matrices = false;
4115 *   rebuild_temperature_preconditioner = true;
4116 *   }
4117 *  
4118 *  
4119 * @endcode
4120 *
4121 *
4122 * <a name="step_32-Temperaturerighthandsideassembly"></a>
4123 * <h5>Temperature right hand side assembly</h5>
4124 *
4125
4126 *
4127 * This is the last assembly function. It calculates the right hand side of
4128 * the temperature system, which includes the convection and the
4129 * stabilization terms. It includes a lot of evaluations of old solutions at
4130 * the quadrature points (which are necessary for calculating the artificial
4131 * viscosity of stabilization), but is otherwise similar to the other
4132 * assembly functions. Notice, once again, how we resolve the dilemma of
4133 * having inhomogeneous boundary conditions, by just making a right hand
4134 * side at this point (compare the comments for the <code>project()</code>
4135 * function above): We create some matrix columns with exactly the values
4136 * that would be entered for the temperature @ref GlossStiffnessMatrix "stiffness matrix", in case we
4137 * have inhomogeneously constrained dofs. That will account for the correct
4138 * balance of the right hand side vector with the matrix system of
4139 * temperature.
4140 *
4141 * @code
4142 *   template <int dim>
4143 *   void BoussinesqFlowProblem<dim>::local_assemble_temperature_rhs(
4144 *   const std::pair<double, double> global_T_range,
4145 *   const double global_max_velocity,
4146 *   const double global_entropy_variation,
4147 *   const typename DoFHandler<dim>::active_cell_iterator &cell,
4148 *   Assembly::Scratch::TemperatureRHS<dim> &scratch,
4149 *   Assembly::CopyData::TemperatureRHS<dim> &data)
4150 *   {
4151 *   const bool use_bdf2_scheme = (timestep_number != 0);
4152 *  
4153 *   const unsigned int dofs_per_cell =
4154 *   scratch.temperature_fe_values.get_fe().n_dofs_per_cell();
4155 *   const unsigned int n_q_points =
4156 *   scratch.temperature_fe_values.n_quadrature_points;
4157 *  
4158 *   const FEValuesExtractors::Vector velocities(0);
4159 *  
4160 *   data.local_rhs = 0;
4161 *   data.matrix_for_bc = 0;
4162 *   cell->get_dof_indices(data.local_dof_indices);
4163 *  
4164 *   scratch.temperature_fe_values.reinit(cell);
4165 *  
4166 *   typename DoFHandler<dim>::active_cell_iterator stokes_cell =
4167 *   cell->as_dof_handler_iterator(stokes_dof_handler);
4168 *   scratch.stokes_fe_values.reinit(stokes_cell);
4169 *  
4170 *   scratch.temperature_fe_values.get_function_values(
4171 *   old_temperature_solution, scratch.old_temperature_values);
4172 *   scratch.temperature_fe_values.get_function_values(
4173 *   old_old_temperature_solution, scratch.old_old_temperature_values);
4174 *  
4175 *   scratch.temperature_fe_values.get_function_gradients(
4176 *   old_temperature_solution, scratch.old_temperature_grads);
4177 *   scratch.temperature_fe_values.get_function_gradients(
4178 *   old_old_temperature_solution, scratch.old_old_temperature_grads);
4179 *  
4180 *   scratch.temperature_fe_values.get_function_laplacians(
4181 *   old_temperature_solution, scratch.old_temperature_laplacians);
4182 *   scratch.temperature_fe_values.get_function_laplacians(
4183 *   old_old_temperature_solution, scratch.old_old_temperature_laplacians);
4184 *  
4185 *   scratch.stokes_fe_values[velocities].get_function_values(
4186 *   stokes_solution, scratch.old_velocity_values);
4187 *   scratch.stokes_fe_values[velocities].get_function_values(
4188 *   old_stokes_solution, scratch.old_old_velocity_values);
4189 *   scratch.stokes_fe_values[velocities].get_function_symmetric_gradients(
4190 *   stokes_solution, scratch.old_strain_rates);
4191 *   scratch.stokes_fe_values[velocities].get_function_symmetric_gradients(
4192 *   old_stokes_solution, scratch.old_old_strain_rates);
4193 *  
4194 *   const double nu =
4195 *   compute_viscosity(scratch.old_temperature_values,
4196 *   scratch.old_old_temperature_values,
4197 *   scratch.old_temperature_grads,
4198 *   scratch.old_old_temperature_grads,
4199 *   scratch.old_temperature_laplacians,
4200 *   scratch.old_old_temperature_laplacians,
4201 *   scratch.old_velocity_values,
4202 *   scratch.old_old_velocity_values,
4203 *   scratch.old_strain_rates,
4204 *   scratch.old_old_strain_rates,
4205 *   global_max_velocity,
4206 *   global_T_range.second - global_T_range.first,
4207 *   0.5 * (global_T_range.second + global_T_range.first),
4208 *   global_entropy_variation,
4209 *   cell->diameter());
4210 *  
4211 *   for (unsigned int q = 0; q < n_q_points; ++q)
4212 *   {
4213 *   for (unsigned int k = 0; k < dofs_per_cell; ++k)
4214 *   {
4215 *   scratch.phi_T[k] = scratch.temperature_fe_values.shape_value(k, q);
4216 *   scratch.grad_phi_T[k] =
4217 *   scratch.temperature_fe_values.shape_grad(k, q);
4218 *   }
4219 *  
4220 *  
4221 *   const double T_term_for_rhs =
4222 *   (use_bdf2_scheme ?
4223 *   (scratch.old_temperature_values[q] *
4224 *   (1 + time_step / old_time_step) -
4225 *   scratch.old_old_temperature_values[q] * (time_step * time_step) /
4226 *   (old_time_step * (time_step + old_time_step))) :
4227 *   scratch.old_temperature_values[q]);
4228 *  
4229 *   const double ext_T =
4230 *   (use_bdf2_scheme ? (scratch.old_temperature_values[q] *
4231 *   (1 + time_step / old_time_step) -
4232 *   scratch.old_old_temperature_values[q] *
4233 *   time_step / old_time_step) :
4234 *   scratch.old_temperature_values[q]);
4235 *  
4236 *   const Tensor<1, dim> ext_grad_T =
4237 *   (use_bdf2_scheme ? (scratch.old_temperature_grads[q] *
4238 *   (1 + time_step / old_time_step) -
4239 *   scratch.old_old_temperature_grads[q] * time_step /
4240 *   old_time_step) :
4241 *   scratch.old_temperature_grads[q]);
4242 *  
4243 *   const Tensor<1, dim> extrapolated_u =
4244 *   (use_bdf2_scheme ?
4245 *   (scratch.old_velocity_values[q] * (1 + time_step / old_time_step) -
4246 *   scratch.old_old_velocity_values[q] * time_step / old_time_step) :
4247 *   scratch.old_velocity_values[q]);
4248 *  
4249 *   const SymmetricTensor<2, dim> extrapolated_strain_rate =
4250 *   (use_bdf2_scheme ?
4251 *   (scratch.old_strain_rates[q] * (1 + time_step / old_time_step) -
4252 *   scratch.old_old_strain_rates[q] * time_step / old_time_step) :
4253 *   scratch.old_strain_rates[q]);
4254 *  
4255 *   const double gamma =
4256 *   ((EquationData::radiogenic_heating * EquationData::density(ext_T) +
4257 *   2 * EquationData::eta * extrapolated_strain_rate *
4258 *   extrapolated_strain_rate) /
4259 *   (EquationData::density(ext_T) * EquationData::specific_heat));
4260 *  
4261 *   for (unsigned int i = 0; i < dofs_per_cell; ++i)
4262 *   {
4263 *   data.local_rhs(i) +=
4264 *   (T_term_for_rhs * scratch.phi_T[i] -
4265 *   time_step * extrapolated_u * ext_grad_T * scratch.phi_T[i] -
4266 *   time_step * nu * ext_grad_T * scratch.grad_phi_T[i] +
4267 *   time_step * gamma * scratch.phi_T[i]) *
4268 *   scratch.temperature_fe_values.JxW(q);
4269 *  
4270 *   if (temperature_constraints.is_inhomogeneously_constrained(
4271 *   data.local_dof_indices[i]))
4272 *   {
4273 *   for (unsigned int j = 0; j < dofs_per_cell; ++j)
4274 *   data.matrix_for_bc(j, i) +=
4275 *   (scratch.phi_T[i] * scratch.phi_T[j] *
4276 *   (use_bdf2_scheme ? ((2 * time_step + old_time_step) /
4277 *   (time_step + old_time_step)) :
4278 *   1.) +
4279 *   scratch.grad_phi_T[i] * scratch.grad_phi_T[j] *
4280 *   EquationData::kappa * time_step) *
4281 *   scratch.temperature_fe_values.JxW(q);
4282 *   }
4283 *   }
4284 *   }
4285 *   }
4286 *  
4287 *  
4288 *   template <int dim>
4289 *   void BoussinesqFlowProblem<dim>::copy_local_to_global_temperature_rhs(
4290 *   const Assembly::CopyData::TemperatureRHS<dim> &data)
4291 *   {
4292 *   temperature_constraints.distribute_local_to_global(data.local_rhs,
4293 *   data.local_dof_indices,
4294 *   temperature_rhs,
4295 *   data.matrix_for_bc);
4296 *   }
4297 *  
4298 *  
4299 *  
4300 * @endcode
4301 *
4302 * In the function that runs the WorkStream for actually calculating the
4303 * right hand side, we also generate the final matrix. As mentioned above,
4304 * it is a sum of the mass matrix and the Laplace matrix, times some time
4305 * step-dependent weight. This weight is specified by the BDF-2 time
4306 * integration scheme, see the introduction in @ref step_31 "step-31". What is new in this
4307 * tutorial program (in addition to the use of MPI parallelization and the
4306 * WorkStream class) is that we now precompute the temperature
4309 * preconditioner as well. The reason is that the setup of the Jacobi
4310 * preconditioner takes a noticeable time compared to the solver because we
4311 * usually only need between 10 and 20 iterations for solving the
4312 * temperature system (this might sound strange, as Jacobi really only
4313 * consists of a diagonal, but in Trilinos it is derived from a more general
4314 * framework for point relaxation preconditioners, which is a bit
4315 * inefficient). Hence, it is more efficient to precompute the
4316 * preconditioner, even though the matrix entries may slightly change
4317 * because the time step might change. This is not too big a problem because
4318 * we remesh every few time steps (and regenerate the preconditioner then).
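*
* Concretely, if @f$\tau_n@f$ and @f$\tau_{n-1}@f$ denote the current and
* previous time step sizes, and @f$M@f$ and @f$K@f$ the temperature mass
* and stiffness matrices, then the matrix assembled below reads
* @f{eqnarray*}{
* \frac{2\tau_n+\tau_{n-1}}{\tau_n+\tau_{n-1}} \, M + \tau_n K
* @f}
* for BDF-2 steps, and simply @f$M + \tau_n K@f$ for the first (backward
* Euler) step.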
4319 *
4320 * @code
4321 *   template <int dim>
4322 *   void BoussinesqFlowProblem<dim>::assemble_temperature_system(
4323 *   const double maximal_velocity)
4324 *   {
4325 *   const bool use_bdf2_scheme = (timestep_number != 0);
4326 *  
4327 *   if (use_bdf2_scheme == true)
4328 *   {
4329 *   temperature_matrix.copy_from(temperature_mass_matrix);
4330 *   temperature_matrix *=
4331 *   (2 * time_step + old_time_step) / (time_step + old_time_step);
4332 *   temperature_matrix.add(time_step, temperature_stiffness_matrix);
4333 *   }
4334 *   else
4335 *   {
4336 *   temperature_matrix.copy_from(temperature_mass_matrix);
4337 *   temperature_matrix.add(time_step, temperature_stiffness_matrix);
4338 *   }
4339 *  
4340 *   if (rebuild_temperature_preconditioner == true)
4341 *   {
4342 *   T_preconditioner =
4343 *   std::make_shared<TrilinosWrappers::PreconditionJacobi>();
4344 *   T_preconditioner->initialize(temperature_matrix);
4345 *   rebuild_temperature_preconditioner = false;
4346 *   }
4347 *  
4348 * @endcode
4349 *
4350 * The next part is computing the right hand side vectors. To do so, we
4351 * first compute the average temperature @f$T_m@f$ that we use for evaluating
4352 * the artificial viscosity stabilization through the residual @f$E(T) =
4353 * (T-T_m)^2@f$. We do this by defining the midpoint between maximum and
4354 * minimum temperature as average temperature in the definition of the
4355 * entropy viscosity. An alternative would be to use the integral average,
4356 * but the results are not very sensitive to this choice. The rest then
4357 * only requires calling WorkStream::run again, binding the arguments to
4358 * the <code>local_assemble_temperature_rhs</code> function that are the
4359 * same in every call to the correct values:
4360 *
4361 * @code
4362 *   temperature_rhs = 0;
4363 *  
4364 *   const QGauss<dim> quadrature_formula(parameters.temperature_degree + 2);
4365 *   const std::pair<double, double> global_T_range =
4366 *   get_extrapolated_temperature_range();
4367 *  
4368 *   const double average_temperature =
4369 *   0.5 * (global_T_range.first + global_T_range.second);
4370 *   const double global_entropy_variation =
4371 *   get_entropy_variation(average_temperature);
4372 *  
4373 *   using CellFilter =
4374 *   FilteredIterator<typename DoFHandler<dim>::active_cell_iterator>;
4375 *  
4376 *   auto worker =
4377 *   [this, global_T_range, maximal_velocity, global_entropy_variation](
4378 *   const typename DoFHandler<dim>::active_cell_iterator &cell,
4379 *   Assembly::Scratch::TemperatureRHS<dim> &scratch,
4380 *   Assembly::CopyData::TemperatureRHS<dim> &data) {
4381 *   this->local_assemble_temperature_rhs(global_T_range,
4382 *   maximal_velocity,
4383 *   global_entropy_variation,
4384 *   cell,
4385 *   scratch,
4386 *   data);
4387 *   };
4388 *  
4389 *   auto copier = [this](const Assembly::CopyData::TemperatureRHS<dim> &data) {
4390 *   this->copy_local_to_global_temperature_rhs(data);
4391 *   };
4392 *  
4393 *   WorkStream::run(CellFilter(IteratorFilters::LocallyOwnedCell(),
4394 *   temperature_dof_handler.begin_active()),
4395 *   CellFilter(IteratorFilters::LocallyOwnedCell(),
4396 *   temperature_dof_handler.end()),
4397 *   worker,
4398 *   copier,
4399 *   Assembly::Scratch::TemperatureRHS<dim>(
4400 *   temperature_fe, stokes_fe, mapping, quadrature_formula),
4401 *   Assembly::CopyData::TemperatureRHS<dim>(temperature_fe));
4402 *  
4403 *   temperature_rhs.compress(VectorOperation::add);
4404 *   }
4405 *  
4406 *  
4407 *  
4408 * @endcode
4409 *
4410 *
4411 * <a name="step_32-BoussinesqFlowProblemsolve"></a>
4412 * <h4>BoussinesqFlowProblem::solve</h4>
4413 *
4414
4415 *
4416 * This function solves the linear systems in each time step of the
4417 * Boussinesq problem. First, we work on the Stokes system and then on the
4418 * temperature system. In essence, it does the same things as the respective
4419 * function in @ref step_31 "step-31". However, there are a few changes here.
4420 *
4421
4422 *
4423 * The first change is related to the way we store our solution: we keep the
4424 * vectors with locally owned degrees of freedom plus ghost nodes on each
4425 * MPI node. When we enter a solver which is supposed to perform
4426 * matrix-vector products with a distributed matrix, this is not the
4427 * appropriate form, though. There, we will want to have the solution vector
4428 * to be distributed in the same way as the matrix, i.e. without any
4429 * ghosts. So what we do first is to generate a distributed vector called
4430 * <code>distributed_stokes_solution</code> and put only the locally owned
4431 * dofs into that, which is neatly done by the <code>operator=</code> of the
4432 * Trilinos vector.
4433 *
4434
4435 *
4436 * Next, we scale the pressure solution (or rather, the initial guess) for
4437 * the solver so that it matches with the length scales in the matrices, as
4438 * discussed in the introduction. We also immediately scale the pressure
4439 * solution back to the correct units after the solution is completed. We
4440 * also need to set the pressure values at hanging nodes to zero. This we
4441 * also did in @ref step_31 "step-31" in order not to disturb the Schur complement by some
4442 * vector entries that actually are irrelevant during the solve stage. As a
4443 * difference to @ref step_31 "step-31", here we do it only for the locally owned pressure
4444 * dofs. After solving for the Stokes solution, each processor copies the
4445 * distributed solution back into the solution vector that also includes
4446 * ghost elements.
4447 *
4448
4449 *
4450 * The third and most obvious change is that we have two variants for the
4451 * Stokes solver: A fast solver that sometimes breaks down, and a robust
4452 * solver that is slower. This is what we already discussed in the
4453 * introduction. Here is how we realize it: First, we perform 30 iterations
4454 * with the fast solver, whose preconditioner uses a single AMG
4455 * V-cycle instead of an approximate solve (this is indicated by the
4456 * <code>false</code> argument to the
4457 * <code>LinearSolvers::BlockSchurPreconditioner</code> object). If we
4458 * converge, everything is fine. If we do not converge, the solver control
4459 * object will throw an exception SolverControl::NoConvergence. Usually,
4460 * this would abort the program because we don't catch such exceptions in our usual
4461 * <code>solve()</code> functions. This is certainly not what we want to
4462 * happen here. Rather, we want to switch to the strong solver and continue
4463 * the solution process with whatever vector we got so far. Hence, we catch
4464 * the exception with the C++ try/catch mechanism. We then simply go through
4465 * the same solver sequence again in the <code>catch</code> clause, this
4466 * time passing the @p true flag to the preconditioner for the strong
4467 * solver, signaling an approximate CG solve.
4468 *
4469 * @code
4470 *   template <int dim>
4471 *   void BoussinesqFlowProblem<dim>::solve()
4472 *   {
4473 *   {
4474 *   TimerOutput::Scope timer_section(computing_timer,
4475 *   " Solve Stokes system");
4476 *  
4477 *   pcout << " Solving Stokes system... " << std::flush;
4478 *  
4479 *   TrilinosWrappers::MPI::BlockVector distributed_stokes_solution(
4480 *   stokes_rhs);
4481 *   distributed_stokes_solution = stokes_solution;
4482 *  
4483 *   distributed_stokes_solution.block(1) /= EquationData::pressure_scaling;
4484 *  
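*   // The locally owned pressure dofs live in the second block of the
*   // block vector, so their global indices are offset by the total
*   // number of velocity dofs, block(0).size():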
4485 *   const unsigned int
4486 *   start = (distributed_stokes_solution.block(0).size() +
4487 *   distributed_stokes_solution.block(1).local_range().first),
4488 *   end = (distributed_stokes_solution.block(0).size() +
4489 *   distributed_stokes_solution.block(1).local_range().second);
4490 *   for (unsigned int i = start; i < end; ++i)
4491 *   if (stokes_constraints.is_constrained(i))
4492 *   distributed_stokes_solution(i) = 0;
4493 *  
4494 *  
4495 *   PrimitiveVectorMemory<TrilinosWrappers::MPI::BlockVector> mem;
4496 *  
4497 *   unsigned int n_iterations = 0;
4498 *   const double solver_tolerance = 1e-8 * stokes_rhs.l2_norm();
4499 *   SolverControl solver_control(30, solver_tolerance);
4500 *  
4501 *   try
4502 *   {
4503 *   const LinearSolvers::BlockSchurPreconditioner<
4504 *   TrilinosWrappers::PreconditionAMG,
4505 *   TrilinosWrappers::PreconditionJacobi>
4506 *   preconditioner(stokes_matrix,
4507 *   stokes_preconditioner_matrix,
4508 *   *Mp_preconditioner,
4509 *   *Amg_preconditioner,
4510 *   false);
4511 *  
4512 *   SolverFGMRES<TrilinosWrappers::MPI::BlockVector> solver(
4513 *   solver_control,
4514 *   mem,
4515 *   SolverFGMRES<TrilinosWrappers::MPI::BlockVector>::AdditionalData(
4516 *   30));
4517 *   solver.solve(stokes_matrix,
4518 *   distributed_stokes_solution,
4519 *   stokes_rhs,
4520 *   preconditioner);
4521 *  
4522 *   n_iterations = solver_control.last_step();
4523 *   }
4524 *  
4525 *   catch (SolverControl::NoConvergence &)
4526 *   {
4527 *   const LinearSolvers::BlockSchurPreconditioner<
4528 *   TrilinosWrappers::PreconditionAMG,
4529 *   TrilinosWrappers::PreconditionJacobi>
4530 *   preconditioner(stokes_matrix,
4531 *   stokes_preconditioner_matrix,
4532 *   *Mp_preconditioner,
4533 *   *Amg_preconditioner,
4534 *   true);
4535 *  
4536 *   SolverControl solver_control_refined(stokes_matrix.m(),
4537 *   solver_tolerance);
4538 *   SolverFGMRES<TrilinosWrappers::MPI::BlockVector> solver(
4539 *   solver_control_refined,
4540 *   mem,
4541 *   SolverFGMRES<TrilinosWrappers::MPI::BlockVector>::AdditionalData(
4542 *   50));
4543 *   solver.solve(stokes_matrix,
4544 *   distributed_stokes_solution,
4545 *   stokes_rhs,
4546 *   preconditioner);
4547 *  
4548 *   n_iterations =
4549 *   (solver_control.last_step() + solver_control_refined.last_step());
4550 *   }
4551 *  
4552 *  
4553 *   stokes_constraints.distribute(distributed_stokes_solution);
4554 *  
4555 *   distributed_stokes_solution.block(1) *= EquationData::pressure_scaling;
4556 *  
4557 *   stokes_solution = distributed_stokes_solution;
4558 *   pcout << n_iterations << " iterations." << std::endl;
4559 *   }
4560 *  
4561 *  
4562 * @endcode
4563 *
4564 * Now let's turn to the temperature part: First, we compute the time step
4565 * size. We found that we need smaller time steps for 3d than for 2d for
4566 * the shell geometry. This is because the cells are more distorted in
4567 * that case (it is the smallest edge length that determines the CFL
4568 * number). Instead of computing the time step from maximum velocity and
4569 * minimal mesh size as in @ref step_31 "step-31", we compute local CFL numbers, i.e., on
4570 * each cell we compute the maximum velocity divided by the mesh size, and
4571 * compute the maximum of them. Hence, we need to choose the factor in
4572 * front of the time step slightly smaller. (We later re-considered this
4573 * approach towards time stepping. If you're curious about this, you may
4574 * want to read the time stepping section in @cite HDGB17 .)
4575 *
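* To make this concrete, the following is a minimal sketch of such a
* local CFL computation, essentially what the get_cfl_number() function
* used below computes (the quadrature choice and variable names in this
* sketch are illustrative only):
*
* @code
*   const QIterated<dim> quadrature_formula(QTrapezoid<1>(),
*   parameters.stokes_velocity_degree);
*   FEValues<dim> fe_values(mapping, stokes_fe, quadrature_formula,
*   update_values);
*   std::vector<Tensor<1, dim>> velocity_values(quadrature_formula.size());
*   const FEValuesExtractors::Vector velocities(0);
*  
*   double max_local_cfl = 0;
*   for (const auto &cell : stokes_dof_handler.active_cell_iterators())
*   if (cell->is_locally_owned())
*   {
*   fe_values.reinit(cell);
*   fe_values[velocities].get_function_values(stokes_solution,
*   velocity_values);
*   double max_local_velocity = 0;
*   for (unsigned int q = 0; q < quadrature_formula.size(); ++q)
*   max_local_velocity =
*   std::max(max_local_velocity, velocity_values[q].norm());
*   // The local CFL number is the cell's maximal velocity divided by
*   // the cell diameter:
*   max_local_cfl =
*   std::max(max_local_cfl, max_local_velocity / cell->diameter());
*   }
*   const double cfl_number = Utilities::MPI::max(max_local_cfl,
*   MPI_COMM_WORLD);
* @endcode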
4576
4577 *
4578 * After temperature right hand side assembly, we solve the linear
4579 * system for temperature (with fully distributed vectors without
4580 * ghost elements and using the solution from the last timestep as
4581 * our initial guess for the iterative solver), apply constraints,
4582 * and copy the vector back to one with ghosts.
4583 *
4584
4585 *
4586 * In the end, we extract the temperature range similarly to @ref step_31 "step-31" to
4587 * produce some output (for example in order to help us choose the
4588 * stabilization constants, as discussed in the introduction). The only
4589 * difference is that we need to exchange maxima over all processors.
4590 *
4591 * @code
4592 *   {
4593 *   TimerOutput::Scope timer_section(computing_timer,
4594 *   " Assemble temperature rhs");
4595 *  
4596 *   old_time_step = time_step;
4597 *  
4598 *   const double scaling = (dim == 3 ? 0.25 : 1.0);
4599 *   time_step = (scaling / (2.1 * dim * std::sqrt(1. * dim)) /
4600 *   (parameters.temperature_degree * get_cfl_number()));
4601 *  
4602 *   const double maximal_velocity = get_maximal_velocity();
4603 *   pcout << " Maximal velocity: "
4604 *   << maximal_velocity * EquationData::year_in_seconds * 100
4605 *   << " cm/year" << std::endl;
4606 *   pcout << " "
4607 *   << "Time step: " << time_step / EquationData::year_in_seconds
4608 *   << " years" << std::endl;
4609 *  
4610 *   assemble_temperature_system(maximal_velocity);
4611 *   }
4612 *  
4613 *   {
4614 *   TimerOutput::Scope timer_section(computing_timer,
4615 *   " Solve temperature system");
4616 *  
4617 *   SolverControl solver_control(temperature_matrix.m(),
4618 *   1e-12 * temperature_rhs.l2_norm());
4619 *   SolverCG<TrilinosWrappers::MPI::Vector> cg(solver_control);
4620 *  
4621 *   TrilinosWrappers::MPI::Vector distributed_temperature_solution(
4622 *   temperature_rhs);
4623 *   distributed_temperature_solution = old_temperature_solution;
4624 *  
4625 *   cg.solve(temperature_matrix,
4626 *   distributed_temperature_solution,
4627 *   temperature_rhs,
4628 *   *T_preconditioner);
4629 *  
4630 *   temperature_constraints.distribute(distributed_temperature_solution);
4631 *   temperature_solution = distributed_temperature_solution;
4632 *  
4633 *   pcout << " " << solver_control.last_step()
4634 *   << " CG iterations for temperature" << std::endl;
4635 *  
4636 *   double temperature[2] = {std::numeric_limits<double>::max(),
4637 *   std::numeric_limits<double>::lowest()};
4638 *   double global_temperature[2];
4639 *  
4640 *   for (unsigned int i =
4641 *   distributed_temperature_solution.local_range().first;
4642 *   i < distributed_temperature_solution.local_range().second;
4643 *   ++i)
4644 *   {
4645 *   temperature[0] =
4646 *   std::min<double>(temperature[0],
4647 *   distributed_temperature_solution(i));
4648 *   temperature[1] =
4649 *   std::max<double>(temperature[1],
4650 *   distributed_temperature_solution(i));
4651 *   }
4652 *  
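*   // Obtain the global minimum as the negative of the maximum of the
*   // negated values, so that a single MPI max reduction delivers both
*   // bounds at once: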
4653 *   temperature[0] *= -1.0;
4654 *   Utilities::MPI::max(temperature, MPI_COMM_WORLD, global_temperature);
4655 *   global_temperature[0] *= -1.0;
4656 *  
4657 *   pcout << " Temperature range: " << global_temperature[0] << ' '
4658 *   << global_temperature[1] << std::endl;
4659 *   }
4660 *   }
4661 *  
4662 *  
4663 * @endcode
4664 *
4665 *
4666 * <a name="step_32-BoussinesqFlowProblemoutput_results"></a>
4667 * <h4>BoussinesqFlowProblem::output_results</h4>
4668 *
4669
4670 *
4671 * Next comes the function that generates the output. The quantities to
4672 * output could be introduced manually like we did in @ref step_31 "step-31". An
4673 * alternative is to hand this task over to a class PostProcessor that
4674 * inherits from the class DataPostprocessor, which can be attached to
4675 * DataOut. This allows us to output derived quantities from the solution,
4676 * like the friction heating included in this example. It overloads the
4677 * virtual function DataPostprocessor::evaluate_vector_field(),
4678 * which is then internally called from DataOut::build_patches(). We have to
4679 * give it values of the numerical solution, its derivatives, normals to the
4680 * cell, the actual evaluation points and any additional quantities. This
4681 * follows the same procedure as discussed in @ref step_29 "step-29" and other programs.
4682 *
4683 * @code
4684 *   template <int dim>
4685 *   class BoussinesqFlowProblem<dim>::Postprocessor
4686 *   : public DataPostprocessor<dim>
4687 *   {
4688 *   public:
4689 *   Postprocessor(const unsigned int partition, const double minimal_pressure);
4690 *  
4691 *   virtual void evaluate_vector_field(
4692 *   const DataPostprocessorInputs::Vector<dim> &inputs,
4693 *   std::vector<Vector<double>> &computed_quantities) const override;
4694 *  
4695 *   virtual std::vector<std::string> get_names() const override;
4696 *  
4697 *   virtual std::vector<
4698 *   DataComponentInterpretation::DataComponentInterpretation>
4699 *   get_data_component_interpretation() const override;
4700 *  
4701 *   virtual UpdateFlags get_needed_update_flags() const override;
4702 *  
4703 *   private:
4704 *   const unsigned int partition;
4705 *   const double minimal_pressure;
4706 *   };
4707 *  
4708 *  
4709 *   template <int dim>
4710 *   BoussinesqFlowProblem<dim>::Postprocessor::Postprocessor(
4711 *   const unsigned int partition,
4712 *   const double minimal_pressure)
4713 *   : partition(partition)
4714 *   , minimal_pressure(minimal_pressure)
4715 *   {}
4716 *  
4717 *  
4718 * @endcode
4719 *
4720 * Here we define the names for the variables we want to output. These are
4721 * the actual solution values for velocity, pressure, and temperature, as
4722 * well as the friction heating and to each cell the number of the processor
4723 * that owns it. This allows us to visualize the partitioning of the domain
4724 * among the processors. Except for the velocity, which is vector-valued,
4725 * all other quantities are scalar.
4726 *
4727 * @code
4728 *   template <int dim>
4729 *   std::vector<std::string>
4730 *   BoussinesqFlowProblem<dim>::Postprocessor::get_names() const
4731 *   {
4732 *   std::vector<std::string> solution_names(dim, "velocity");
4733 *   solution_names.emplace_back("p");
4734 *   solution_names.emplace_back("T");
4735 *   solution_names.emplace_back("friction_heating");
4736 *   solution_names.emplace_back("partition");
4737 *  
4738 *   return solution_names;
4739 *   }
4740 *  
4741 *  
4742 *   template <int dim>
4743 *   std::vector<DataComponentInterpretation::DataComponentInterpretation>
4744 *   BoussinesqFlowProblem<dim>::Postprocessor::get_data_component_interpretation()
4745 *   const
4746 *   {
4747 *   std::vector<DataComponentInterpretation::DataComponentInterpretation>
4748 *   interpretation(dim,
4749 *   DataComponentInterpretation::component_is_part_of_vector);
4750 *  
4751 *   interpretation.push_back(DataComponentInterpretation::component_is_scalar);
4752 *   interpretation.push_back(DataComponentInterpretation::component_is_scalar);
4753 *   interpretation.push_back(DataComponentInterpretation::component_is_scalar);
4754 *   interpretation.push_back(DataComponentInterpretation::component_is_scalar);
4755 *  
4756 *   return interpretation;
4757 *   }
4758 *  
4759 *  
4760 *   template <int dim>
4761 *   UpdateFlags
4762 *   BoussinesqFlowProblem<dim>::Postprocessor::get_needed_update_flags() const
4763 *   {
4764 *   return update_values | update_gradients | update_quadrature_points;
4765 *   }
4766 *  
4767 *  
4768 * @endcode
4769 *
4770 * Now we implement the function that computes the derived quantities. As we
4771 * also did for the output, we rescale the velocity from its SI units to
4772 * something more readable, namely cm/year. Next, the pressure is scaled to
4773 * be between 0 and the maximum pressure. This makes it more easily
4774 * comparable -- in essence making all pressure variables positive or
4775 * zero. Temperature is taken as is, and the friction heating is computed as
4776 * @f$2 \eta \varepsilon(\mathbf{u}) \cdot \varepsilon(\mathbf{u})@f$.
4777 *
4778
4779 *
4780 * The quantities we output here are more for illustration than for
4781 * actual scientific value. We come back to this briefly in the results
4782 * section of this program and explain what one may in fact be interested in.
4783 *
4784 * @code
4785 *   template <int dim>
4786 *   void BoussinesqFlowProblem<dim>::Postprocessor::evaluate_vector_field(
4787 *   const DataPostprocessorInputs::Vector<dim> &inputs,
4788 *   std::vector<Vector<double>> &computed_quantities) const
4789 *   {
4790 *   const unsigned int n_evaluation_points = inputs.solution_values.size();
4791 *   Assert(inputs.solution_gradients.size() == n_evaluation_points,
4792 *   ExcInternalError());
4793 *   Assert(computed_quantities.size() == n_evaluation_points,
4794 *   ExcInternalError());
4795 *   Assert(inputs.solution_values[0].size() == dim + 2, ExcInternalError());
4796 *  
4797 *   for (unsigned int p = 0; p < n_evaluation_points; ++p)
4798 *   {
4799 *   for (unsigned int d = 0; d < dim; ++d)
4800 *   computed_quantities[p](d) = (inputs.solution_values[p](d) *
4801 *   EquationData::year_in_seconds * 100);
4802 *  
4803 *   const double pressure =
4804 *   (inputs.solution_values[p](dim) - minimal_pressure);
4805 *   computed_quantities[p](dim) = pressure;
4806 *  
4807 *   const double temperature = inputs.solution_values[p](dim + 1);
4808 *   computed_quantities[p](dim + 1) = temperature;
4809 *  
4810 *   Tensor<2, dim> grad_u;
4811 *   for (unsigned int d = 0; d < dim; ++d)
4812 *   grad_u[d] = inputs.solution_gradients[p][d];
4813 *   const SymmetricTensor<2, dim> strain_rate = symmetrize(grad_u);
4814 *   computed_quantities[p](dim + 2) =
4815 *   2 * EquationData::eta * strain_rate * strain_rate;
4816 *  
4817 *   computed_quantities[p](dim + 3) = partition;
4818 *   }
4819 *   }
4820 *  
4821 *  
4822 * @endcode
4823 *
4824 * The <code>output_results()</code> function has a similar task to the one
4825 * in @ref step_31 "step-31". However, here we are going to demonstrate a different
4826 * technique on how to merge output from different DoFHandler objects. The
4827 * way we're going to achieve this recombination is to create a joint
4828 * DoFHandler that collects both components, the Stokes solution and the
4829 * temperature solution. This can be nicely done by combining the finite
4830 * elements from the two systems to form one FESystem, and let this
4831 * collective system define a new DoFHandler object. To be sure that
4832 * everything was done correctly, we perform a sanity check that ensures
4833 * that we got all the dofs from both Stokes and temperature even in the
4834 * combined system. We then combine the data vectors. Unfortunately, there
4835 * is no straight-forward relation that tells us how to sort Stokes and
4836 * temperature vector into the joint vector. The way we can get around this
4837 * trouble is to rely on the information collected in the FESystem. For each
4838 * dof on a cell, the joint finite element knows to which equation component
4839 * (velocity component, pressure, or temperature) it belongs -- that's the
4840 * information we need! So we step through all cells (with iterators into
4841 * all three DoFHandlers moving in sync), and for each joint cell dof, we
4842 * read out that component using the FiniteElement::system_to_base_index
4843 * function (see there for a description of what the various parts of its
4844 * return value contain). We also need to keep track whether we're on a
4845 * Stokes dof or a temperature dof, which is contained in
4846 * joint_fe.system_to_base_index(i).first.first. Eventually, the dof_indices
4847 * data structures on each of the three systems tell us what the relation
4848 * between global vector and local dofs looks like on the present cell,
4849 * which concludes this tedious work. We make sure that each processor only
4850 * works on the subdomain it owns locally (and not on ghost or artificial
4851 * cells) when building the joint solution vector. The same will then have
4852 * to be done in DataOut::build_patches(), but that function does so
4853 * automatically.
4854 *
4855
4856 *
4857 * What we end up with is a set of patches that we can write using the
4858 * functions in DataOutBase in a variety of output formats. Here, we then
4859 * have to pay attention that what each processor writes is really only its
4860 * own part of the domain, i.e. we will want to write each processor's
4861 * contribution into a separate file. This we do by adding an additional
4862 * number to the filename when we write the solution. This is not really
4863 * new; we did it similarly in @ref step_40 "step-40". Note that we write in the compressed
4864 * format @p .vtu instead of plain vtk files, which saves quite some
4865 * storage.
4866 *
4867
4868 *
4869 * All the rest of the work is done in the PostProcessor class.
4870 *
4871 * @code
4872 *   template <int dim>
4873 *   void BoussinesqFlowProblem<dim>::output_results()
4874 *   {
4875 *   TimerOutput::Scope timer_section(computing_timer, "Postprocessing");
4876 *  
4877 *   const FESystem<dim> joint_fe(stokes_fe, 1, temperature_fe, 1);
4878 *  
4879 *   DoFHandler<dim> joint_dof_handler(triangulation);
4880 *   joint_dof_handler.distribute_dofs(joint_fe);
4881 *   Assert(joint_dof_handler.n_dofs() ==
4882 *   stokes_dof_handler.n_dofs() + temperature_dof_handler.n_dofs(),
4883 *   ExcInternalError());
4884 *  
4885 *   TrilinosWrappers::MPI::Vector joint_solution;
4886 *   joint_solution.reinit(joint_dof_handler.locally_owned_dofs(),
4887 *   MPI_COMM_WORLD);
4888 *  
4889 *   {
4890 *   std::vector<types::global_dof_index> local_joint_dof_indices(
4891 *   joint_fe.n_dofs_per_cell());
4892 *   std::vector<types::global_dof_index> local_stokes_dof_indices(
4893 *   stokes_fe.n_dofs_per_cell());
4894 *   std::vector<types::global_dof_index> local_temperature_dof_indices(
4895 *   temperature_fe.n_dofs_per_cell());
4896 *  
4897 *   typename DoFHandler<dim>::active_cell_iterator
4898 *   joint_cell = joint_dof_handler.begin_active(),
4899 *   joint_endc = joint_dof_handler.end(),
4900 *   stokes_cell = stokes_dof_handler.begin_active(),
4901 *   temperature_cell = temperature_dof_handler.begin_active();
4902 *   for (; joint_cell != joint_endc;
4903 *   ++joint_cell, ++stokes_cell, ++temperature_cell)
4904 *   if (joint_cell->is_locally_owned())
4905 *   {
4906 *   joint_cell->get_dof_indices(local_joint_dof_indices);
4907 *   stokes_cell->get_dof_indices(local_stokes_dof_indices);
4908 *   temperature_cell->get_dof_indices(local_temperature_dof_indices);
4909 *  
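*   // In the loop below, system_to_base_index(i).first.first identifies
*   // the base element a joint dof belongs to (0: Stokes, 1:
*   // temperature), while system_to_base_index(i).second is the dof's
*   // index within that base element: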
4910 *   for (unsigned int i = 0; i < joint_fe.n_dofs_per_cell(); ++i)
4911 *   if (joint_fe.system_to_base_index(i).first.first == 0)
4912 *   {
4913 *   Assert(joint_fe.system_to_base_index(i).second <
4914 *   local_stokes_dof_indices.size(),
4915 *   ExcInternalError());
4916 *  
4917 *   joint_solution(local_joint_dof_indices[i]) = stokes_solution(
4918 *   local_stokes_dof_indices[joint_fe.system_to_base_index(i)
4919 *   .second]);
4920 *   }
4921 *   else
4922 *   {
4923 *   Assert(joint_fe.system_to_base_index(i).first.first == 1,
4924 *   ExcInternalError());
4925 *   Assert(joint_fe.system_to_base_index(i).second <
4926 *   local_temperature_dof_indices.size(),
4927 *   ExcInternalError());
4928 *   joint_solution(local_joint_dof_indices[i]) =
4929 *   temperature_solution(
4930 *   local_temperature_dof_indices
4931 *   [joint_fe.system_to_base_index(i).second]);
4932 *   }
4933 *   }
4934 *   }
4935 *  
4936 *   joint_solution.compress(VectorOperation::insert);
4937 *  
4938 *   const IndexSet locally_relevant_joint_dofs =
4939 *   DoFTools::extract_locally_relevant_dofs(joint_dof_handler);
4940 *   TrilinosWrappers::MPI::Vector locally_relevant_joint_solution;
4941 *   locally_relevant_joint_solution.reinit(locally_relevant_joint_dofs,
4942 *   MPI_COMM_WORLD);
4943 *   locally_relevant_joint_solution = joint_solution;
4944 *  
4945 *   Postprocessor postprocessor(Utilities::MPI::this_mpi_process(
4946 *   MPI_COMM_WORLD),
4947 *   stokes_solution.block(1).min());
4948 *  
4949 *   DataOut<dim> data_out;
4950 *   data_out.attach_dof_handler(joint_dof_handler);
4951 *   data_out.add_data_vector(locally_relevant_joint_solution, postprocessor);
4952 *   data_out.build_patches();
4953 *  
4954 *   static int out_index = 0;
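*   // The final argument pads the counter in the generated file names to
*   // five digits; see DataOutInterface::write_vtu_with_pvtu_record():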
4955 *   data_out.write_vtu_with_pvtu_record(
4956 *   "./", "solution", out_index, MPI_COMM_WORLD, 5);
4957 *  
4958 *   ++out_index;
4959 *   }
4960 *  
4961 *  
4962 *  
4963 * @endcode
4964 *
4965 *
4966 * <a name="step_32-BoussinesqFlowProblemrefine_mesh"></a>
4967 * <h4>BoussinesqFlowProblem::refine_mesh</h4>
4968 *
4969
4970 *
4971 * This function isn't really new either. Since the <code>setup_dofs</code>
4972 * function that we call in the middle has its own timer section, we split
4973 * timing this function into two sections. It will also allow us to easily
4974 * identify which of the two is more expensive.
4975 *
4976
4977 *
4978 * One thing of note, however, is that we only want to compute error
4979 * indicators on the locally owned subdomain. In order to achieve this, we
4980 * pass one additional argument to the KellyErrorEstimator::estimate
4981 * function. Note that the vector for error estimates is resized to the
4982 * number of active cells present on the current process, which is less than
4983 * the total number of active cells on all processors (but more than the
4984 * number of locally owned active cells); each processor only has a few
4985 * coarse cells around the locally owned ones, as also explained in @ref step_40 "step-40".
4986 *
4987
4988 *
4989 * The local error estimates are then handed to a %parallel version of
4990 * GridRefinement::refine_and_coarsen_fixed_fraction() (found in namespace
4991 * parallel::distributed::GridRefinement, see also @ref step_40 "step-40") which looks at the errors and finds the cells that need
4992 * refinement by comparing the error values across processors. As in
4993 * @ref step_31 "step-31", we want to limit the maximum grid level. So in case some cells
4994 * have been marked that are already at the finest level, we simply clear
4995 * the refine flags.
4996 *
4997 * @code
4998 *   template <int dim>
4999 *   void
5000 *   BoussinesqFlowProblem<dim>::refine_mesh(const unsigned int max_grid_level)
5001 *   {
5002 *   SolutionTransfer<dim, TrilinosWrappers::MPI::Vector> temperature_trans(
5003 *   temperature_dof_handler);
5004 *   SolutionTransfer<dim, TrilinosWrappers::MPI::BlockVector> stokes_trans(
5005 *   stokes_dof_handler);
5006 *  
5007 *   {
5008 *   TimerOutput::Scope timer_section(computing_timer,
5009 *   "Refine mesh structure, part 1");
5010 *  
5011 *   Vector<float> estimated_error_per_cell(triangulation.n_active_cells());
5012 *  
5013 *   KellyErrorEstimator<dim>::estimate(
5014 *   temperature_dof_handler,
5015 *   QGauss<dim - 1>(parameters.temperature_degree + 1),
5016 *   std::map<types::boundary_id, const Function<dim> *>(),
5017 *   temperature_solution,
5018 *   estimated_error_per_cell,
5019 *   ComponentMask(),
5020 *   nullptr,
5021 *   0,
5022 *   triangulation.locally_owned_subdomain());
5023 *  
5024 *   parallel::distributed::GridRefinement::refine_and_coarsen_fixed_fraction(
5025 *   triangulation, estimated_error_per_cell, 0.3, 0.1);
5026 *  
5027 *   if (triangulation.n_levels() > max_grid_level)
5028 *   for (typename Triangulation<dim>::active_cell_iterator cell =
5029 *   triangulation.begin_active(max_grid_level);
5030 *   cell != triangulation.end();
5031 *   ++cell)
5032 *   cell->clear_refine_flag();
5033 *  
5034 * @endcode
5035 *
5036 * With all flags marked as necessary, we can then tell the
5037 * SolutionTransfer objects to get ready to transfer data from one mesh to
5038 * the next, which they will do when notified by
5039 * Triangulation as part of the @p execute_coarsening_and_refinement() call.
5040 * The syntax is similar to the non-%parallel solution transfer (with the
5041 * exception that here a pointer to the vector entries is enough). The
5042 * remainder of the function further down below is then concerned with
5043 * setting up the data structures again after mesh refinement and
5044 * restoring the solution vectors on the new mesh.
5045 *
5046 * @code
5047 *   const std::vector<const TrilinosWrappers::MPI::Vector *> x_temperature = {
5048 *   &temperature_solution, &old_temperature_solution};
5049 *   const std::vector<const TrilinosWrappers::MPI::BlockVector *> x_stokes = {
5050 *   &stokes_solution, &old_stokes_solution};
5051 *  
5052 *   triangulation.prepare_coarsening_and_refinement();
5053 *  
5054 *   temperature_trans.prepare_for_coarsening_and_refinement(x_temperature);
5055 *   stokes_trans.prepare_for_coarsening_and_refinement(x_stokes);
5056 *  
5057 *   triangulation.execute_coarsening_and_refinement();
5058 *   }
5059 *  
5060 *   setup_dofs();
5061 *  
5062 *   {
5063 *   TimerOutput::Scope timer_section(computing_timer,
5064 *   "Refine mesh structure, part 2");
5065 *  
5066 *   {
5067 *   TrilinosWrappers::MPI::Vector distributed_temp1(temperature_rhs);
5068 *   TrilinosWrappers::MPI::Vector distributed_temp2(temperature_rhs);
5069 *  
5070 *   std::vector<TrilinosWrappers::MPI::Vector *> tmp = {&distributed_temp1,
5071 *   &distributed_temp2};
5072 *   temperature_trans.interpolate(tmp);
5073 *  
5074 * @endcode
5075 *
5076 * enforce constraints to make the interpolated solution conforming on
5077 * the new mesh:
5078 *
5079 * @code
5080 *   temperature_constraints.distribute(distributed_temp1);
5081 *   temperature_constraints.distribute(distributed_temp2);
5082 *  
5083 *   temperature_solution = distributed_temp1;
5084 *   old_temperature_solution = distributed_temp2;
5085 *   }
5086 *  
5087 *   {
5088 *   TrilinosWrappers::MPI::BlockVector distributed_stokes(stokes_rhs);
5089 *   TrilinosWrappers::MPI::BlockVector old_distributed_stokes(stokes_rhs);
5090 *  
5091 *   std::vector<TrilinosWrappers::MPI::BlockVector *> stokes_tmp = {
5092 *   &distributed_stokes, &old_distributed_stokes};
5093 *  
5094 *   stokes_trans.interpolate(stokes_tmp);
5095 *  
5096 * @endcode
5097 *
5098 * enforce constraints to make the interpolated solution conforming on
5099 * the new mesh:
5100 *
5101 * @code
5102 *   stokes_constraints.distribute(distributed_stokes);
5103 *   stokes_constraints.distribute(old_distributed_stokes);
5104 *  
5105 *   stokes_solution = distributed_stokes;
5106 *   old_stokes_solution = old_distributed_stokes;
5107 *   }
5108 *   }
5109 *   }
5110 *  
5111 *  
5112 *  
5113 * @endcode
5114 *
5115 *
5116 * <a name="step_32-BoussinesqFlowProblemrun"></a>
5117 * <h4>BoussinesqFlowProblem::run</h4>
5118 *
5119
5120 *
5121 * This is the final and controlling function in this class. It, in fact,
5122 * runs the entire rest of the program and is, once more, very similar to
5123 * @ref step_31 "step-31". The only substantial difference is that we use a different mesh
5124 * now (a GridGenerator::hyper_shell instead of a simple cube geometry).
5125 *
5126 * @code
5127 *   template <int dim>
5128 *   void BoussinesqFlowProblem<dim>::run()
5129 *   {
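*   // The second-to-last argument below requests 96 (in 3d) or 12 (in
*   // 2d) coarse cells, and the final `true' tells hyper_shell() to
*   // assign distinct boundary indicators to the inner (0) and outer (1)
*   // boundary: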
5130 *   GridGenerator::hyper_shell(triangulation,
5131 *   Point<dim>(),
5132 *   EquationData::R0,
5133 *   EquationData::R1,
5134 *   (dim == 3) ? 96 : 12,
5135 *   true);
5136 *  
5137 *   global_Omega_diameter = GridTools::diameter(triangulation);
5138 *  
5139 *   triangulation.refine_global(parameters.initial_global_refinement);
5140 *  
5141 *   setup_dofs();
5142 *  
5143 *   unsigned int pre_refinement_step = 0;
5144 *  
5145 *   start_time_iteration:
5146 *  
5147 *   {
5148 *   TrilinosWrappers::MPI::Vector solution(
5149 *   temperature_dof_handler.locally_owned_dofs());
5150 * @endcode
5151 *
5152 * VectorTools::project supports parallel vector classes with most
5153 * standard finite elements via deal.II's own native MatrixFree framework:
5154 * since we use standard Lagrange elements of moderate order this function
5155 * works well here.
5156 *
5157 * @code
5158 *   VectorTools::project(temperature_dof_handler,
5159 *   temperature_constraints,
5160 *   QGauss<dim>(parameters.temperature_degree + 2),
5161 *   EquationData::TemperatureInitialValues<dim>(),
5162 *   solution);
5163 * @endcode
5164 *
5165 * Having so computed the current temperature field, let us set the member
5166 * variable that holds the temperature nodes. Strictly speaking, we really
5167 * only need to set <code>old_temperature_solution</code> since the first
5168 * thing we will do is to compute the Stokes solution that only requires
5169 * the previous time step's temperature field. That said, if we ever want
5170 * to extend our numerical method or physical model, nothing good can come
5171 * from leaving the other vectors uninitialized (especially since
5172 * initializing them is relatively cheap and only has to happen once at
5173 * the beginning of the program), and so we initialize
5174 * <code>old_temperature_solution</code> and
5175 * <code>old_old_temperature_solution</code> as well. The assignment makes
5176 * sure that the vectors on the left hand side (which were initialized to
5177 * contain ghost elements as well) also get the correct ghost elements. In
5178 * other words, the assignment here requires communication between
5179 * processors:
5180 *
5181 * @code
5182 *   temperature_solution = solution;
5183 *   old_temperature_solution = solution;
5184 *   old_old_temperature_solution = solution;
5185 *   }
5186 *  
5187 *   timestep_number = 0;
5188 *   time_step = old_time_step = 0;
5189 *  
5190 *   double time = 0;
5191 *  
5192 *   do
5193 *   {
5194 *   pcout << "Timestep " << timestep_number
5195 *   << ": t=" << time / EquationData::year_in_seconds << " years"
5196 *   << std::endl;
5197 *  
5198 *   assemble_stokes_system();
5199 *   build_stokes_preconditioner();
5200 *   assemble_temperature_matrix();
5201 *  
5202 *   solve();
5203 *  
5204 *   pcout << std::endl;
5205 *  
5206 *   if ((timestep_number == 0) &&
5207 *   (pre_refinement_step < parameters.initial_adaptive_refinement))
5208 *   {
5209 *   refine_mesh(parameters.initial_global_refinement +
5210 *   parameters.initial_adaptive_refinement);
5211 *   ++pre_refinement_step;
5212 *   goto start_time_iteration;
5213 *   }
5214 *   else if ((timestep_number > 0) &&
5215 *   (timestep_number % parameters.adaptive_refinement_interval ==
5216 *   0))
5217 *   refine_mesh(parameters.initial_global_refinement +
5218 *   parameters.initial_adaptive_refinement);
5219 *  
5220 *   if ((parameters.generate_graphical_output == true) &&
5221 *   (timestep_number % parameters.graphical_output_interval == 0))
5222 *   output_results();
5223 *  
5224 * @endcode
5225 *
5226 * In order to speed up linear solvers, we extrapolate the solutions
5227 * from the old time levels to the new one. This gives a very good
5228 * initial guess, cutting the number of iterations needed in solvers
5229 * by more than one half. We do not need to extrapolate in the last
5230 * iteration, so if we reached the final time, we stop here.
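*
* In formulas, the extrapolated initial guess assembled with sadd()
* below is
* @f{eqnarray*}{
* \left(1+\frac{\tau_n}{\tau_{n-1}}\right) u^{n}
* - \frac{\tau_n}{\tau_{n-1}} u^{n-1},
* @f}
* applied to both the Stokes and the temperature solution vectors.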
5231 *
5232
5233 *
5234 * As the last thing during a time step (before actually bumping up
5235 * the number of the time step), we check whether the current time
5236 * step number is divisible by 100, and if so we let the computing
5237 * timer print a summary of CPU times spent so far.
5238 *
5239 * @code
5240 *   if (time > parameters.end_time * EquationData::year_in_seconds)
5241 *   break;
5242 *  
5243 *   TrilinosWrappers::MPI::BlockVector old_old_stokes_solution;
5244 *   old_old_stokes_solution = old_stokes_solution;
5245 *   old_stokes_solution = stokes_solution;
5246 *   old_old_temperature_solution = old_temperature_solution;
5247 *   old_temperature_solution = temperature_solution;
5248 *   if (old_time_step > 0)
5249 *   {
5250 * @endcode
5251 *
5252 * Trilinos sadd does not like ghost vectors even as input. Copy
5253 * into distributed vectors for now:
5254 *
5255 * @code
5256 *   {
5257 *   TrilinosWrappers::MPI::BlockVector distr_solution(stokes_rhs);
5258 *   distr_solution = stokes_solution;
5259 *   TrilinosWrappers::MPI::BlockVector distr_old_solution(stokes_rhs);
5260 *   distr_old_solution = old_old_stokes_solution;
5261 *   distr_solution.sadd(1. + time_step / old_time_step,
5262 *   -time_step / old_time_step,
5263 *   distr_old_solution);
5264 *   stokes_solution = distr_solution;
5265 *   }
5266 *   {
5267 *   TrilinosWrappers::MPI::Vector distr_solution(temperature_rhs);
5268 *   distr_solution = temperature_solution;
5269 *   TrilinosWrappers::MPI::Vector distr_old_solution(temperature_rhs);
5270 *   distr_old_solution = old_old_temperature_solution;
5271 *   distr_solution.sadd(1. + time_step / old_time_step,
5272 *   -time_step / old_time_step,
5273 *   distr_old_solution);
5274 *   temperature_solution = distr_solution;
5275 *   }
5276 *   }
5277 *  
5278 *   if ((timestep_number > 0) && (timestep_number % 100 == 0))
5279 *   computing_timer.print_summary();
5280 *  
5281 *   time += time_step;
5282 *   ++timestep_number;
5283 *   }
5284 *   while (true);
5285 *  
5286 * @endcode
5287 *
5288 * If we are generating graphical output, do so also for the last time
5289 * step unless we had just done so before we left the do-while loop
5290 *
5291 * @code
5292 *   if ((parameters.generate_graphical_output == true) &&
5293 *   !((timestep_number - 1) % parameters.graphical_output_interval == 0))
5294 *   output_results();
5295 *   }
5296 *   } // namespace Step32
5297 *  
5298 *  
5299 *  
5300 * @endcode
5301 *
5302 *
5303 * <a name="step_32-Thecodemaincodefunction"></a>
5304 * <h3>The <code>main</code> function</h3>
5305 *
5306
5307 *
5308 * The main function is short as usual and very similar to the one in
5309 * @ref step_31 "step-31". Since we use a parameter file which is specified as an argument in
5310 * the command line, we have to read it in here and pass it on to the
5311 * Parameters class for parsing. If no filename is given in the command line,
5312 * we simply use the <code>step-32.prm</code> file which is distributed
5313 * together with the program.
5314 *
5315
5316 *
5317 * Because 3d computations are simply very slow unless you throw a lot of
5318 * processors at them, the program defaults to 2d. You can get the 3d version
5319 * by changing the constant dimension below to 3.
5320 *
5321 * @code
5322 *   int main(int argc, char *argv[])
5323 *   {
5324 *   try
5325 *   {
5326 *   using namespace Step32;
5327 *   using namespace dealii;
5328 *  
5329 *   Utilities::MPI::MPI_InitFinalize mpi_initialization(
5330 *   argc, argv, numbers::invalid_unsigned_int);
5331 *  
5332 *   std::string parameter_filename;
5333 *   if (argc >= 2)
5334 *   parameter_filename = argv[1];
5335 *   else
5336 *   parameter_filename = "step-32.prm";
5337 *  
5338 *   const int dim = 2;
5339 *   BoussinesqFlowProblem<dim>::Parameters parameters(parameter_filename);
5340 *   BoussinesqFlowProblem<dim> flow_problem(parameters);
5341 *   flow_problem.run();
5342 *   }
5343 *   catch (std::exception &exc)
5344 *   {
5345 *   std::cerr << std::endl
5346 *   << std::endl
5347 *   << "----------------------------------------------------"
5348 *   << std::endl;
5349 *   std::cerr << "Exception on processing: " << std::endl
5350 *   << exc.what() << std::endl
5351 *   << "Aborting!" << std::endl
5352 *   << "----------------------------------------------------"
5353 *   << std::endl;
5354 *  
5355 *   return 1;
5356 *   }
5357 *   catch (...)
5358 *   {
5359 *   std::cerr << std::endl
5360 *   << std::endl
5361 *   << "----------------------------------------------------"
5362 *   << std::endl;
5363 *   std::cerr << "Unknown exception!" << std::endl
5364 *   << "Aborting!" << std::endl
5365 *   << "----------------------------------------------------"
5366 *   << std::endl;
5367 *   return 1;
5368 *   }
5369 *  
5370 *   return 0;
5371 *   }
5372 * @endcode
5373<a name="step_32-Results"></a><h1>Results</h1>
5374
5375
5376When run, the program simulates convection in 3d in much the same way
5377as @ref step_31 "step-31" did, though with an entirely different testcase.
5378
5379
5380<a name="step_32-Comparisonofresultswithstep31"></a><h3>Comparison of results with step-31</h3>
5381
5382
5383Before we go to this testcase, however, let us show a few results from a
5384slightly earlier version of this program that was solving exactly the
5385testcase we used in @ref step_31 "step-31", except that we now solve it in parallel and with
5386much higher resolution. We show these results mainly for comparison.
5387
5388Here are two images that show this higher resolution if we choose a 3d
5389computation in <code>main()</code> and if we set
5390<code>initial_refinement=3</code> and
5391<code>n_pre_refinement_steps=4</code>. At the time steps shown, the
5392meshes had around 72,000 and 236,000 cells, for a total of 2,680,000
5393and 8,250,000 degrees of freedom, respectively, more than an order of
5394magnitude more than we had available in @ref step_31 "step-31":
5395
5396<table align="center" class="doxtable">
5397 <tr>
5398 <td>
5399 <img src="https://www.dealii.org/images/steps/developer/step-32.3d.cube.0.png" alt="">
5400 </td>
5401 </tr>
5402 <tr>
5403 <td>
5404 <img src="https://www.dealii.org/images/steps/developer/step-32.3d.cube.1.png" alt="">
5405 </td>
5406 </tr>
5407</table>
5408
5409The computation was done on a subset of 50 processors of the Brazos
5410cluster at Texas A&amp;M University.
5411
5412
5413<a name="step_32-Resultsfora2dcircularshelltestcase"></a><h3>Results for a 2d circular shell testcase</h3>
5414
5415
5416Next, we will run @ref step_32 "step-32" with the parameter file in the directory with one
5417change: we increase the final time to 1e9. Here we are using 16 processors. The
5418command to launch is (note that @ref step_32 "step-32".prm is the default):
5419
5420<code>
5421<pre>
5422\$ mpirun -np 16 ./step-32
5423</pre>
5424</code>
5425
5426Note that running a job on a cluster typically requires going through a job
5427scheduler, which we won't discuss here. The output will look roughly like
5428this:
5429
5430<code>
5431<pre>
5432\$ mpirun -np 16 ./step-32
5433Number of active cells: 12,288 (on 6 levels)
5434Number of degrees of freedom: 186,624 (99,840+36,864+49,920)
5435
5436Timestep 0: t=0 years
5437
5438 Rebuilding Stokes preconditioner...
5439 Solving Stokes system... 41 iterations.
5440 Maximal velocity: 60.4935 cm/year
5441 Time step: 18166.9 years
5442 17 CG iterations for temperature
5443 Temperature range: 973 4273.16
5444
5445Number of active cells: 15,921 (on 7 levels)
5446Number of degrees of freedom: 252,723 (136,640+47,763+68,320)
5447
5448Timestep 0: t=0 years
5449
5450 Rebuilding Stokes preconditioner...
5451 Solving Stokes system... 50 iterations.
5452 Maximal velocity: 60.3223 cm/year
5453 Time step: 10557.6 years
5454 19 CG iterations for temperature
5455 Temperature range: 973 4273.16
5456
5457Number of active cells: 19,926 (on 8 levels)
5458Number of degrees of freedom: 321,246 (174,312+59,778+87,156)
5459
5460Timestep 0: t=0 years
5461
5462 Rebuilding Stokes preconditioner...
5463 Solving Stokes system... 50 iterations.
5464 Maximal velocity: 57.8396 cm/year
5465 Time step: 5453.78 years
5466 18 CG iterations for temperature
5467 Temperature range: 973 4273.16
5468
5469Timestep 1: t=5453.78 years
5470
5471 Solving Stokes system... 49 iterations.
5472 Maximal velocity: 59.0231 cm/year
5473 Time step: 5345.86 years
5474 18 CG iterations for temperature
5475 Temperature range: 973 4273.16
5476
5477Timestep 2: t=10799.6 years
5478
5479 Solving Stokes system... 24 iterations.
5480 Maximal velocity: 60.2139 cm/year
5481 Time step: 5241.51 years
5482 17 CG iterations for temperature
5483 Temperature range: 973 4273.16
5484
5485[...]
5486
5487Timestep 100: t=272151 years
5488
5489 Solving Stokes system... 21 iterations.
5490 Maximal velocity: 161.546 cm/year
5491 Time step: 1672.96 years
5492 17 CG iterations for temperature
5493 Temperature range: 973 4282.57
5494
5495Number of active cells: 56,085 (on 8 levels)
5496Number of degrees of freedom: 903,408 (490,102+168,255+245,051)
5497
5498
5499
5500+---------------------------------------------+------------+------------+
5501| Total wallclock time elapsed since start | 115s | |
5502| | | |
5503| Section | no. calls | wall time | % of total |
5504+---------------------------------+-----------+------------+------------+
5505| Assemble Stokes system | 103 | 2.82s | 2.5% |
5506| Assemble temperature matrices | 12 | 0.452s | 0.39% |
5507| Assemble temperature rhs | 103 | 11.5s | 10% |
5508| Build Stokes preconditioner | 12 | 2.09s | 1.8% |
5509| Solve Stokes system | 103 | 90.4s | 79% |
5510| Solve temperature system | 103 | 1.53s | 1.3% |
5511| Postprocessing | 3 | 0.532s | 0.46% |
5512| Refine mesh structure, part 1 | 12 | 0.93s | 0.81% |
5513| Refine mesh structure, part 2 | 12 | 0.384s | 0.33% |
5514| Setup dof systems | 13 | 2.96s | 2.6% |
5515+---------------------------------+-----------+------------+------------+
5516
5517[...]
5518
5519+---------------------------------------------+------------+------------+
5520| Total wallclock time elapsed since start | 9.14e+04s | |
5521| | | |
5522| Section | no. calls | wall time | % of total |
5523+---------------------------------+-----------+------------+------------+
5524| Assemble Stokes system | 47045 | 2.05e+03s | 2.2% |
5525| Assemble temperature matrices | 4707 | 310s | 0.34% |
5526| Assemble temperature rhs | 47045 | 8.7e+03s | 9.5% |
5527| Build Stokes preconditioner | 4707 | 1.48e+03s | 1.6% |
5528| Solve Stokes system | 47045 | 7.34e+04s | 80% |
5529| Solve temperature system | 47045 | 1.46e+03s | 1.6% |
5530| Postprocessing | 1883 | 222s | 0.24% |
5531| Refine mesh structure, part 1 | 4706 | 641s | 0.7% |
5532| Refine mesh structure, part 2 | 4706 | 259s | 0.28% |
5533| Setup dof systems | 4707 | 1.86e+03s | 2% |
5534+---------------------------------+-----------+------------+------------+
5535</pre>
5536</code>
5537
5538The simulation terminates when the time reaches the 1 billion years
5539selected in the input file. You can extrapolate from this how long a
5540simulation would take for a different final time (the time step size
5541ultimately settles on somewhere around 20,000 years, so computing for
5542two billion years will take 100,000 time steps, give or take 20%). As
5543can be seen here, we spend most of the compute time in assembling
5544linear systems and &mdash; above all &mdash; in solving Stokes
5545systems.
5546
5547
5548To illustrate the program's output, we show results from every 1250th time step here:
5549<table>
5550 <tr>
5551 <td>
5552 <img src="https://www.dealii.org/images/steps/developer/step-32-2d-time-000.png" alt="">
5553 </td>
5554 <td>
5555 <img src="https://www.dealii.org/images/steps/developer/step-32-2d-time-050.png" alt="">
5556 </td>
5557 <td>
5558 <img src="https://www.dealii.org/images/steps/developer/step-32-2d-time-100.png" alt="">
5559 </td>
5560 </tr>
5561 <tr>
5562 <td>
5563 <img src="https://www.dealii.org/images/steps/developer/step-32-2d-time-150.png" alt="">
5564 </td>
5565 <td>
5566 <img src="https://www.dealii.org/images/steps/developer/step-32-2d-time-200.png" alt="">
5567 </td>
5568 <td>
5569 <img src="https://www.dealii.org/images/steps/developer/step-32-2d-time-250.png" alt="">
5570 </td>
5571 </tr>
5572 <tr>
5573 <td>
5574 <img src="https://www.dealii.org/images/steps/developer/step-32-2d-time-300.png" alt="">
5575 </td>
5576 <td>
5577 <img src="https://www.dealii.org/images/steps/developer/step-32-2d-time-350.png" alt="">
5578 </td>
5579 <td>
5580 <img src="https://www.dealii.org/images/steps/developer/step-32-2d-time-400.png" alt="">
5581 </td>
5582 </tr>
5583 <tr>
5584 <td>
5585 <img src="https://www.dealii.org/images/steps/developer/step-32-2d-time-450.png" alt="">
5586 </td>
5587 <td>
5588 <img src="https://www.dealii.org/images/steps/developer/step-32-2d-time-500.png" alt="">
5589 </td>
5590 <td>
5591 <img src="https://www.dealii.org/images/steps/developer/step-32-2d-time-550.png" alt="">
5592 </td>
5593 </tr>
5594 <tr>
5595 <td>
5596 <img src="https://www.dealii.org/images/steps/developer/step-32-2d-time-600.png" alt="">
5597 </td>
5598 <td>
5599 <img src="https://www.dealii.org/images/steps/developer/step-32-2d-cells.png" alt="">
5600 </td>
5601 <td>
5602 <img src="https://www.dealii.org/images/steps/developer/step-32-2d-partition.png" alt="">
5603 </td>
5604 </tr>
5605</table>
5606
5607The last two images show the grid as well as the partitioning of the mesh for
5608the same computation with 16 subdomains and 16 processors. The full dynamics of
5609this simulation are really only visible by looking at an animation, for example
5610the one <a
5611href="https://www.dealii.org/images/steps/developer/step-32-2d-temperature.webm">shown
5612on this site</a>. This animation is well worth watching due to its artistic quality
5613and entrancing depiction of the evolution of the magma plumes.
5614
5615If you watch the movie, you'll see that the convection pattern goes
5616through several stages: First, it gets rid of the unstable temperature
5617layering with the hot material overlain by the dense cold
5618material. After this great driver is removed and we have a sort of
5619stable situation, a few blobs start to separate from the hot boundary
5620layer at the inner ring and rise up, with a few cold fingers also
5621dropping down from the outer boundary layer. During this phase, the solution
5622remains mostly symmetric, reflecting the 12-fold symmetry of the
5623original mesh. In a final phase, the fluid enters vigorous chaotic
5624stirring in which all symmetries are lost. This is a pattern that then
5625continues to dominate the flow.
5626
5627These different phases can also be identified if we look at the
5628maximal velocity as a function of time in the simulation:
5629
5630<img src="https://www.dealii.org/images/steps/developer/step-32.2d.t_vs_vmax.png" alt="">
5631
5632Here, the velocity (shown in centimeters per year) becomes very large,
5633on the order of several meters per year, at the beginning when the
5634temperature layering is unstable. It then calms down to relatively
5635small values before picking up again in the chaotic stirring
5636regime. There, it remains in the range of 10-40 centimeters per year,
5637quite within the physically expected region.
5638
5639
5640<a name="step_32-Resultsfora3dsphericalshelltestcase"></a><h3>Results for a 3d spherical shell testcase</h3>
5641
5642
56433d computations are computationally very expensive. Furthermore, as
5644seen above, interesting behavior only starts after quite a long time,
5645requiring more CPU hours than are available on a typical
5646cluster. Consequently, rather than showing a complete simulation here,
5647let us simply show a couple of pictures we have obtained using the
5648successor to this program, called <i>ASPECT</i> (short for <i>Advanced
5649%Solver for Problems in Earth's ConvecTion</i>), that is being
5650developed independently of deal.II and that already incorporates some
5651of the extensions discussed below. The following two pictures show
5652isocontours of the temperature and the partition of the domain (along
5653with the mesh) onto 512 processors:
5654
5655<p align="center">
5656<img src="https://www.dealii.org/images/steps/developer/step-32.3d-sphere.solution.png" alt="">
5657
5658<img src="https://www.dealii.org/images/steps/developer/step-32.3d-sphere.partition.png" alt="">
5659</p>
5660
5661
5662<a name="step-32-extensions"></a>
5663<a name="step_32-Possibilitiesforextensions"></a><h3>Possibilities for extensions</h3>
5664
5665
5666There are many directions in which this program could be extended. As
5667mentioned at the end of the introduction, most of these are under active
5668development in the <i>ASPECT</i> (short for <i>Advanced %Solver for Problems
5669in Earth's ConvecTion</i>) code at the time this tutorial program is being
5670finished. Specifically, the following are certainly topics that one should
5671address to make the program more useful:
5672
5673<ul>
5674 <li> <b>Adiabatic heating/cooling:</b>
5675 The temperature field we get in our simulations after a while
5676 is mostly constant with boundary layers at the inner and outer
5677 boundary, and streamers of cold and hot material mixing
5678 everything. Yet, this doesn't match our expectation that things
5679 closer to the earth core should be hotter than closer to the
5680 surface. The reason is that the energy equation we have used does
5681 not include a term that describes adiabatic cooling and heating:
5682 rock, like gas, heats up as you compress it. Consequently, material
5683 that rises up cools adiabatically, and cold material that sinks down
5684 heats adiabatically. The correct temperature equation would
5685 therefore look somewhat like this:
5686 @f{eqnarray*}{
5687 \frac{D T}{Dt}
5688 -
5689 \nabla \cdot \kappa \nabla T &=& \gamma + \tau\frac{Dp}{Dt},
5690 @f}
5691 or, expanding the advected derivative @f$\frac{D}{Dt} =
5692 \frac{\partial}{\partial t} + \mathbf u \cdot \nabla@f$:
5693 @f{eqnarray*}{
5694 \frac{\partial T}{\partial t}
5695 +
5696 {\mathbf u} \cdot \nabla T
5697 -
5698 \nabla \cdot \kappa \nabla T &=& \gamma +
5699 \tau\left\{\frac{\partial
5700 p}{\partial t} + \mathbf u \cdot \nabla p \right\}.
5701 @f}
5702 In other words, as pressure increases in a rock volume
5703 (@f$\frac{Dp}{Dt}>0@f$) we get an additional heat source, and vice
5704 versa.
5705
5706 The time derivative of the pressure is a bit awkward to
5707 implement. If necessary, one could approximate it using the fact,
5708 outlined in the introduction, that the pressure can be decomposed
5709 into a dynamic component due to temperature differences and the
5710 resulting flow, and a static component that results solely from the
5711 static pressure of the overlying rock. Since the latter is much
5712 bigger, one may approximate @f$p\approx p_{\text{static}}=-\rho_{\text{ref}}
5713 [1+\beta T_{\text{ref}}] \varphi@f$, and consequently
5714 @f$\frac{Dp}{Dt} \approx \left\{- \mathbf u \cdot \nabla \rho_{\text{ref}}
5715 [1+\beta T_{\text{ref}}]\varphi\right\} = \rho_{\text{ref}}
5716 [1+\beta T_{\text{ref}}] \mathbf u \cdot \mathbf g@f$.
5717 In other words, if the fluid is moving in the direction of gravity
5718 (downward) it will be compressed and because in that case @f$\mathbf u
5719 \cdot \mathbf g > 0@f$ we get a positive heat source. Conversely, the
5720 fluid will cool down if it moves against the direction of gravity. (A code sketch of this term is shown after this list.)
5721
5722<li> <b>Compressibility:</b>
5723 As already hinted at in the temperature model above,
5724 mantle rocks are not incompressible. Rather, given the enormous pressures in
5725 the earth mantle (at the core-mantle boundary, the pressure is approximately
5726 140 GPa, equivalent to 1,400,000 times atmospheric pressure), rock actually
5727 does compress to something around 1.5 times the density it would have
5728 at surface pressure. Modeling this presents any number of
5729 difficulties. Primarily, the mass conservation equation is no longer
5730 @f$\textrm{div}\;\mathbf u=0@f$ but should read
5731 @f$\textrm{div}(\rho\mathbf u)=0@f$ where the density @f$\rho@f$ is now no longer
5732 spatially constant but depends on temperature and pressure. A consequence is
5733 that the model is now no longer linear; a linearized version of the Stokes
5734 equation is also no longer symmetric, requiring us to rethink preconditioners
5735 and, possibly, even the discretization. We won't go into detail here as to
5736 how this can be resolved.
5737
5738<li> <b>Nonlinear material models:</b> As already hinted at in various places,
5739 material parameters such as the density, the viscosity, and the various
5740 thermal parameters are not constant throughout the earth mantle. Rather,
5741 they nonlinearly depend on the pressure and temperature, and in the case of
5742 the viscosity on the strain rate @f$\varepsilon(\mathbf u)@f$. For complicated
5743 models, the only way to solve them accurately may be to actually
5744 iterate this dependence out in each time step, rather than simply freezing
5745 coefficients at values extrapolated from the previous time step(s).
5746
5747<li> <b>Checkpoint/restart:</b> Running this program in 2d on a number of
5748 processors allows solving realistic models in a day or two. However, in 3d,
5749 compute times are so large that one runs into two typical problems: (i) On
5750 most compute clusters, the queuing system limits run times for individual
5751 jobs to 2 or 3 days; (ii) losing the results of a computation due to
5752 hardware failures, misconfigurations, or power outages is a shame when
5753 running on hundreds of processors for a couple of days. Both of these
5754 problems can be addressed by periodically saving the state of the program
5755 and, if necessary, restarting the program at this point. This technique is
5756 commonly called <i>checkpoint/restart</i> and it requires that the entire
5757 state of the program is written to a permanent storage location (e.g. a hard
5758 drive). Given the complexity of the data structures of this program, this is
5759 not entirely trivial (it may also involve writing gigabytes or more of
5760 data), but it can be made easier by realizing that one can save the state
5761 between two time steps where it essentially only consists of the mesh and
5762 solution vectors; during restart one would then first re-enumerate degrees
5763 of freedom in the same way as done before and then re-assemble
5764 matrices. Nevertheless, given the distributed nature of the data structures
5765 involved here, saving and restoring the state of a program is not
5766 trivial. An additional complexity is introduced by the fact that one may
5767 want to change the number of processors between runs, for example because
5768 one may wish to continue computing on a mesh that is finer than the one used
5769 to precompute a starting temperature field at an intermediate time. (A sketch of the save/restore cycle is shown after this list.)
5770
5771<li> <b>Predictive postprocessing:</b> The point of computations like this is
5772 not simply to solve the equations. Rather, it is typically the exploration
5773 of different physical models and their comparison with things that we can
5774 measure at the earth surface, in order to find which models are realistic
5775 and which are contradicted by reality. To this end, we need to compute
5776 quantities from our solution vectors that are related to what we can
5777 observe. Among these are, for example, heat fluxes at the surface of the
5778 earth, as well as seismic velocities throughout the mantle, as these affect
5779 earthquake waves that are recorded by seismographs. (How one might compute the surface heat flux is sketched after this list.)
5780
5781<li> <b>Better refinement criteria:</b> As can be seen above for the
57823d case, the mesh in 3d is primarily refined along the inner
5783boundary. This is because the boundary layer there is stronger than
5784any other transition in the domain, leading us to refine there almost
5785exclusively and basically not at all following the plumes. One
5786certainly needs better refinement criteria: the one used here, namely
5787the KellyErrorEstimator applied to the temperature, cannot track the
5788parts of the solution we are really interested in nearly as well as
5789one would like. (One simple alternative is sketched after this list.)
5790</ul>
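
As suggested in the first item of this list, the approximation
@f$\frac{Dp}{Dt} \approx \rho_{\text{ref}} [1+\beta T_{\text{ref}}]
\mathbf u \cdot \mathbf g@f$ is easy to evaluate pointwise. The
following is only a sketch of how this could look in code: the
function name is made up, the coefficient <code>tau</code> is left
unspecified, and we assume that the material constants defined at the
top of the program are accessible through the
<code>EquationData</code> namespace:
@code
template <int dim>
double adiabatic_heating(const Tensor<1, dim> &velocity,
                         const Tensor<1, dim> &gravity,
                         const double          tau)
{
  // Dp/Dt is approximated by rho_ref [1 + beta T_ref] u . g; the
  // operator* between two rank-1 tensors is their scalar product:
  const double DpDt = EquationData::reference_density *
                      (1 + EquationData::expansion_coefficient *
                             EquationData::reference_temperature) *
                      (velocity * gravity);

  return tau * DpDt;
}
@endcode
During assembly of the temperature system, this value would then
simply be added to the heating term @f$\gamma@f$ at every quadrature
point of the right hand side.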
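
For the checkpoint/restart item, the
parallel::distributed::Triangulation class can serialize itself
together with solution vectors attached through the SolutionTransfer
class. The following sketch glosses over many details (error checking,
the temperature field and the old solution vectors, and the fact that
the vectors handed to the transfer objects must not contain ghost
entries); the two function names are made up:
@code
template <int dim>
void BoussinesqFlowProblem<dim>::create_checkpoint()
{
  // Attach the solution vector to the triangulation and write
  // everything to disk:
  SolutionTransfer<dim, TrilinosWrappers::MPI::BlockVector>
    stokes_transfer(stokes_dof_handler);
  stokes_transfer.prepare_for_serialization(stokes_solution);

  triangulation.save("checkpoint.mesh");
}

template <int dim>
void BoussinesqFlowProblem<dim>::resume_from_checkpoint()
{
  // Re-create the coarse mesh exactly as in run(), then load the
  // refined mesh along with the attached data:
  GridGenerator::hyper_shell(triangulation,
                             Point<dim>(),
                             EquationData::R0,
                             EquationData::R1,
                             (dim == 3) ? 96 : 12,
                             true);
  triangulation.load("checkpoint.mesh");

  // Re-enumerate degrees of freedom in the same way as before, then
  // read the solution vector back:
  setup_dofs();

  SolutionTransfer<dim, TrilinosWrappers::MPI::BlockVector>
    stokes_transfer(stokes_dof_handler);
  stokes_transfer.deserialize(stokes_solution);
}
@endcode
Note that a saved triangulation can be loaded on a different number of
processes than it was written with, which addresses at least part of
the last complication mentioned in the item above.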
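
As an example of the predictive postprocessing mentioned above, the
conductive heat flux through the outer boundary can be computed by
integrating @f$-\kappa \nabla T \cdot \mathbf n@f$ over all boundary
faces (up to the constant factor @f$\rho c_p@f$ that converts this
diffusive flux into a heat flux). The sketch below uses a made-up
function name and assumes the member names of this program; the outer
boundary has boundary id 1 because the <code>colorize</code> flag is
passed to GridGenerator::hyper_shell():
@code
template <int dim>
double BoussinesqFlowProblem<dim>::compute_outer_heat_flux() const
{
  const QGauss<dim - 1> face_quadrature(parameters.temperature_degree + 1);
  FEFaceValues<dim>     fe_face_values(mapping,
                                       temperature_fe,
                                       face_quadrature,
                                       update_gradients |
                                         update_normal_vectors |
                                         update_JxW_values);
  std::vector<Tensor<1, dim>> gradients(face_quadrature.size());

  double local_flux = 0;
  for (const auto &cell : temperature_dof_handler.active_cell_iterators())
    if (cell->is_locally_owned())
      for (const unsigned int f : cell->face_indices())
        if (cell->face(f)->at_boundary() &&
            (cell->face(f)->boundary_id() == 1))
          {
            fe_face_values.reinit(cell, f);
            fe_face_values.get_function_gradients(temperature_solution,
                                                  gradients);

            // Add up -kappa grad T . n over this boundary face:
            for (unsigned int q = 0; q < face_quadrature.size(); ++q)
              local_flux -= EquationData::kappa * gradients[q] *
                            fe_face_values.normal_vector(q) *
                            fe_face_values.JxW(q);
          }

  // Every process has only seen its own cells:
  return Utilities::MPI::sum(local_flux, MPI_COMM_WORLD);
}
@endcode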
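
Finally, a first (untested) experiment toward better refinement
criteria could be to base the indicators on the velocity rather than
on the temperature, for example by applying the KellyErrorEstimator to
the velocity components of the Stokes solution:
@code
const FEValuesExtractors::Vector velocities(0);

Vector<float> estimated_error_per_cell(triangulation.n_active_cells());
KellyErrorEstimator<dim>::estimate(
  stokes_dof_handler,
  QGauss<dim - 1>(parameters.stokes_velocity_degree + 1),
  {},
  stokes_solution,
  estimated_error_per_cell,
  stokes_fe.component_mask(velocities));
@endcode
One would then still have to decide how to combine these indicators
with the temperature-based ones, for example by normalizing both sets
and taking the cell-wise maximum.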
5791
5792
5793There are many other ways to extend the current program. However, rather than
5794discussing them here, let us point to the much larger open
5795source code ASPECT (see https://aspect.geodynamics.org/) that constitutes the
5796further development of @ref step_32 "step-32" and that already includes many such possible
5797extensions.
5798 *
5799 *
5800<a name="step_32-PlainProg"></a>
5801<h1> The plain program</h1>
5802@include "step-32.cc"
5803*/